; blob: ea1ff4e56b959b33b4492f78e6bd0ac4dbf57585 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
; Unmasked byte-equality compare of two 128-bit vectors (viewed as <16 x i8>);
; the 16-lane i1 result is widened to 32 lanes and returned as an i32.
; With +avx512bw,+avx512vl the expected code is one vpcmpeqb into %k0 plus a
; kmovd. With only +avx512f the compare is done in %xmm0, sign-extended to
; %zmm0, converted to a mask by vptestmd and read back through a stack slot.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero padding after the 16 compare lanes.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Memory-operand variant of the unmasked <16 x i8> equality compare: the second
; operand is loaded from %__b, and the 16-lane result is widened to 32 lanes
; and returned as an i32. Both expected sequences fold the load into vpcmpeqb
; as a (%rdi) operand; the +avx512f-only sequence again widens via
; vpmovsxbd/vptestmd and a stack slot.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero padding after the 16 compare lanes.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked <16 x i8> equality compare: the i16 argument %__u is reinterpreted as
; a 16-lane mask and ANDed with the compare result before the result is widened
; to 32 lanes and returned as an i32.
; With +avx512bw,+avx512vl the expected code moves %edi into %k1 and uses it as
; the write mask of vpcmpeqb. With only +avx512f the mask is applied through a
; masked vptestmd, followed by a vpternlogd/vpmovdb/vpmovsxbd/vptestmd round
; trip and a stack-slot readback.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero padding after the 16 masked compare lanes.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked, memory-operand <16 x i8> equality compare: the second operand is
; loaded from %__b, the i16 argument %__u is reinterpreted as a 16-lane mask
; and ANDed with the compare result, and the result is widened to 32 lanes and
; returned as an i32. Both expected sequences fold the load into vpcmpeqb as a
; (%rsi) operand and take the mask from %edi.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero padding after the 16 masked compare lanes.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked <16 x i8> equality compare whose 16-lane i1 result is widened to 64
; lanes and returned as an i64.
; With +avx512bw,+avx512vl the expected code is one vpcmpeqb into %k0 plus a
; kmovq. With only +avx512f the expected code zeroes three 16-bit stack slots,
; stores the vptestmd mask at (%rsp), and assembles the i64 from two 32-bit
; loads with shlq/orq.
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
; The index run 16-31 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 16-63 are zero padding.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand variant of the unmasked <16 x i8> equality compare with the
; 16-lane result widened to 64 lanes and returned as an i64. The second operand
; is loaded from %__b; both expected sequences fold the load into vpcmpeqb as a
; (%rdi) operand.
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
; The index run 16-31 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 16-63 are zero padding.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked <16 x i8> equality compare: the i16 argument %__u is reinterpreted as
; a 16-lane mask and ANDed with the compare result, which is then widened to 64
; lanes and returned as an i64.
; With +avx512bw,+avx512vl the expected code uses %k1 (from %edi) as the write
; mask of vpcmpeqb and reads the result with kmovq. With only +avx512f the mask
; is applied through a masked vptestmd and the i64 is assembled from stack
; slots with shlq/orq.
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; The index run 16-31 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 16-63 are zero padding.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked, memory-operand <16 x i8> equality compare: the second operand is
; loaded from %__b, the i16 argument %__u is reinterpreted as a 16-lane mask
; and ANDed with the compare result, and the result is widened to 64 lanes and
; returned as an i64. Both expected sequences fold the load into vpcmpeqb as a
; (%rsi) operand and take the mask from %edi.
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; The index run 16-31 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 16-63 are zero padding.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked byte-equality compare of two 256-bit vectors (viewed as <32 x i8>);
; the 32-lane i1 result is widened to 64 lanes and returned as an i64.
; With +avx512bw,+avx512vl the expected code is one ymm vpcmpeqb into %k0 plus
; a kmovq. With only +avx512f the ymm compare result is split with
; vextracti128, each half is converted to a 16-bit mask via
; vpmovsxbd/vptestmd, and the i64 is assembled from stack slots.
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result
; lanes 32-63 are zero padding after the 32 compare lanes.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand variant of the unmasked <32 x i8> equality compare with the
; 32-lane result widened to 64 lanes and returned as an i64. The second operand
; is loaded from %__b; both expected sequences fold the load into the ymm
; vpcmpeqb as a (%rdi) operand.
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result
; lanes 32-63 are zero padding after the 32 compare lanes.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked <32 x i8> equality compare: the i32 argument %__u is reinterpreted as
; a 32-lane mask and ANDed with the compare result, which is then widened to 64
; lanes and returned as an i64.
; With +avx512bw,+avx512vl the expected code uses %k1 (from %edi) as the write
; mask of the ymm vpcmpeqb. With only +avx512f the expected code spills %edi,
; reloads it as two 16-bit masks (%k1, %k2), expands each to a byte vector with
; vpternlogd/vpmovdb, and applies them with vpand to the two 128-bit halves of
; the compare result.
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result
; lanes 32-63 are zero padding after the 32 masked compare lanes.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked, memory-operand <32 x i8> equality compare: the second operand is
; loaded from %__b, the i32 argument %__u is reinterpreted as a 32-lane mask
; and ANDed with the compare result, and the result is widened to 64 lanes and
; returned as an i64. Both expected sequences fold the load into the ymm
; vpcmpeqb as a (%rsi) operand; the +avx512f-only sequence applies the mask
; with vpand on the two 128-bit halves.
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result
; lanes 32-63 are zero padding after the 32 masked compare lanes.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked word-equality compare of two 128-bit vectors (viewed as <8 x i16>);
; the 8-lane i1 result is widened to 16 lanes and returned as an i16.
; With +avx512bw,+avx512vl the expected code is one vpcmpeqw into %k0 plus a
; kmovd. With only +avx512f the compare is done in %xmm0, sign-extended to
; %zmm0 with vpmovsxwq and converted to a mask by vptestmq; no stack frame is
; expected in either sequence.
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero padding after the 8 compare lanes.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Memory-operand variant of the unmasked <8 x i16> equality compare with the
; 8-lane result widened to 16 lanes and returned as an i16. The second operand
; is loaded from %__b; both expected sequences fold the load into vpcmpeqw as a
; (%rdi) operand.
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero padding after the 8 compare lanes.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked <8 x i16> equality compare: the i8 argument %__u is reinterpreted as
; an 8-lane mask and ANDed with the compare result, which is then widened to 16
; lanes and returned as an i16.
; With +avx512bw,+avx512vl the expected code uses %k1 (from %edi) as the write
; mask of vpcmpeqw. With only +avx512f the same %k1 is used as the write mask
; of the vptestmq that converts the sign-extended compare result to a mask.
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero padding after the 8 masked compare lanes.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked, memory-operand <8 x i16> equality compare: the second operand is
; loaded from %__b, the i8 argument %__u is reinterpreted as an 8-lane mask and
; ANDed with the compare result, and the result is widened to 16 lanes and
; returned as an i16. Both expected sequences fold the load into vpcmpeqw as a
; (%rsi) operand and take the mask from %edi.
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero padding after the 8 masked compare lanes.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked <8 x i16> equality compare whose 8-lane i1 result is widened to 32
; lanes and returned as an i32.
; With +avx512bw,+avx512vl the expected code is one vpcmpeqw into %k0 plus a
; kmovd. With only +avx512f the expected code extracts each of the 8 mask bits
; into a general-purpose register (kshiftrw/kmovw), rebuilds a byte vector with
; vpinsrb, converts it back to a mask (vpmovsxbd/vpslld/vptestmd) and reads the
; result through a stack slot.
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; The index run 8-15 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 8-31 are zero padding.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Memory-operand variant of the unmasked <8 x i16> equality compare with the
; 8-lane result widened to 32 lanes and returned as an i32. The second operand
; is loaded from %__b; both expected sequences fold the load into vpcmpeqw as a
; (%rdi) operand. The +avx512f-only sequence again rebuilds the mask bit by bit
; through general-purpose registers and vpinsrb.
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; The index run 8-15 appears three times; every such index selects a lane of
; the zeroinitializer operand, so result lanes 8-31 are zero padding.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked equality compare of %__a and %__b viewed as <8 x i16>: the compare
; result is ANDed with %__u reinterpreted as <8 x i1>, padded with zero lanes
; to <32 x i1>, and returned as an i32 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single vpcmpeqw. Expected with only AVX512F: a longer
; expansion that goes through 512-bit vectors and a stack slot.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; every index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-31 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked equality compare of %__a (viewed as <8 x i16>) against the <2 x i64>
; vector loaded from %__b: the compare result is ANDed with %__u reinterpreted
; as <8 x i1>, padded with zero lanes to <32 x i1>, and returned as an i32 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single vpcmpeqw with a memory operand. Expected with only
; AVX512F: a longer expansion through 512-bit vectors and a stack slot.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; every index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-31 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality compare of %__a and %__b viewed as <8 x i16>. The <8 x i1> result
; is padded with zero lanes to <64 x i1> and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: one vpcmpeqw writing a mask register,
; read back with kmovq. Expected with only AVX512F: a longer expansion that
; goes through 512-bit vectors and stack slots, assembling the i64 from two
; 32-bit loads.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Mask indices 0-7 pick the compare lanes; every index in 8-15 picks a lane of
; the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Equality compare of %__a (viewed as <8 x i16>) against the <2 x i64> vector
; loaded from %__b. The <8 x i1> result is padded with zero lanes to <64 x i1>
; and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: one vpcmpeqw with a memory operand writing
; a mask register, read back with kmovq. Expected with only AVX512F: a longer
; expansion through 512-bit vectors and stack slots, assembling the i64 from
; two 32-bit loads.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Mask indices 0-7 pick the compare lanes; every index in 8-15 picks a lane of
; the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked equality compare of %__a and %__b viewed as <8 x i16>: the compare
; result is ANDed with %__u reinterpreted as <8 x i1>, padded with zero lanes
; to <64 x i1>, and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single vpcmpeqw, read back with kmovq. Expected with only
; AVX512F: a longer expansion through 512-bit vectors and stack slots,
; assembling the i64 from two 32-bit loads.
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; every index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked equality compare of %__a (viewed as <8 x i16>) against the <2 x i64>
; vector loaded from %__b: the compare result is ANDed with %__u reinterpreted
; as <8 x i1>, padded with zero lanes to <64 x i1>, and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single vpcmpeqw with a memory operand, read back with kmovq.
; Expected with only AVX512F: a longer expansion through 512-bit vectors and
; stack slots, assembling the i64 from two 32-bit loads.
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; every index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Equality compare of %__a and %__b viewed as <16 x i16>. The <16 x i1> result
; is padded with zero lanes to <32 x i1> and returned as an i32 mask.
; Expected with AVX512BW/VL enabled: one 256-bit vpcmpeqw writing a mask
; register. Expected with only AVX512F: the compare is done in a vector
; register, converted to a 16-bit mask via 512-bit ops, and read back through
; a stack slot.
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Mask indices 0-15 pick the compare lanes; indices 16-31 pick lanes of the
; zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Equality compare of %__a (viewed as <16 x i16>) against the <4 x i64> vector
; loaded from %__b. The <16 x i1> result is padded with zero lanes to
; <32 x i1> and returned as an i32 mask.
; Expected with AVX512BW/VL enabled: one 256-bit vpcmpeqw with a memory operand
; writing a mask register. Expected with only AVX512F: the compare is done in
; a vector register, converted to a 16-bit mask via 512-bit ops, and read back
; through a stack slot.
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Mask indices 0-15 pick the compare lanes; indices 16-31 pick lanes of the
; zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked equality compare of %__a and %__b viewed as <16 x i16>: the compare
; result is ANDed with %__u reinterpreted as <16 x i1>, padded with zero lanes
; to <32 x i1>, and returned as an i32 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single 256-bit vpcmpeqw. Expected with only AVX512F: a
; longer expansion through 512-bit vectors, with %__u applied as the
; write-mask of a vptestmd, and the result read back through a stack slot.
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 0-15 pick the masked compare lanes; indices 16-31 pick lanes of
; the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked equality compare of %__a (viewed as <16 x i16>) against the <4 x i64>
; vector loaded from %__b: the compare result is ANDed with %__u reinterpreted
; as <16 x i1>, padded with zero lanes to <32 x i1>, and returned as an i32
; mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single 256-bit vpcmpeqw with a memory operand. Expected with
; only AVX512F: a longer expansion through 512-bit vectors, with %__u applied
; as the write-mask of a vptestmd, and the result read back through a stack
; slot.
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 0-15 pick the masked compare lanes; indices 16-31 pick lanes of
; the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality compare of %__a and %__b viewed as <16 x i16>. The <16 x i1> result
; is padded with zero lanes to <64 x i1> and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: one 256-bit vpcmpeqw writing a mask
; register, read back with kmovq. Expected with only AVX512F: the compare is
; done in a vector register, converted to a 16-bit mask via 512-bit ops, and
; the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Mask indices 0-15 pick the compare lanes; every index in 16-31 picks a lane
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Equality compare of %__a (viewed as <16 x i16>) against the <4 x i64> vector
; loaded from %__b. The <16 x i1> result is padded with zero lanes to
; <64 x i1> and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: one 256-bit vpcmpeqw with a memory operand
; writing a mask register, read back with kmovq. Expected with only AVX512F:
; the compare is done in a vector register, converted to a 16-bit mask via
; 512-bit ops, and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Mask indices 0-15 pick the compare lanes; every index in 16-31 picks a lane
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked equality compare of %__a and %__b viewed as <16 x i16>: the compare
; result is ANDed with %__u reinterpreted as <16 x i1>, padded with zero lanes
; to <64 x i1>, and returned as an i64 mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single 256-bit vpcmpeqw, read back with kmovq. Expected with
; only AVX512F: a longer expansion through 512-bit vectors, with %__u applied
; as the write-mask of a vptestmd, and the i64 assembled from two 32-bit stack
; loads.
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 0-15 pick the masked compare lanes; every index in 16-31 picks
; a lane of the zeroinitializer operand, so result lanes 16-63 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked equality compare of %__a (viewed as <16 x i16>) against the <4 x i64>
; vector loaded from %__b: the compare result is ANDed with %__u reinterpreted
; as <16 x i1>, padded with zero lanes to <64 x i1>, and returned as an i64
; mask.
; Expected with AVX512BW/VL enabled: %__u is moved into k1 and used as the
; write-mask of a single 256-bit vpcmpeqw with a memory operand, read back
; with kmovq. Expected with only AVX512F: a longer expansion through 512-bit
; vectors, with %__u applied as the write-mask of a vptestmd, and the i64
; assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 0-15 pick the masked compare lanes; every index in 16-31 picks
; a lane of the zeroinitializer operand, so result lanes 16-63 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 32 x i16 equality compare widened to a 64-bit mask.
; %__a and %__b are compared as <32 x i16>; the <32 x i1> result is extended to
; <64 x i1> and returned as i64.
; Expected code: the VLX checks expect one zmm vpcmpeqw into a k-register. The
; NoVLX checks (avx512f only) expect both operands to be rebuilt word by word
; with vpinsrw, compared as two ymm halves, and each half converted to a 16-bit
; mask that is stored to the stack and recombined with shlq/orq.
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero and lanes 0-31 carry the compare bits.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked 32 x i16 equality compare against a memory operand, widened to a
; 64-bit mask.
; %__a and the <8 x i64> loaded from %__b are compared as <32 x i16>; the
; <32 x i1> result is extended to <64 x i1> and returned as i64.
; Expected code: the VLX checks expect one zmm vpcmpeqw with a (%rdi) memory
; operand into a k-register. The NoVLX checks (avx512f only) expect %__a to be
; rebuilt word by word with vpinsrw and compared as two ymm halves against
; (%rdi) and 32(%rdi), with each half converted to a 16-bit mask that is stored
; to the stack and recombined with shlq/orq.
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero and lanes 0-31 carry the compare bits.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked 32 x i16 equality compare widened to a 64-bit mask.
; %__a and %__b are compared as <32 x i16>, the result is ANDed with the bits of
; %__u, and the <32 x i1> value is extended to <64 x i1> and returned as i64.
; Expected code: the VLX checks expect one masked zmm vpcmpeqw into a
; k-register. The NoVLX checks (avx512f only) expect %edi to be stored to the
; stack and reloaded as two 16-bit k-masks, each expanded to a byte vector
; (vpternlogd + vpmovdb) and combined with the matching ymm-half compare result
; by vpand before being turned back into a mask with vpslld + vptestmd.
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero and lanes 0-31 carry the masked compare bits.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked 32 x i16 equality compare against a memory operand, widened to a
; 64-bit mask.
; %__a and the <8 x i64> loaded from %__b are compared as <32 x i16>, the result
; is ANDed with the bits of %__u, and the <32 x i1> value is extended to
; <64 x i1> and returned as i64.
; Expected code: the VLX checks expect one masked zmm vpcmpeqw with a (%rsi)
; memory operand into a k-register. The NoVLX checks (avx512f only) expect %edi
; to be stored to the stack and reloaded as two 16-bit k-masks, %__a to be
; rebuilt with vpinsrw and compared as two ymm halves against (%rsi) and
; 32(%rsi), and each half to be combined with its expanded mask by vpand.
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero and lanes 0-31 carry the masked compare bits.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 4 x i32 equality compare widened to an 8-bit mask.
; %__a and %__b are compared as <4 x i32>; the <4 x i1> result is extended to
; <8 x i1> and returned as i8.
; Expected code: the VLX checks expect one xmm vpcmpeqd into a k-register. The
; NoVLX checks (avx512f only) expect an xmm vpcmpeqd into a vector register,
; followed by a vpextrb per element and k-register shift/xor sequences that
; place each extracted bit into the result mask.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper
; four result lanes are zero and lanes 0-3 carry the compare bits.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked 4 x i32 equality compare against a memory operand, widened to an
; 8-bit mask.
; %__a and the <2 x i64> loaded from %__b are compared as <4 x i32>; the
; <4 x i1> result is extended to <8 x i1> and returned as i8.
; Expected code: the VLX checks expect one xmm vpcmpeqd with a (%rdi) memory
; operand into a k-register. The NoVLX checks (avx512f only) expect an xmm
; vpcmpeqd from (%rdi) into a vector register, followed by a vpextrb per element
; and k-register shift/xor sequences that place each extracted bit into the
; result mask.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper
; four result lanes are zero and lanes 0-3 carry the compare bits.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked form: the <4 x i32> equality result is ANDed with the low four bits of
; %__u, widened to <8 x i1> with zero upper lanes and returned as an i8.
; Expected code for the +avx512vl run moves %edi into %k1 and uses it as the
; write mask of vpcmpeqd.
; Expected code for the +avx512f-only run extracts bits 0-3 of the mask with
; kshiftrw/kmovw, places them at bytes 0, 4, 8 and 12 of %xmm1 via
; vmovd/vpinsrb, combines them with the compare result using vpand, and then
; rebuilds a mask register from bytes 0, 4, 8 and 12 of the result.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Compare result is the first operand of the and, the mask is the second.
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four
; result bits are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked compare with the second operand loaded from memory through %__b. The
; <4 x i32> equality result is ANDed with the low four bits of %__u, widened to
; <8 x i1> with zero upper lanes and returned as an i8.
; Expected code for the +avx512vl run is a write-masked vpcmpeqd with a (%rsi)
; memory operand.
; Expected code for the +avx512f-only run folds the load into an xmm vpcmpeqd,
; spreads mask bits 0-3 to bytes 0, 4, 8 and 12 of %xmm1, applies vpand, and
; rebuilds a mask register from the result one byte at a time.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four
; result bits are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Broadcast form: a single i32 loaded through %__b is splatted to all four
; lanes and compared for equality against %__a viewed as <4 x i32>. The result
; is widened to <8 x i1> with zero upper lanes and returned as an i8.
; Expected code for the +avx512vl run folds the splat into vpcmpeqd as a
; {1to4} broadcast memory operand.
; Expected code for the +avx512f-only run uses a separate vpbroadcastd, an xmm
; vpcmpeqd, and then builds the mask from bytes 0, 4, 8 and 12 of the result.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four
; result bits are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked broadcast form: a single i32 loaded through %__b is splatted to four
; lanes, compared for equality against %__a viewed as <4 x i32>, and ANDed with
; the low four bits of %__u. The result is widened to <8 x i1> with zero upper
; lanes and returned as an i8.
; Expected code for the +avx512vl run is a write-masked vpcmpeqd with a {1to4}
; broadcast memory operand.
; Expected code for the +avx512f-only run uses vpbroadcastd plus an xmm
; vpcmpeqd, spreads mask bits 0-3 to bytes 0, 4, 8 and 12 of %xmm1, applies
; vpand, and rebuilds a mask register from the result one byte at a time.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Here the mask is the first operand of the and and the compare result the
; second; keep this order, the expected vpand operand order goes with it.
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four
; result bits are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Lane-wise equality of two 128-bit vectors viewed as <4 x i32>. The <4 x i1>
; result is widened to <16 x i1> with zero upper lanes and returned as an i16.
; Expected code for the +avx512vl run is a single vpcmpeqd into a mask register.
; Expected code for the +avx512f-only run compares in %xmm0, pulls bytes 0, 4, 8
; and 12 out with vpextrb and merges them into a mask register using k-register
; shifts and xors.
; The expected-assembly lines below are autogenerated (see the note at the top
; of the file); regenerate them rather than editing them by hand.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both operands as four 32-bit lanes and compare them.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 select the compare result; every later position uses an index in
; 4-7, which selects a lane of the zeroinitializer operand, so result bits 4-15
; are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Same compare as the register form, but the second operand is loaded from
; memory through %__b. The <4 x i1> result is widened to <16 x i1> with zero
; upper lanes and returned as an i16.
; Both runs are expected to fold the load into vpcmpeqd as a (%rdi) memory
; operand; the +avx512f-only run then builds the mask from bytes 0, 4, 8 and 12
; of the %xmm0 result with vpextrb plus k-register shifts and xors.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 select the compare result; every later position uses an index in
; 4-7, which selects a lane of the zeroinitializer operand, so result bits 4-15
; are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked form: the <4 x i32> equality result is ANDed with the low four bits of
; %__u, widened to <16 x i1> with zero upper lanes and returned as an i16.
; Expected code for the +avx512vl run moves %edi into %k1 and uses it as the
; write mask of vpcmpeqd.
; Expected code for the +avx512f-only run extracts bits 0-3 of the mask with
; kshiftrw/kmovw, places them at bytes 0, 4, 8 and 12 of %xmm1 via
; vmovd/vpinsrb, combines them with the compare result using vpand, and then
; rebuilds a mask register from bytes 0, 4, 8 and 12 of the result.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Compare result is the first operand of the and, the mask is the second.
%4 = and <4 x i1> %2, %extract.i
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked compare with the second operand loaded from memory through %__b. The
; <4 x i32> equality result is ANDed with the low four bits of %__u, widened to
; <16 x i1> with zero upper lanes and returned as an i16.
; Expected code for the +avx512vl run is a write-masked vpcmpeqd with a (%rsi)
; memory operand.
; Expected code for the +avx512f-only run folds the load into an xmm vpcmpeqd,
; spreads mask bits 0-3 to bytes 0, 4, 8 and 12 of %xmm1, applies vpand, and
; rebuilds a mask register from the result one byte at a time.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Broadcast form: a single i32 loaded through %__b is splatted to all four
; lanes and compared for equality against %__a viewed as <4 x i32>. The result
; is widened to <16 x i1> with zero upper lanes and returned as an i16.
; Expected code for the +avx512vl run folds the splat into vpcmpeqd as a
; {1to4} broadcast memory operand.
; Expected code for the +avx512f-only run uses a separate vpbroadcastd, an xmm
; vpcmpeqd, and then builds the mask from bytes 0, 4, 8 and 12 of the result.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked broadcast form: a single i32 loaded through %__b is splatted to four
; lanes, compared for equality against %__a viewed as <4 x i32>, and ANDed with
; the low four bits of %__u. The result is widened to <16 x i1> with zero upper
; lanes and returned as an i16.
; Expected code for the +avx512vl run is a write-masked vpcmpeqd with a {1to4}
; broadcast memory operand.
; Expected code for the +avx512f-only run uses vpbroadcastd plus an xmm
; vpcmpeqd, spreads mask bits 0-3 to bytes 0, 4, 8 and 12 of %xmm1, applies
; vpand, and rebuilds a mask register from the result one byte at a time.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Here the mask is the first operand of the and and the compare result the
; second; keep this order, the expected vpand operand order goes with it.
%4 = and <4 x i1> %extract.i, %2
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Lane-wise equality of two 128-bit vectors viewed as <4 x i32>. The <4 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32.
; Expected code for the +avx512vl run is a single vpcmpeqd into a mask register
; followed by kmovd.
; Expected code for the +avx512f-only run sets up a frame-pointer frame with
; the stack pointer aligned down to 32 bytes, packs bytes 0, 4, 8 and 12 of the
; compare result to the bottom of %ymm0 with vpshufb (all other bytes zeroed),
; converts each 128-bit half to a 16-bit mask with vpmovsxbd + vptestmd, stores
; both masks to the stack, and reloads 32 bits from (%rsp) as the return value.
; The expected-assembly lines below are autogenerated (see the note at the top
; of the file); regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both operands as four 32-bit lanes and compare them.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 select the compare result; every later position uses an index in
; 4-7, which selects a lane of the zeroinitializer operand, so result bits 4-31
; are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Same compare as the register form, but the second operand is loaded from
; memory through %__b. The <4 x i1> result is widened to <32 x i1> with zero
; upper lanes and returned as an i32.
; Both runs are expected to fold the load into vpcmpeqd as a (%rdi) memory
; operand. The +avx512f-only run additionally aligns the stack to 32 bytes,
; packs bytes 0, 4, 8 and 12 of the result with vpshufb, turns each 128-bit
; half into a 16-bit mask with vpmovsxbd + vptestmd, spills both masks to the
; stack and reloads 32 bits from (%rsp) as the return value.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 select the compare result; every later position uses an index in
; 4-7, which selects a lane of the zeroinitializer operand, so result bits 4-31
; are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked form: the <4 x i32> equality result is ANDed with the low four bits of
; %__u, widened to <32 x i1> with zero upper lanes and returned as an i32.
; Expected code for the +avx512vl run moves %edi into %k1 and uses it as the
; write mask of vpcmpeqd.
; Expected code for the +avx512f-only run aligns the stack to 32 bytes, spreads
; mask bits 0-3 to bytes 0, 4, 8 and 12 of %xmm1, applies vpand to the compare
; result, packs the four bytes with vpshufb, and converts each 128-bit half to
; a 16-bit mask with vpmovsxbd + vptestmd. The low half additionally goes
; through vpslld $31 before vptestmd. Both masks are spilled to the stack and
; 32 bits are reloaded from (%rsp) as the return value.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Compare result is the first operand of the and, the mask is the second.
%4 = and <4 x i1> %2, %extract.i
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked compare with the second operand loaded from memory through %__b. The
; <4 x i32> equality result is ANDed with the low four bits of %__u, widened to
; <32 x i1> with zero upper lanes and returned as an i32.
; Expected code for the +avx512vl run is a write-masked vpcmpeqd with a (%rsi)
; memory operand.
; Expected code for the +avx512f-only run aligns the stack to 32 bytes, folds
; the load into an xmm vpcmpeqd, spreads mask bits 0-3 to bytes 0, 4, 8 and 12
; of %xmm1, applies vpand, packs the four bytes with vpshufb, and converts each
; 128-bit half to a 16-bit mask with vpmovsxbd + vptestmd. The low half
; additionally goes through vpslld $31 before vptestmd. Both masks are spilled
; to the stack and 32 bits are reloaded from (%rsp) as the return value.
; The expected-assembly lines below are autogenerated; regenerate them rather
; than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every position after the first four uses an index in 4-7, which selects a
; lane of the zeroinitializer operand, so result bits 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 4 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all four lanes. The 4-lane result is widened to 32 mask bits
; with zero fill and returned as an i32.
; The AVX512VL run expects vpcmpeqd with an embedded {1to4} broadcast; the
; AVX512F-only run expects a separate vpbroadcastd followed by the expanded
; vector/stack sequence.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked 4 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all four lanes. The result is ANDed with the low 4 bits of
; %__u, widened to 32 mask bits with zero fill, and returned as an i32.
; The AVX512VL run expects one masked vpcmpeqd with an embedded {1to4}
; broadcast; the AVX512F-only run expects the expanded sequence below.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 of the 8 bits in %__u gate the 4 compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note: the mask is the first operand of the 'and' here (compare result second).
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 4 x i32 equality compare of two register operands. The 4-lane
; result is widened to 64 mask bits with zero fill and returned as an i64.
; The AVX512VL run expects a single vpcmpeqd into a mask register read back
; with kmovq; the AVX512F-only run expects mask words written to the stack and
; the 64-bit result assembled from two 32-bit loads (shlq/orq).
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked 4 x i32 equality compare whose second operand is loaded from %__b.
; The 4-lane result is widened to 64 mask bits with zero fill and returned as
; an i64.
; The AVX512VL run expects vpcmpeqd with the load folded in and kmovq for the
; result; the AVX512F-only run expects mask words written to the stack and the
; 64-bit result assembled from two 32-bit loads (shlq/orq).
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked 4 x i32 equality compare of two register operands. The 4-lane result
; is ANDed with the low 4 bits of %__u, widened to 64 mask bits with zero
; fill, and returned as an i64.
; The AVX512VL run expects one masked vpcmpeqd read back with kmovq; the
; AVX512F-only run expects the mask bits rebuilt as a vector, mask words
; written to the stack, and the result assembled from two 32-bit loads.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 of the 8 bits in %__u gate the 4 compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked 4 x i32 equality compare whose second operand is loaded from %__b.
; The 4-lane result is ANDed with the low 4 bits of %__u, widened to 64 mask
; bits with zero fill, and returned as an i64.
; The AVX512VL run expects one masked vpcmpeqd with the load folded in; the
; AVX512F-only run expects the mask bits rebuilt as a vector, mask words
; written to the stack, and the result assembled from two 32-bit loads.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 of the 8 bits in %__u gate the 4 compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 4 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all four lanes. The 4-lane result is widened to 64 mask bits
; with zero fill and returned as an i64.
; The AVX512VL run expects vpcmpeqd with an embedded {1to4} broadcast and
; kmovq; the AVX512F-only run expects a separate vpbroadcastd, mask words
; written to the stack, and the result assembled from two 32-bit loads.
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked 4 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all four lanes. The result is ANDed with the low 4 bits of
; %__u, widened to 64 mask bits with zero fill, and returned as an i64.
; The AVX512VL run expects one masked vpcmpeqd with an embedded {1to4}
; broadcast; the AVX512F-only run expects a separate vpbroadcastd, the mask
; bits rebuilt as a vector, and the result assembled from two 32-bit loads.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 of the 8 bits in %__u gate the 4 compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note: the mask is the first operand of the 'and' here (compare result second).
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result bits 4 and above are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 8 x i32 equality compare of two 256-bit register operands. The
; 8-lane result is widened to 16 mask bits with zero fill and returned as i16.
; The AVX512VL run expects a ymm vpcmpeqd into a mask register; the
; AVX512F-only run expects the compare done on zmm registers followed by a
; kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked 8 x i32 equality compare whose second operand is loaded from %__b.
; The 8-lane result is widened to 16 mask bits with zero fill and returned as
; i16.
; The AVX512VL run expects vpcmpeqd with the load folded in; the AVX512F-only
; run expects a separate vmovdqa load, a zmm compare, and a
; kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked 8 x i32 equality compare of two 256-bit register operands. The
; 8-lane result is ANDed with all 8 bits of %__u, widened to 16 mask bits with
; zero fill, and returned as i16.
; Both runs expect a masked vpcmpeqd; the AVX512F-only run does it on zmm
; registers and follows with a kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; All 8 mask bits are used directly; no sub-vector extract is needed.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked 8 x i32 equality compare whose second operand is loaded from %__b.
; The 8-lane result is ANDed with all 8 bits of %__u, widened to 16 mask bits
; with zero fill, and returned as i16.
; The AVX512VL run expects a masked vpcmpeqd with the load folded in; the
; AVX512F-only run expects a separate vmovdqa load, a masked zmm compare, and
; a kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; All 8 mask bits are used directly; no sub-vector extract is needed.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 8 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all eight lanes. The 8-lane result is widened to 16 mask bits
; with zero fill and returned as i16.
; The AVX512VL run expects vpcmpeqd with an embedded {1to8} broadcast; the
; AVX512F-only run expects a separate vpbroadcastd, a zmm compare, and a
; kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked 8 x i32 equality compare against a scalar i32 loaded from %__b and
; splatted to all eight lanes. The result is ANDed with all 8 bits of %__u,
; widened to 16 mask bits with zero fill, and returned as i16.
; The AVX512VL run expects a masked vpcmpeqd with an embedded {1to8}
; broadcast; the AVX512F-only run expects a separate vpbroadcastd, a masked
; zmm compare, and a kshiftlw/kshiftrw by 8 on the mask.
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Note: the mask is the first operand of the 'and' here (compare result second).
%4 = and <8 x i1> %3, %2
; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 8 x i32 equality compare of two 256-bit register operands. The
; 8-lane result is widened to 32 mask bits with zero fill and returned as i32.
; The AVX512VL run expects a ymm vpcmpeqd into a mask register; the
; AVX512F-only run expects a zmm compare whose low eight mask bits are moved
; one at a time into general-purpose registers (kshiftrw/kmovw), reassembled
; with vpinsrb, turned back into a mask, and read from the stack as 32 bits.
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked 8 x i32 equality compare whose second operand is loaded from %__b.
; The 8-lane result is widened to 32 mask bits with zero fill and returned as
; i32.
; The AVX512VL run expects vpcmpeqd with the load folded in; the AVX512F-only
; run expects a separate vmovdqa load and a zmm compare whose low eight mask
; bits are moved one at a time into general-purpose registers, reassembled
; with vpinsrb, turned back into a mask, and read from the stack as 32 bits.
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result bits 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked register-register form: equality-compares the eight i32 lanes of %__a
; and %__b, ANDs the <8 x i1> result with %__u (bitcast to <8 x i1>), widens to
; <32 x i1> with zero upper lanes, and returns it bitcast to i32.
; Expected codegen: with the VLX prefix %__u is moved into %k1 and used as the
; write-mask of a ymm vpcmpeqd; with the NoVLX prefix the masked compare is done
; on zmm registers and the mask bits are rebuilt one at a time.
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked memory-operand form: equality-compares the eight i32 lanes of %__a
; with a <4 x i64> loaded from %__b, ANDs the result with %__u (bitcast to
; <8 x i1>), widens to <32 x i1> with zero upper lanes, and returns it bitcast
; to i32.
; Expected codegen: with the VLX prefix both the load and the mask fold into one
; ymm vpcmpeqd; with the NoVLX prefix the operand is loaded separately, the
; masked compare is done on zmm registers, and the bits are rebuilt one by one.
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Broadcast form: loads one i32 from %__b, splats it to all eight lanes, and
; equality-compares it with the eight i32 lanes of %__a. The <8 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned bitcast to i32.
; Expected codegen: with the VLX prefix the splat becomes a {1to8} embedded
; broadcast on a ymm vpcmpeqd; with the NoVLX prefix a separate vpbroadcastd
; feeds a zmm compare and the mask bits are rebuilt one at a time.
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index
; mask so every lane reads lane 0.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
; Result lanes 0-7 come from the compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked broadcast form: loads one i32 from %__b, splats it to all eight lanes,
; equality-compares it with the eight i32 lanes of %__a, and ANDs the result
; with %__u (bitcast to <8 x i1>). The <8 x i1> result is widened to <32 x i1>
; with zero upper lanes and returned bitcast to i32.
; Expected codegen: with the VLX prefix a single ymm vpcmpeqd carries both the
; {1to8} broadcast and the %k1 write-mask; with the NoVLX prefix a separate
; vpbroadcastd feeds a masked zmm compare and the bits are rebuilt one by one.
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index
; mask so every lane reads lane 0.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality-compares the eight i32 lanes of %__a and %__b, widens the <8 x i1>
; result to <64 x i1> with zero upper lanes, and returns it bitcast to i64.
; Expected codegen: with the VLX prefix a ymm vpcmpeqd writes a mask register
; that is moved out with kmovq; with the NoVLX prefix the compare is done on zmm
; registers, the mask bits are rebuilt one at a time, and the i64 is assembled
; from two 32-bit stack loads (shlq + orq).
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Result lanes 0-7 come from the compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand form: equality-compares the eight i32 lanes of %__a with a
; <4 x i64> loaded from %__b, widens the <8 x i1> result to <64 x i1> with zero
; upper lanes, and returns it bitcast to i64.
; Expected codegen: with the VLX prefix the load folds into a ymm vpcmpeqd and
; the mask is moved out with kmovq; with the NoVLX prefix the operand is loaded
; separately, compared on zmm registers, rebuilt bit by bit, and the i64 is
; assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
; Result lanes 0-7 come from the compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked register-register form: equality-compares the eight i32 lanes of %__a
; and %__b, ANDs the <8 x i1> result with %__u (bitcast to <8 x i1>), widens to
; <64 x i1> with zero upper lanes, and returns it bitcast to i64.
; Expected codegen: with the VLX prefix %__u becomes the %k1 write-mask of a ymm
; vpcmpeqd and the result is moved out with kmovq; with the NoVLX prefix the
; masked compare is done on zmm registers, the bits are rebuilt one by one, and
; the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked memory-operand form: equality-compares the eight i32 lanes of %__a
; with a <4 x i64> loaded from %__b, ANDs the result with %__u (bitcast to
; <8 x i1>), widens to <64 x i1> with zero upper lanes, and returns it bitcast
; to i64.
; Expected codegen: with the VLX prefix the load and the mask both fold into one
; ymm vpcmpeqd, followed by kmovq; with the NoVLX prefix the operand is loaded
; separately, the masked compare is done on zmm registers, the bits are rebuilt
; one by one, and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Broadcast form: loads one i32 from %__b, splats it to all eight lanes, and
; equality-compares it with the eight i32 lanes of %__a. The <8 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned bitcast to i64.
; Expected codegen: with the VLX prefix the splat becomes a {1to8} embedded
; broadcast on a ymm vpcmpeqd, followed by kmovq; with the NoVLX prefix a
; separate vpbroadcastd feeds a zmm compare, the bits are rebuilt one by one,
; and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index
; mask so every lane reads lane 0.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
; Result lanes 0-7 come from the compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked broadcast form: loads one i32 from %__b, splats it to all eight lanes,
; equality-compares it with the eight i32 lanes of %__a, and ANDs the result
; with %__u (bitcast to <8 x i1>). The <8 x i1> result is widened to <64 x i1>
; with zero upper lanes and returned bitcast to i64.
; Expected codegen: with the VLX prefix a single ymm vpcmpeqd carries both the
; {1to8} broadcast and the %k1 write-mask, followed by kmovq; with the NoVLX
; prefix a separate vpbroadcastd feeds a masked zmm compare, the bits are
; rebuilt one by one, and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index
; mask so every lane reads lane 0.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Result lanes 0-7 come from the masked compare; indices 8-15 select from the
; zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; 512-bit form: equality-compares the sixteen i32 lanes of %__a and %__b,
; widens the <16 x i1> result to <32 x i1> with zero upper lanes, and returns
; it bitcast to i32.
; Expected codegen: with the VLX prefix a zmm vpcmpeqd writes a mask register
; that is moved out with kmovd; with the NoVLX prefix the compare mask is
; expanded to a vector (vpternlogd with zeroing), narrowed and re-tested, and
; the result goes through a stack slot.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
; Result lanes 0-15 come from the compare; indices 16-31 select from the
; zeroinitializer operand, so result lanes 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; 512-bit memory-operand form: equality-compares the sixteen i32 lanes of %__a
; with an <8 x i64> loaded from %__b, widens the <16 x i1> result to <32 x i1>
; with zero upper lanes, and returns it bitcast to i32.
; Expected codegen: the load folds into the zmm vpcmpeqd under both prefixes.
; With the VLX prefix the mask is moved out with kmovd; with the NoVLX prefix it
; is expanded to a vector, narrowed and re-tested, and goes through a stack
; slot.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
; Result lanes 0-15 come from the compare; indices 16-31 select from the
; zeroinitializer operand, so result lanes 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked variant: the <16 x i32> equality result is ANDed with the i16 mask
; %__u (reinterpreted as <16 x i1>) before being widened to a 32-bit mask.
; Both runs expect the AND to be folded into a {%k1} write-mask on vpcmpeqd.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked <16 x i32> equality compare with the second operand loaded from
; %__b, widened to a 32-bit mask. Both runs expect the load folded into the
; vpcmpeqd memory operand and the AND with %__u folded into a {%k1} write-mask.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Broadcast variant: a single i32 loaded from %__b is splatted to all 16 lanes
; and compared for equality against %__a; the result is widened to a 32-bit
; mask. Both runs expect the load+splat folded into a {1to16} broadcast
; memory operand on vpcmpeqd.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across every lane.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked broadcast variant: an i32 loaded from %__b is splatted, compared for
; equality against %__a, ANDed with the i16 mask %__u and widened to a 32-bit
; mask. Both runs expect a {1to16} broadcast operand and a {%k1} write-mask on
; vpcmpeqd.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across every lane.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Here the mask is the first `and` operand and the compare result the second.
%4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality compare of two 512-bit arguments viewed as <16 x i32>. The <16 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64.
; The VLX run expects vpcmpeqd into a mask register plus kmovq; the NoVLX run
; expects the 64-bit value to be assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; <16 x i32> equality compare with the second operand loaded from %__b,
; widened to a 64-bit mask with zero upper lanes. Both runs expect the load to
; be folded into the memory operand of vpcmpeqd.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant: the <16 x i32> equality result is ANDed with the i16 mask
; %__u (reinterpreted as <16 x i1>) and widened to a 64-bit mask with zero
; upper lanes. Both runs expect the AND folded into a {%k1} write-mask on
; vpcmpeqd.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <16 x i1> %2, %3
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked <16 x i32> equality compare with the second operand loaded from
; %__b, widened to a 64-bit mask. Both runs expect the load folded into the
; vpcmpeqd memory operand and the AND with %__u folded into a {%k1} write-mask.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <16 x i1> %2, %3
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Broadcast variant: a single i32 loaded from %__b is splatted to all 16 lanes
; and compared for equality against %__a; the result is widened to a 64-bit
; mask. Both runs expect the load+splat folded into a {1to16} broadcast
; memory operand on vpcmpeqd.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across every lane.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked broadcast variant: an i32 loaded from %__b is splatted, compared for
; equality against %__a, ANDed with the i16 mask %__u and widened to a 64-bit
; mask. Both runs expect a {1to16} broadcast operand and a {%k1} write-mask on
; vpcmpeqd.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across every lane.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
; Here the mask is the first `and` operand and the compare result the second.
%4 = and <16 x i1> %3, %2
; Only indices 0-15 take compare results; every later index is in 16-31 and
; therefore selects a zeroinitializer lane, zero-filling the upper 48 bits.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Equality compare of two <2 x i64> arguments. The <2 x i1> result is widened
; to <4 x i1> with zero upper lanes and returned as an i4.
; The VLX run expects vpcmpeqq directly into a mask register; the NoVLX run
; expects a vector vpcmpeqq whose result is converted to a mask with vptestmd
; and passed through a one-byte stack slot.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; <2 x i64> equality compare with the second operand loaded from %__b,
; widened to a 4-bit mask with zero upper lanes. Both runs expect the load to
; be folded into the memory operand of vpcmpeqq.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this function convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked variant: the <2 x i64> equality result is ANDed with the low two bits
; of the i8 mask %__u and widened to a 4-bit mask.
; The VLX run expects the AND folded into a {%k1} write-mask on vpcmpeqq; the
; NoVLX run expects the mask bits to be moved into a vector register and
; combined with vpand.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only mask elements 0 and 1, matching the two compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked <2 x i64> equality compare with the second operand loaded from %__b,
; ANDed with the low two bits of %__u and widened to a 4-bit mask.
; Both runs expect the load folded into vpcmpeqq; only the VLX run expects the
; AND to become a {%k1} write-mask, while the NoVLX run expects a vector vpand.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only mask elements 0 and 1, matching the two compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Broadcast variant: a single i64 loaded from %__b is splatted to both lanes
; and compared for equality against %__a; the result is widened to a 4-bit mask.
; The VLX run expects a {1to2} broadcast memory operand on vpcmpeqq; the NoVLX
; run expects a separate vpbroadcastq before a register-register vpcmpeqq.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast of %__a, i.e. a no-op.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked broadcast variant: an i64 loaded from %__b is splatted, compared for
; equality against %__a, ANDed with the low two bits of %__u and widened to a
; 4-bit mask. The VLX run expects a {1to2} broadcast operand with a {%k1}
; write-mask on vpcmpeqq; the NoVLX run expects vpbroadcastq, a vector
; vpcmpeqq and a vector vpand.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast of %__a, i.e. a no-op.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement into lane 0 followed by an all-zero shuffle mask splats the
; loaded scalar across both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only mask elements 0 and 1, matching the two compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask is the first `and` operand and the compare result the second.
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 select lanes of the zeroinitializer operand, so the upper
; two bits of the returned mask are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Equality compare of two <2 x i64> arguments. The <2 x i1> result is widened
; to <8 x i1> with zero upper lanes and returned as an i8.
; The VLX run expects vpcmpeqq directly into a mask register; the NoVLX run
; expects the two result bytes to be extracted with vpextrb and assembled into
; a mask with k-register shift/or/xor operations.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The two bitcasts below convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Only indices 0 and 1 take compare results; the repeated indices 2 and 3
; select zeroinitializer lanes, zero-filling the upper six bits.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; <2 x i64> equality compare with the second operand loaded from %__b,
; widened to an 8-bit mask with zero upper lanes. Both runs expect the load to
; be folded into the memory operand of vpcmpeqq.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The bitcasts in this function convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Only indices 0 and 1 take compare results; the repeated indices 2 and 3
; select zeroinitializer lanes, zero-filling the upper six bits.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked variant: the <2 x i64> equality result is ANDed with the low two bits
; of the i8 mask %__u and widened to an 8-bit mask with zero upper lanes.
; The VLX run expects the AND folded into a {%k1} write-mask on vpcmpeqq; the
; NoVLX run expects a vector vpand followed by vpextrb and k-register
; shift/or/xor operations to build the mask.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The two bitcasts below convert <2 x i64> to the same type, i.e. they are no-ops.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only mask elements 0 and 1, matching the two compare lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, the mask the second.
%4 = and <2 x i1> %2, %extract.i
; Only indices 0 and 1 take compare results; the repeated indices 2 and 3
; select zeroinitializer lanes, zero-filling the upper six bits.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked 64-bit equality compare with the second operand loaded from %__b,
; result returned as an i8. The <2 x i1> compare result is ANDed with the first
; two lanes of %__u (viewed as <8 x i1>) and then padded to 8 lanes with zeros.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked 64-bit equality compare against a broadcast scalar: one i64 is
; loaded from %__b and splatted to both lanes. The <2 x i1> result is padded to
; 8 lanes with zeros and returned as an i8.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked 64-bit equality compare against a broadcast scalar: one i64 is loaded
; from %__b and splatted to both lanes. The <2 x i1> result is ANDed with the
; first two lanes of %__u (viewed as <8 x i1>), padded to 8 lanes with zeros
; and returned as an i8. Note the AND operand order (%extract.i first) differs
; from the non-broadcast masked tests.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked 64-bit equality compare, both operands in registers. The <2 x i1>
; result is padded to 16 lanes with zeros and returned as an i16.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked 64-bit equality compare with the second operand loaded from %__b.
; The <2 x i1> result is padded to 16 lanes with zeros and returned as an i16.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked 64-bit equality compare, both operands in registers, result returned
; as an i16. The <2 x i1> compare result is ANDed with the first two lanes of
; %__u (viewed as <8 x i1>) and then padded to 16 lanes with zeros.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked 64-bit equality compare with the second operand loaded from %__b,
; result returned as an i16. The <2 x i1> compare result is ANDed with the
; first two lanes of %__u (viewed as <8 x i1>) and then padded to 16 lanes with
; zeros.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 64-bit equality compare against a broadcast scalar: one i64 is
; loaded from %__b and splatted to both lanes. The <2 x i1> result is padded to
; 16 lanes with zeros and returned as an i16.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked 64-bit equality compare against a broadcast scalar: one i64 is loaded
; from %__b and splatted to both lanes. The <2 x i1> result is ANDed with the
; first two lanes of %__u (viewed as <8 x i1>), padded to 16 lanes with zeros
; and returned as an i16. Note the AND operand order (%extract.i first) differs
; from the non-broadcast masked tests.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 64-bit equality compare, both operands in registers. The <2 x i1>
; result is padded to 32 lanes with zeros and returned as an i32. The NoVLX
; expectations build the 32-bit value from two 16-bit mask stores to an aligned
; stack slot.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked 64-bit equality compare with the second operand loaded from %__b.
; The <2 x i1> result is padded to 32 lanes with zeros and returned as an i32.
; The NoVLX expectations build the 32-bit value from two 16-bit mask stores to
; an aligned stack slot.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked 64-bit equality compare, both operands in registers, result returned
; as an i32. The <2 x i1> compare result is ANDed with the first two lanes of
; %__u (viewed as <8 x i1>) and then padded to 32 lanes with zeros. Unlike the
; unmasked 32-lane tests, the NoVLX expectations include a vpslld $31 before the
; final vptestmd.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked 64-bit equality compare with the second operand loaded from %__b,
; result returned as an i32. The <2 x i1> compare result is ANDed with the
; first two lanes of %__u (viewed as <8 x i1>) and then padded to 32 lanes with
; zeros. Unlike the unmasked 32-lane tests, the NoVLX expectations include a
; vpslld $31 before the final vptestmd.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 64-bit equality compare against a broadcast scalar: one i64 is
; loaded from %__b and splatted to both lanes. The <2 x i1> result is padded to
; 32 lanes with zeros and returned as an i32.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked 64-bit equality compare against a broadcast scalar: one i64 is loaded
; from %__b and splatted to both lanes. The <2 x i1> result is ANDed with the
; first two lanes of %__u (viewed as <8 x i1>), padded to 32 lanes with zeros
; and returned as an i32. Note the AND operand order (%extract.i first) differs
; from the non-broadcast masked tests.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 into both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked equality compare of two <4 x i64> register operands, with the
; <4 x i1> result widened to an 8-bit mask returned as i8.
; Result bits 4..7 are taken from a zero vector.
; Expected code with AVX512VL enabled is a single ymm vpcmpeqq writing a
; k-register. With only AVX512F the expected code compares into a ymm
; register, narrows with vpmovqd, and builds the mask bit by bit using
; vpextrb plus k-register shifts and xors.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 pick the compare lanes, indices 4-7 pick
; the lanes of the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked equality compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b, with the <4 x i1> result widened to an
; 8-bit mask returned as i8.
; Result bits 4..7 are taken from a zero vector.
; Expected code with AVX512VL enabled folds the load into a single ymm
; vpcmpeqq writing a k-register. With only AVX512F the expected code folds
; the load into a ymm compare, narrows with vpmovqd, and builds the mask
; bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second compare operand is a full vector load from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 pick the compare lanes, indices 4-7 pick
; the lanes of the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked equality compare of two <4 x i64> register operands, with the
; <4 x i1> result widened to an 8-bit mask returned as i8.
; Only lanes 0..3 of the i8 mask %__u take part; result bits 4..7 are taken
; from a zero vector.
; Expected code with AVX512VL enabled is a single masked ymm vpcmpeqq
; writing a k-register. With only AVX512F the expected code moves the four
; mask bits into a vector with vpinsrb, applies them with vpand, and
; rebuilds the mask bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0..3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 8 lanes: indices 0-3 pick the masked compare lanes, indices 4-7
; pick the lanes of the zeroinitializer operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked equality compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b, with the <4 x i1> result widened to an
; 8-bit mask returned as i8.
; Only lanes 0..3 of the i8 mask %__u take part; result bits 4..7 are taken
; from a zero vector.
; Expected code with AVX512VL enabled folds the load into a single masked
; ymm vpcmpeqq writing a k-register. With only AVX512F the expected code
; folds the load into a ymm compare, applies the mask bits with vpand, and
; rebuilds the mask bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second compare operand is a full vector load from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0..3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 8 lanes: indices 0-3 pick the masked compare lanes, indices 4-7
; pick the lanes of the zeroinitializer operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked equality compare of a <4 x i64> register operand against a
; single i64 loaded from %__b and splatted to all four lanes, with the
; <4 x i1> result widened to an 8-bit mask returned as i8.
; Result bits 4..7 are taken from a zero vector.
; Expected code with AVX512VL enabled uses the embedded-broadcast memory
; form of vpcmpeqq ({1to4}). With only AVX512F the expected code uses a
; separate vpbroadcastq followed by a ymm compare, then builds the mask bit
; by bit using vpextrb plus k-register shifts and xors.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; This bitcast is to the identical type and leaves the value unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load splatted to all four lanes: insert into lane 0, then shuffle
; with an all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 pick the compare lanes, indices 4-7 pick
; the lanes of the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked equality compare of a <4 x i64> register operand against a
; single i64 loaded from %__b and splatted to all four lanes, with the
; <4 x i1> result widened to an 8-bit mask returned as i8.
; Only lanes 0..3 of the i8 mask %__u take part; result bits 4..7 are taken
; from a zero vector.
; In this variant the mask lanes are the first operand of the `and`; the
; AVX512F-only expectation has vpand with %xmm0 as its first source.
; Expected code with AVX512VL enabled is one masked embedded-broadcast
; vpcmpeqq ({1to4}) writing a k-register.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; This bitcast is to the identical type and leaves the value unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load splatted to all four lanes: insert into lane 0, then shuffle
; with an all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0..3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Mask lanes come first here, compare result second.
%4 = and <4 x i1> %extract.i, %2
; Widen to 8 lanes: indices 0-3 pick the masked compare lanes, indices 4-7
; pick the lanes of the zeroinitializer operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked equality compare of two <4 x i64> register operands, with the
; <4 x i1> result widened to a 16-bit mask returned as i16.
; Result bits 4..15 are taken from a zero vector.
; Expected code with AVX512VL enabled is a single ymm vpcmpeqq writing a
; k-register. With only AVX512F the expected code compares into a ymm
; register, narrows with vpmovqd, and builds the mask bit by bit using
; vpextrb plus k-register shifts and xors.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Widen to 16 lanes: indices 0-3 pick the compare lanes; every later index
; is in 4..7 and so picks a lane of the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked equality compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b, with the <4 x i1> result widened to a
; 16-bit mask returned as i16.
; Result bits 4..15 are taken from a zero vector.
; Expected code with AVX512VL enabled folds the load into a single ymm
; vpcmpeqq writing a k-register. With only AVX512F the expected code folds
; the load into a ymm compare, narrows with vpmovqd, and builds the mask
; bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second compare operand is a full vector load from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Widen to 16 lanes: indices 0-3 pick the compare lanes; every later index
; is in 4..7 and so picks a lane of the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked equality compare of two <4 x i64> register operands, with the
; <4 x i1> result widened to a 16-bit mask returned as i16.
; Only lanes 0..3 of the i8 mask %__u take part; result bits 4..15 are
; taken from a zero vector.
; Expected code with AVX512VL enabled is a single masked ymm vpcmpeqq
; writing a k-register. With only AVX512F the expected code moves the four
; mask bits into a vector with vpinsrb, applies them with vpand, and
; rebuilds the mask bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0..3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 16 lanes: indices 0-3 pick the masked compare lanes; every later
; index is in 4..7 and so picks a lane of the zeroinitializer operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked equality compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b, with the <4 x i1> result widened to a
; 16-bit mask returned as i16.
; Only lanes 0..3 of the i8 mask %__u take part; result bits 4..15 are
; taken from a zero vector.
; Expected code with AVX512VL enabled folds the load into a single masked
; ymm vpcmpeqq writing a k-register. With only AVX512F the expected code
; folds the load into a ymm compare, applies the mask bits with vpand, and
; rebuilds the mask bit by bit using vpextrb plus k-register shifts and xors.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are to the identical type and leave the values unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second compare operand is a full vector load from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0..3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 16 lanes: indices 0-3 pick the masked compare lanes; every later
; index is in 4..7 and so picks a lane of the zeroinitializer operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 4 x i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as an i16.
; Expected code for the +avx512vl configuration (VLX prefix) uses the {1to4}
; embedded-broadcast memory operand. Expected code for the avx512f-only
; configuration (NoVLX prefix) uses an explicit vpbroadcastq, a ymm compare,
; and inserts the four result bits into a k-register one at a time.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load followed by insertelement plus an all-zero shuffle mask, i.e. a
; splat of the loaded i64 across the four lanes.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked 4 x i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is ANDed with lanes
; 0-3 of the i8 mask %__u, widened to <16 x i1> with zero upper lanes, and
; returned as an i16.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq with a {1to4} embedded-broadcast operand. Expected code for the
; avx512f-only configuration (NoVLX prefix) uses vpbroadcastq, a ymm compare,
; a vpinsrb-built mask vector, and bit-by-bit insertion into a k-register.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load followed by insertelement plus an all-zero shuffle mask, i.e. a
; splat of the loaded i64 across the four lanes.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; The mask is the first operand of this and; the expected vpand above lists
; its sources in the matching order, so keep the operand order as is.
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked 4 x i64 equality compare of two register operands. The <4 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) is a single
; vpcmpeqq into k0 plus kmovd. Expected code for the avx512f-only
; configuration (NoVLX prefix) sets up a 32-byte-aligned frame, stores two
; 16-bit mask halves to the stack with kmovw, and reloads them as one 32-bit
; value.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts are identity casts (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked 4 x i64 equality compare of %__a against a vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> with zero upper lanes and
; returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) folds the load
; into vpcmpeqq. Expected code for the avx512f-only configuration (NoVLX
; prefix) folds the load into a ymm compare, then stores two 16-bit mask
; halves to an aligned stack frame and reloads them as one 32-bit value.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked 4 x i64 equality compare of two register operands. The <4 x i1>
; result is ANDed with lanes 0-3 of the i8 mask %__u, widened to <32 x i1>
; with zero upper lanes, and returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq. Expected code for the avx512f-only configuration (NoVLX prefix)
; compares in ymm registers, ANDs with a vpinsrb-built mask vector, then
; stores two 16-bit mask halves to an aligned stack frame and reloads them as
; one 32-bit value.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts of the vector arguments are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked 4 x i64 equality compare of %__a against a vector loaded from %__b.
; The <4 x i1> result is ANDed with lanes 0-3 of the i8 mask %__u, widened to
; <32 x i1> with zero upper lanes, and returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq with the load folded in. Expected code for the avx512f-only
; configuration (NoVLX prefix) folds the load into a ymm compare, ANDs with a
; vpinsrb-built mask vector, then stores two 16-bit mask halves to an aligned
; stack frame and reloads them as one 32-bit value.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 4 x i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) uses the {1to4}
; embedded-broadcast memory operand. Expected code for the avx512f-only
; configuration (NoVLX prefix) uses vpbroadcastq and a ymm compare, then
; stores two 16-bit mask halves to an aligned stack frame and reloads them as
; one 32-bit value.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load followed by insertelement plus an all-zero shuffle mask, i.e. a
; splat of the loaded i64 across the four lanes.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked 4 x i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is ANDed with lanes
; 0-3 of the i8 mask %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq with a {1to4} embedded-broadcast operand. Expected code for the
; avx512f-only configuration (NoVLX prefix) uses vpbroadcastq, a ymm compare,
; a vpinsrb-built mask vector, and a stack round trip of two 16-bit mask
; halves reloaded as one 32-bit value.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load followed by insertelement plus an all-zero shuffle mask, i.e. a
; splat of the loaded i64 across the four lanes.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; The mask is the first operand of this and; the expected vpand above lists
; its sources in the matching order, so keep the operand order as is.
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 4 x i64 equality compare of two register operands. The <4 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64.
; Expected code for the +avx512vl configuration (VLX prefix) is a single
; vpcmpeqq into k0 plus kmovq. Expected code for the avx512f-only
; configuration (NoVLX prefix) writes four 16-bit mask pieces to an aligned
; stack frame (two of them zero from kxorw), then combines two 32-bit reloads
; with shlq $32 and orq.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts are identity casts (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked 4 x i64 equality compare of %__a against a vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64.
; Expected code for the +avx512vl configuration (VLX prefix) folds the load
; into vpcmpeqq and reads the mask with kmovq. Expected code for the
; avx512f-only configuration (NoVLX prefix) folds the load into a ymm compare,
; writes four 16-bit mask pieces to an aligned stack frame (two of them zero
; from kxorw), then combines two 32-bit reloads with shlq $32 and orq.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked 4 x i64 equality compare of two register operands. The <4 x i1>
; result is ANDed with lanes 0-3 of the i8 mask %__u, widened to <64 x i1>
; with zero upper lanes, and returned as an i64.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq read back with kmovq. Expected code for the avx512f-only
; configuration (NoVLX prefix) compares in ymm registers, ANDs with a
; vpinsrb-built mask vector, writes four 16-bit mask pieces to an aligned
; stack frame (two of them zero from kxorw), then combines two 32-bit reloads
; with shlq $32 and orq.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts of the vector arguments are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked 4 x i64 equality compare of %__a against a vector loaded from %__b.
; The <4 x i1> result is ANDed with lanes 0-3 of the i8 mask %__u, widened to
; <64 x i1> with zero upper lanes, and returned as an i64.
; Expected code for the +avx512vl configuration (VLX prefix) is one masked
; vpcmpeqq with the load folded in, read back with kmovq. Expected code for
; the avx512f-only configuration (NoVLX prefix) folds the load into a ymm
; compare, ANDs with a vpinsrb-built mask vector, writes four 16-bit mask
; pieces to an aligned stack frame (two of them zero from kxorw), then
; combines two 32-bit reloads with shlq $32 and orq.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load are identity casts (same source and
; destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 4 x i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64.
; Expected code for the +avx512vl configuration (VLX prefix) uses the {1to4}
; embedded-broadcast memory operand and kmovq. Expected code for the
; avx512f-only configuration (NoVLX prefix) uses vpbroadcastq and a ymm
; compare, writes four 16-bit mask pieces to an aligned stack frame (two of
; them zero from kxorw), then combines two 32-bit reloads with shlq $32 and
; orq.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast (same source and destination type).
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Scalar load followed by insertelement plus an all-zero shuffle mask, i.e. a
; splat of the loaded i64 across the four lanes.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked equality compare of <4 x i64> against a broadcast scalar, widened to i64.
; The IR loads one i64 from %__b, splats it to <4 x i64>, compares it with %__a,
; ANDs the <4 x i1> result with lanes 0-3 of %__u (viewed as <8 x i1>), pads the
; result with zero lanes up to <64 x i1> and returns it as an i64.
; The VLX run expects a single masked vpcmpeqq with a {1to4} broadcast operand.
; The NoVLX run expects a ymm compare, the mask applied with vpand after being
; rebuilt lane by lane, and the result assembled from stack slots.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is just %__a.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the scalar loaded from %__b into all four lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
; Keep lanes 0-3 of the 8-lane write mask and apply them to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; all-zero second operand, so result lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Equality compare of two <8 x i64> register operands, widened to i16.
; The <8 x i1> compare result is padded with eight zero lanes and returned as i16.
; Both runs expect a single zmm vpcmpeqq into %k0; they differ only in the
; mask-to-GPR move (kmovd for the VLX run, kmovw for the NoVLX run).
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; all-zero second operand, so the upper eight result lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Equality compare of an <8 x i64> register operand with a full vector loaded
; from %__b, widened to i16.
; The <8 x i1> compare result is padded with eight zero lanes and returned as i16.
; Both runs expect the load to be folded into vpcmpeqq as a (%rdi) memory operand.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select lanes of the all-zero second operand (zero padding).
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked equality compare of two <8 x i64> register operands, widened to i16.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with eight
; zero lanes and returned as i16.
; Both runs expect %__u to be moved into %k1 and used as the write mask of a
; single vpcmpeqq.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two vector arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the all-zero second operand (zero padding).
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked equality compare of an <8 x i64> register operand with a full vector
; loaded from %__b, widened to i16.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with eight
; zero lanes and returned as i16.
; Both runs expect a single masked vpcmpeqq with the load folded as (%rsi).
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the all-zero second operand (zero padding).
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Equality compare of an <8 x i64> register operand with a scalar loaded from
; %__b and splatted to all eight lanes, widened to i16.
; The <8 x i1> compare result is padded with eight zero lanes and returned as i16.
; Both runs expect the splat to be folded into vpcmpeqq as a {1to8} broadcast
; memory operand.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is just %__a.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar into all eight lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select lanes of the all-zero second operand (zero padding).
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked equality compare of an <8 x i64> register operand with a scalar loaded
; from %__b and splatted to all eight lanes, widened to i16.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with eight
; zero lanes and returned as i16.
; Both runs expect a single masked vpcmpeqq with a {1to8} broadcast memory operand.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is just %__a.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar into all eight lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 8-15 select lanes of the all-zero second operand (zero padding).
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Equality compare of two <8 x i64> register operands, widened to i32.
; The <8 x i1> compare result is padded with zero lanes up to <32 x i1> and
; returned as i32.
; The VLX run expects a single vpcmpeqq followed by kmovd.
; The NoVLX run expects the eight mask bits to be extracted one at a time
; (kshiftrw/kmovw), rebuilt as a byte vector (vpinsrb), turned back into a mask
; (vpmovsxbd/vpslld/vptestmd) and returned through an aligned stack slot.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 (repeated) select lanes of
; the all-zero second operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Equality compare of an <8 x i64> register operand with a full vector loaded
; from %__b, widened to i32.
; The <8 x i1> compare result is padded with zero lanes up to <32 x i1> and
; returned as i32.
; The VLX run expects a single vpcmpeqq with the load folded as (%rdi).
; The NoVLX run also folds the load, then expects the per-bit extraction
; (kshiftrw/kmovw), vpinsrb rebuild and stack-slot round trip listed below.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 (repeated) select lanes of
; the all-zero second operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked equality compare of two <8 x i64> register operands, widened to i32.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with zero
; lanes up to <32 x i1> and returned as i32.
; The VLX run expects a single masked vpcmpeqq followed by kmovd.
; The NoVLX run keeps the masked vpcmpeqq, then expects the per-bit extraction
; (kshiftrw/kmovw), vpinsrb rebuild and stack-slot round trip listed below.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two vector arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 (repeated) select
; lanes of the all-zero second operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked equality compare of an <8 x i64> register operand with a full vector
; loaded from %__b, widened to i32.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with zero
; lanes up to <32 x i1> and returned as i32.
; The VLX run expects a single masked vpcmpeqq with the load folded as (%rsi).
; The NoVLX run keeps that instruction, then expects the per-bit extraction
; (kshiftrw/kmovw), vpinsrb rebuild and stack-slot round trip listed below.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 (repeated) select
; lanes of the all-zero second operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality compare of an <8 x i64> register operand with a scalar loaded from
; %__b and splatted to all eight lanes, widened to i32.
; The <8 x i1> compare result is padded with zero lanes up to <32 x i1> and
; returned as i32.
; The VLX run expects a single vpcmpeqq with a {1to8} broadcast memory operand.
; The NoVLX run keeps that instruction, then expects the per-bit extraction
; (kshiftrw/kmovw), vpinsrb rebuild and stack-slot round trip listed below.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is just %__a.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar into all eight lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 (repeated) select lanes of
; the all-zero second operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked equality compare of an <8 x i64> register operand with a scalar loaded
; from %__b and splatted to all eight lanes, widened to i32.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with zero
; lanes up to <32 x i1> and returned as i32.
; The VLX run expects a single masked vpcmpeqq with a {1to8} broadcast operand.
; The NoVLX run keeps that instruction, then expects the per-bit extraction
; (kshiftrw/kmovw), vpinsrb rebuild and stack-slot round trip listed below.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is just %__a.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar into all eight lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 select the masked compare lanes; indices 8-15 (repeated) select
; lanes of the all-zero second operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Equality compare of two <8 x i64> register operands, widened to i64.
; The <8 x i1> compare result is padded with zero lanes up to <64 x i1> and
; returned as i64.
; The VLX run expects a single vpcmpeqq followed by kmovq.
; The NoVLX run expects the per-bit extraction (kshiftrw/kmovw) and vpinsrb
; rebuild, zeroed mask words stored to stack slots, and the 64-bit result
; assembled from two 32-bit stack loads with shlq $32 / orq.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 (repeated) select lanes of
; the all-zero second operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Equality compare of an <8 x i64> register operand with a full vector loaded
; from %__b, widened to i64.
; The <8 x i1> compare result is padded with zero lanes up to <64 x i1> and
; returned as i64.
; The VLX run expects a single vpcmpeqq with the load folded as (%rdi), then kmovq.
; The NoVLX run also folds the load, then expects the per-bit extraction and
; vpinsrb rebuild, zeroed mask words stored to stack slots, and the 64-bit
; result assembled from two 32-bit stack loads with shlq $32 / orq.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 (repeated) select lanes of
; the all-zero second operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked equality compare of two <8 x i64> register operands, widened to i64.
; The compare result is ANDed with %__u (viewed as <8 x i1>), padded with zero
; lanes up to <64 x i1> and returned as i64.
; The VLX run expects a single masked vpcmpeqq followed by kmovq.
; The NoVLX run keeps the masked vpcmpeqq, then expects the per-bit extraction
; and vpinsrb rebuild, zeroed mask words stored to stack slots, and the 64-bit
; result assembled from two 32-bit stack loads with shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are just the two vector arguments.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Apply the 8-lane write mask to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 (repeated) select
; lanes of the all-zero second operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked qword equality compare of %__a against an <8 x i64> loaded from
; %__b. The 8-lane result is ANDed with the i8 mask %__u, padded with zero
; lanes up to 64 lanes and returned as an i64 bitmask.
; Both runs expect the load to be folded into vpcmpeqq as a (%rsi) memory
; operand; the NoVLX run additionally expects the lane-by-lane mask rebuild
; and stack-based widening to 64 bits.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity cast of the register operand.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Second compare operand comes from memory.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare bits; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all 8 lanes. The 8-lane result is padded with zero
; lanes up to 64 lanes and returned as an i64 bitmask.
; Both runs expect the load + splat to be folded into vpcmpeqq as a {1to8}
; broadcast memory operand; the NoVLX run additionally expects the
; lane-by-lane mask rebuild and stack-based widening to 64 bits.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity cast of the register operand.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Scalar load, inserted into lane 0 and splatted to every lane by the
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare bits; indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all 8 lanes. The 8-lane result is ANDed with the i8
; mask %__u, padded with zero lanes up to 64 lanes and returned as an i64.
; Both runs expect a write-masked vpcmpeqq with a {1to8} broadcast memory
; operand; the NoVLX run additionally expects the lane-by-lane mask rebuild
; and stack-based widening to 64 bits.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity cast of the register operand.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Scalar load, inserted into lane 0 and splatted to every lane by the
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
; Reinterpret the scalar mask as one i1 per lane. Note the 'and' takes the
; mask as its first operand here (mask, compare).
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 select the masked compare bits; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed byte greater-than compare of two 128-bit register operands
; (viewed as <16 x i8>). The 16-lane result is padded with zero lanes up to
; 32 lanes and returned as an i32 bitmask.
; The VLX run expects vpcmpgtb writing a k-register directly; the NoVLX run
; expects an xmm vpcmpgtb converted to a mask via vpmovsxbd + vptestmd, with
; the upper half of the result supplied by a zeroed stack slot.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare bits; indices 16-31 select lanes of
; the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed byte greater-than compare of %__a against a 128-bit value
; loaded from %__b (both viewed as <16 x i8>). The 16-lane result is padded
; with zero lanes up to 32 lanes and returned as an i32 bitmask.
; Both runs expect the load to be folded into vpcmpgtb as a (%rdi) memory
; operand; the NoVLX run converts the xmm result to a mask via
; vpmovsxbd + vptestmd.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare bits; indices 16-31 select lanes of
; the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed byte greater-than compare of two 128-bit register operands
; (viewed as <16 x i8>). The 16-lane result is ANDed with the i16 mask %__u,
; padded with zero lanes up to 32 lanes and returned as an i32 bitmask.
; The VLX run expects a single write-masked vpcmpgtb; the NoVLX run expects
; the xmm compare result to be turned into a mask with a write-masked
; vptestmd, then materialised and re-tested before being stored to the stack.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare bits; indices 16-31 select
; lanes of the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed byte greater-than compare of %__a against a 128-bit value
; loaded from %__b (both viewed as <16 x i8>). The 16-lane result is ANDed
; with the i16 mask %__u, padded with zero lanes up to 32 lanes and returned
; as an i32 bitmask.
; Both runs expect the load to be folded into vpcmpgtb as a (%rsi) memory
; operand; the VLX run applies the mask on the compare itself, the NoVLX run
; applies it on the following vptestmd.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare bits; indices 16-31 select
; lanes of the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed byte greater-than compare of two 128-bit register operands
; (viewed as <16 x i8>). The 16-lane result is padded with zero lanes up to
; 64 lanes and returned as an i64 bitmask.
; The VLX run expects vpcmpgtb writing a k-register plus kmovq; the NoVLX run
; expects the 16-bit mask to be stored next to zeroed stack slots and the two
; 32-bit halves to be recombined with shlq/orq.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare bits; indices 16-31 select lanes of
; the zeroinitializer operand, so result lanes 16-63 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed byte greater-than compare of %__a against a 128-bit value
; loaded from %__b (both viewed as <16 x i8>). The 16-lane result is padded
; with zero lanes up to 64 lanes and returned as an i64 bitmask.
; Both runs expect the load to be folded into vpcmpgtb as a (%rdi) memory
; operand; the NoVLX run recombines the 64-bit result from stack slots with
; shlq/orq.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare bits; indices 16-31 select lanes of
; the zeroinitializer operand, so result lanes 16-63 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed byte greater-than compare of two 128-bit register operands
; (viewed as <16 x i8>). The 16-lane result is ANDed with the i16 mask %__u,
; padded with zero lanes up to 64 lanes and returned as an i64 bitmask.
; The VLX run expects a single write-masked vpcmpgtb plus kmovq; the NoVLX
; run applies the mask on vptestmd, then recombines the 64-bit result from
; stack slots with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare bits; indices 16-31 select
; lanes of the zeroinitializer operand, so result lanes 16-63 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed byte greater-than compare of %__a against a 128-bit value
; loaded from %__b (both viewed as <16 x i8>). The 16-lane result is ANDed
; with the i16 mask %__u, padded with zero lanes up to 64 lanes and returned
; as an i64 bitmask.
; Both runs expect the load to be folded into vpcmpgtb as a (%rsi) memory
; operand; the NoVLX run applies the mask on vptestmd and recombines the
; 64-bit result from stack slots with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
; Second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare bits; indices 16-31 select
; lanes of the zeroinitializer operand, so result lanes 16-63 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed byte greater-than compare of two 256-bit register operands
; (viewed as <32 x i8>). The 32-lane result is padded with zero lanes up to
; 64 lanes and returned as an i64 bitmask.
; The VLX run expects a ymm vpcmpgtb writing a k-register plus kmovq; the
; NoVLX run expects a ymm vpcmpgtb whose two 128-bit halves are each turned
; into a 16-bit mask via vpmovsxbd + vptestmd and recombined through the stack.
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as 32 bytes each.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 0-31 select the compare bits; indices 32-63 select lanes of
; the zeroinitializer operand, so result lanes 32-63 are all zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed byte greater-than compare of %__a against a 256-bit value
; loaded from %__b (both viewed as <32 x i8>). The 32-lane result is padded
; with zero lanes up to 64 lanes and returned as an i64 bitmask.
; Both runs expect the load to be folded into vpcmpgtb as a (%rdi) memory
; operand; the NoVLX run converts each 128-bit half of the ymm result to a
; 16-bit mask and recombines the halves through the stack.
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; Second compare operand comes from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 0-31 select the compare bits; indices 32-63 select lanes of
; the zeroinitializer operand, so result lanes 32-63 are all zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed byte greater-than compare of two 256-bit register operands
; (viewed as <32 x i8>). The 32-lane result is ANDed with the i32 mask %__u,
; padded with zero lanes up to 64 lanes and returned as an i64 bitmask.
; The VLX run expects a single write-masked ymm vpcmpgtb plus kmovq; the
; NoVLX run expects %edi to be spilled and reloaded as two 16-bit k-masks,
; each expanded to a byte vector and ANDed (vpand) with one 128-bit half of
; the compare result before the halves are converted back to mask bits.
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as 32 bytes each.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 0-31 select the masked compare bits; indices 32-63 select
; lanes of the zeroinitializer operand, so result lanes 32-63 are all zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of 32 x i8: %__a against the vector loaded
; from %__b, ANDed with the 32-bit mask %__u, widened with zero lanes to 64 bits
; and returned as i64.
; The VLX run (avx512bw and avx512vl enabled) expects one masked vpcmpgtb with a
; folded memory operand writing a mask register. The NoVLX run (avx512f only)
; expects the compare in ymm, the mask applied per 128-bit half with vpand, and
; the 64-bit result assembled from stack slots.
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 0-31 pick the masked compare lanes; indices 32-63 pick lanes of the zero vector.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed greater-than compare of 8 x i16 (%__a > %__b); the 8-bit result is
; widened with zero lanes to 16 bits and returned as i16.
; The VLX run expects a single vpcmpgtw writing a mask register. The NoVLX run
; (avx512f only) expects the xmm compare result to be sign-extended to zmm and
; turned into a mask with vptestmq.
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Signed greater-than compare of 8 x i16 where the right-hand operand is loaded
; from %__b; the 8-bit result is widened with zero lanes to 16 bits and
; returned as i16.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand. The
; NoVLX run (avx512f only) additionally expects vpmovsxwq + vptestmq to form
; the mask.
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of 8 x i16 (%__a > %__b), ANDed with the
; 8-bit mask %__u, widened with zero lanes to 16 bits and returned as i16.
; The VLX run expects %__u to be moved into %k1 and used as the write mask of a
; single vpcmpgtw. The NoVLX run (avx512f only) expects the mask to be applied
; on the vptestmq that converts the sign-extended compare result.
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed greater-than compare of 8 x i16 where the right-hand operand is
; loaded from %__b; the result is ANDed with the 8-bit mask %__u, widened with
; zero lanes to 16 bits and returned as i16.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand. The VLX
; run applies %k1 on the compare itself; the NoVLX run (avx512f only) applies
; it on the following vptestmq.
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of 8 x i16 (%__a > %__b); the 8-bit result is
; widened with zero lanes to 32 bits and returned as i32.
; The VLX run expects a single vpcmpgtw writing a mask register. The NoVLX run
; (avx512f only) expects the 8 mask bits to be pulled out one by one with
; kshiftrw/kmovw, re-inserted with vpinsrb, and the 32-bit value read back from
; an aligned stack slot.
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed greater-than compare of 8 x i16 where the right-hand operand is loaded
; from %__b; the 8-bit result is widened with zero lanes to 32 bits and
; returned as i32.
; Both runs expect the load folded into vpcmpgtw as a (%rdi) operand. The NoVLX
; run (avx512f only) then expects the bit-by-bit kshiftrw/kmovw extraction,
; vpinsrb rebuild, and a 32-bit reload from the stack.
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of 8 x i16 (%__a > %__b), ANDed with the
; 8-bit mask %__u, widened with zero lanes to 32 bits and returned as i32.
; The VLX run expects %__u in %k1 as the write mask of a single vpcmpgtw. The
; NoVLX run (avx512f only) expects the mask applied on vptestmq, followed by
; the bit-by-bit kshiftrw/kmovw extraction, vpinsrb rebuild, and a 32-bit
; reload from the stack.
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare of 8 x i16 where the right-hand operand is
; loaded from %__b; the result is ANDed with the 8-bit mask %__u, widened with
; zero lanes to 32 bits and returned as i32.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand. The VLX
; run applies %k1 on the compare; the NoVLX run (avx512f only) applies it on
; vptestmq and then rebuilds the 32-bit value via kshiftrw/kmovw, vpinsrb and a
; stack slot.
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of 8 x i16 (%__a > %__b); the 8-bit result is
; widened with zero lanes to 64 bits and returned as i64.
; The VLX run expects a single vpcmpgtw followed by kmovq. The NoVLX run
; (avx512f only) expects the bit-by-bit kshiftrw/kmovw extraction and vpinsrb
; rebuild, with the 64-bit value combined from two 32-bit stack loads
; (shlq $32 / orq).
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed greater-than compare of 8 x i16 where the right-hand operand is loaded
; from %__b; the 8-bit result is widened with zero lanes to 64 bits and
; returned as i64.
; Both runs expect the load folded into vpcmpgtw as a (%rdi) operand. The NoVLX
; run (avx512f only) then expects the kshiftrw/kmovw extraction, vpinsrb
; rebuild, and the 64-bit value combined from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 pick the compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of 8 x i16 (%__a > %__b), ANDed with the
; 8-bit mask %__u, widened with zero lanes to 64 bits and returned as i64.
; The VLX run expects %__u in %k1 as the write mask of a single vpcmpgtw, then
; kmovq. The NoVLX run (avx512f only) expects the mask applied on vptestmq,
; the kshiftrw/kmovw extraction and vpinsrb rebuild, and the 64-bit value
; combined from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of 8 x i16 where the right-hand operand is
; loaded from %__b; the result is ANDed with the 8-bit mask %__u, widened with
; zero lanes to 64 bits and returned as i64.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand. The VLX
; run applies %k1 on the compare; the NoVLX run (avx512f only) applies it on
; vptestmq and then rebuilds the value via kshiftrw/kmovw, vpinsrb and two
; 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare lanes; every later index is in 8-15, i.e. a lane of the zero vector.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed greater-than compare of 16 x i16 (%__a > %__b); the 16-bit result is
; widened with zero lanes to 32 bits and returned as i32.
; The VLX run expects a single ymm vpcmpgtw writing a mask register. The NoVLX
; run (avx512f only) expects the ymm compare result to be converted through
; vpmovsxwd / vpmovdb / vpmovsxbd, tested with vptestmd, and the 32-bit value
; read back from an aligned stack slot.
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Indices 0-15 pick the compare lanes; indices 16-31 pick lanes of the zero vector.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed greater-than compare of 16 x i16 where the right-hand operand is
; loaded from %__b; the 16-bit result is widened with zero lanes to 32 bits and
; returned as i32.
; Both runs expect the load folded into vpcmpgtw as a (%rdi) operand. The NoVLX
; run (avx512f only) then expects the vpmovsxwd / vpmovdb / vpmovsxbd /
; vptestmd sequence and a 32-bit reload from the stack.
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Indices 0-15 pick the compare lanes; indices 16-31 pick lanes of the zero vector.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of 16 x i16 (%__a > %__b), ANDed with the
; 16-bit mask %__u, widened with zero lanes to 32 bits and returned as i32.
; The VLX run expects %__u in %k1 as the write mask of a single ymm vpcmpgtw.
; The NoVLX run (avx512f only) expects the mask applied on a vptestmd, the
; masked result expanded back to a vector with a zero-masked vpternlogd, and
; the 32-bit value read back from an aligned stack slot.
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 pick the masked compare lanes; indices 16-31 pick lanes of the zero vector.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked variant with the second operand loaded from %__b. The signed
; greater-than result for the 16 i16 lanes is ANDed with the 16-bit mask %__u,
; widened to <32 x i1> with zero upper lanes, and returned as i32.
; Under the VLX prefix the load is folded into a write-masked vpcmpgtw. Under
; the NoVLX prefix the load is folded into a ymm vpcmpgtw and the mask is
; applied on a later vptestmd.
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Turn the scalar mask into 16 i1 lanes and combine it with the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 16-31 select lanes of the zeroinitializer operand, so the upper 16
; result bits are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of the 16 i16 lanes of %__a vs %__b, both in
; registers. The <16 x i1> result is widened to <64 x i1> with zero upper
; lanes and returned as an i64 bitmask.
; Under the VLX prefix the expected code is one vpcmpgtw into a mask register
; followed by kmovq. Under the NoVLX prefix the result is assembled on the
; stack from 16-bit mask stores and combined with shlq/orq.
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Indices 0-15 select the compare lanes. Every remaining index is in the range
; 16-31 (repeated three times), which selects lanes of the zeroinitializer
; operand, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed greater-than compare of the 16 i16 lanes of %__a against a vector
; loaded from %__b. The <16 x i1> result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask.
; Under the VLX prefix the load is folded into a vpcmpgtw that writes a mask
; register. Under the NoVLX prefix the load is folded into a ymm vpcmpgtw and
; the result is assembled on the stack and combined with shlq/orq.
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Indices 0-15 select the compare lanes. Every remaining index is in the range
; 16-31 (repeated three times), which selects lanes of the zeroinitializer
; operand, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant with both operands in registers. The signed greater-than
; result for the 16 i16 lanes of %__a vs %__b is ANDed with the 16-bit mask
; %__u, widened to <64 x i1> with zero upper lanes, and returned as i64.
; Under the VLX prefix the mask is moved into k1 and applied as a write-mask
; on vpcmpgtw. Under the NoVLX prefix the mask is applied on a vptestmd and
; the 64-bit result is assembled on the stack and combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Turn the scalar mask into 16 i1 lanes and combine it with the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked compare lanes. Every remaining index is in
; the range 16-31 (repeated three times), which selects lanes of the
; zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked variant with the second operand loaded from %__b. The signed
; greater-than result for the 16 i16 lanes is ANDed with the 16-bit mask %__u,
; widened to <64 x i1> with zero upper lanes, and returned as i64.
; Under the VLX prefix the load is folded into a write-masked vpcmpgtw. Under
; the NoVLX prefix the load is folded into a ymm vpcmpgtw, the mask is applied
; on a vptestmd, and the result is assembled on the stack with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
; Turn the scalar mask into 16 i1 lanes and combine it with the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked compare lanes. Every remaining index is in
; the range 16-31 (repeated three times), which selects lanes of the
; zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed greater-than compare of the 32 i16 lanes of two 512-bit operands,
; both in registers. The <32 x i1> result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask.
; Under the VLX prefix the expected code is one zmm vpcmpgtw into a mask
; register followed by kmovq. Under the NoVLX prefix the expected code is a
; long sequence that extracts each 128-bit piece, rebuilds it word by word
; with vmovq/vpextrq and vpinsrw, performs two ymm vpcmpgtw compares, and
; assembles the two 16-bit masks through stack slots.
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 512-bit operands as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
; Indices 0-31 select the compare lanes, indices 32-63 select lanes of the
; zeroinitializer operand, so the upper 32 result bits are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed greater-than compare of the 32 i16 lanes of %__a against a 512-bit
; vector loaded from %__b. The <32 x i1> result is widened to <64 x i1> with
; zero upper lanes and returned as an i64 bitmask.
; Under the VLX prefix the load is folded into one zmm vpcmpgtw that writes a
; mask register. Under the NoVLX prefix only the register operand is rebuilt
; word by word (vmovq/vpextrq and vpinsrw); the memory operand is folded into
; two ymm vpcmpgtw compares at offsets 0 and 32 from %rdi, and the two 16-bit
; masks are assembled through stack slots.
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
; Indices 0-31 select the compare lanes, indices 32-63 select lanes of the
; zeroinitializer operand, so the upper 32 result bits are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of 32 x i16 lanes where the second
; operand is loaded from %__b. The 32 compare results are ANDed with the
; 32-bit mask %__u and then widened to 64 mask elements, the upper 32 of which
; are taken from zeroinitializer, before being returned as an i64.
; The run with AVX512BW and AVX512VL enabled expects one masked vpcmpgtw on a
; zmm register with the load folded in. The AVX512F-only run expects the
; compare to be split into two ymm halves (offsets 0 and 32 from %rsi) and the
; result mask to be reassembled through stack slots.
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 0-31 pick the masked compare results; 32-63 pick zeroinitializer lanes.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of 4 x i32 lanes with both operands in
; registers. The 4 results are widened to 8 mask elements (elements 4-7 are
; taken from zeroinitializer) and returned as an i8.
; The run with AVX512VL enabled expects a single vpcmpgtd writing a mask
; register. The AVX512F-only run expects an xmm vpcmpgtd followed by a
; per-element vpextrb / k-register shift-and-xor sequence that builds the mask.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; 4-7 pick zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked signed greater-than compare of 4 x i32 lanes where the second
; operand is a full 128-bit vector loaded from %__b. The 4 results are widened
; to 8 mask elements (elements 4-7 are taken from zeroinitializer) and
; returned as an i8.
; Both runs expect the load to be folded into vpcmpgtd as a (%rdi) memory
; operand; they differ in whether the result lands directly in a mask
; register or is rebuilt element by element from an xmm register.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; 4-7 pick zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed greater-than compare of 4 x i32 lanes with both operands in
; registers. The low 4 elements of the i8 mask %__u are ANDed with the compare
; results; the 4 surviving elements are widened to 8 mask elements (elements
; 4-7 are taken from zeroinitializer) and returned as an i8.
; The run with AVX512VL enabled expects the mask to be applied as a {%k1}
; write-mask on vpcmpgtd. The AVX512F-only run expects %__u to be expanded
; into an xmm vector (kshiftrw/vpinsrb), combined with vpand, and the result
; mask rebuilt element by element.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Only elements 0-3 of the 8-element mask are used; elements 4-7 are dropped.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare results; 4-7 pick zeroinitializer lanes.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked signed greater-than compare of 4 x i32 lanes where the second operand
; is a full 128-bit vector loaded from %__b. The low 4 elements of the i8 mask
; %__u are ANDed with the compare results; the result is widened to 8 mask
; elements (elements 4-7 are taken from zeroinitializer) and returned as an i8.
; Both runs expect the load folded into vpcmpgtd as a (%rsi) memory operand.
; The run with AVX512VL enabled applies %__u as a {%k1} write-mask; the
; AVX512F-only run expands %__u into an xmm vector and combines with vpand.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Only elements 0-3 of the 8-element mask are used; elements 4-7 are dropped.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare results; 4-7 pick zeroinitializer lanes.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed greater-than compare of 4 x i32 lanes against a single i32
; loaded from %__b and splatted to all 4 lanes. The 4 results are widened to
; 8 mask elements (elements 4-7 are taken from zeroinitializer) and returned
; as an i8.
; The run with AVX512VL enabled expects the splat to be folded into vpcmpgtd
; as a {1to4} broadcast memory operand. The AVX512F-only run expects a
; separate vpbroadcastd followed by a register-register vpcmpgtd.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to all lanes.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; 4-7 pick zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed greater-than compare of 4 x i32 lanes against a single i32
; loaded from %__b and splatted to all 4 lanes. The low 4 elements of the i8
; mask %__u are ANDed with the compare results; the result is widened to 8
; mask elements (elements 4-7 are taken from zeroinitializer) and returned as
; an i8.
; The run with AVX512VL enabled expects a {1to4} broadcast memory operand and
; a {%k1} write-mask on one vpcmpgtd. The AVX512F-only run expects
; vpbroadcastd + vpcmpgtd, with %__u expanded into an xmm vector for vpand.
; Note: the 'and' below lists the mask first and the compare second, the
; reverse of the non-broadcast masked tests, and the expected vpand source
; operands are swapped accordingly.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to all lanes.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Only elements 0-3 of the 8-element mask are used; elements 4-7 are dropped.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 pick the masked compare results; 4-7 pick zeroinitializer lanes.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed greater-than compare of 4 x i32 lanes with both operands in
; registers. The 4 results are widened to 16 mask elements and returned as an
; i16; every shuffle index above 3 selects a zeroinitializer lane, so elements
; 4-15 are zero.
; The run with AVX512VL enabled expects a single vpcmpgtd writing a mask
; register. The AVX512F-only run expects an xmm vpcmpgtd followed by a
; per-element vpextrb / k-register shift-and-xor sequence that builds the mask.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; the repeated 4-7 indices all pick
; zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked signed greater-than compare of 4 x i32 lanes where the second
; operand is a full 128-bit vector loaded from %__b. The 4 results are widened
; to 16 mask elements and returned as an i16; every shuffle index above 3
; selects a zeroinitializer lane, so elements 4-15 are zero.
; Both runs expect the load to be folded into vpcmpgtd as a (%rdi) memory
; operand; they differ in whether the result lands directly in a mask
; register or is rebuilt element by element from an xmm register.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; the repeated 4-7 indices all pick
; zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of 4 x i32 lanes with both operands in
; registers. The low 4 elements of the i8 mask %__u are ANDed with the compare
; results; the result is widened to 16 mask elements and returned as an i16.
; Every shuffle index above 3 selects a zeroinitializer lane, so elements
; 4-15 are zero.
; The run with AVX512VL enabled expects the mask to be applied as a {%k1}
; write-mask on vpcmpgtd. The AVX512F-only run expects %__u to be expanded
; into an xmm vector (kshiftrw/vpinsrb), combined with vpand, and the result
; mask rebuilt element by element.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Only elements 0-3 of the 8-element mask are used; elements 4-7 are dropped.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare results; the repeated 4-7 indices all
; pick zeroinitializer lanes.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed greater-than compare of 4 x i32 lanes where the second operand
; is a full 128-bit vector loaded from %__b. The low 4 elements of the i8 mask
; %__u are ANDed with the compare results; the result is widened to 16 mask
; elements and returned as an i16. Every shuffle index above 3 selects a
; zeroinitializer lane, so elements 4-15 are zero.
; Both runs expect the load folded into vpcmpgtd as a (%rsi) memory operand.
; The run with AVX512VL enabled applies %__u as a {%k1} write-mask; the
; AVX512F-only run expands %__u into an xmm vector and combines with vpand.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Only elements 0-3 of the 8-element mask are used; elements 4-7 are dropped.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare results; the repeated 4-7 indices all
; pick zeroinitializer lanes.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed greater-than compare of 4 x i32 lanes against a single i32
; loaded from %__b and splatted to all 4 lanes. The 4 results are widened to
; 16 mask elements and returned as an i16; every shuffle index above 3 selects
; a zeroinitializer lane, so elements 4-15 are zero.
; The run with AVX512VL enabled expects the splat to be folded into vpcmpgtd
; as a {1to4} broadcast memory operand. The AVX512F-only run expects a
; separate vpbroadcastd followed by a register-register vpcmpgtd.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to all lanes.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Indices 0-3 pick the compare results; the repeated 4-7 indices all pick
; zeroinitializer lanes.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare against a broadcast scalar, widened to i16.
; The four i32 lanes of %__a are compared with the i32 loaded from %__b (splatted
; to every lane); the result is ANDed with the low four bits of %__u and then
; zero-extended to 16 lanes before being returned as an i16.
; With AVX512VL the expected code moves %edi into k1 and uses it as the write-mask
; of a {1to4} broadcast vpcmpgtd.
; With AVX512F only, the expected code pulls mask bits 0-3 out of k0 into GPRs,
; rebuilds them in xmm1 with vmovd/vpinsrb, ANDs with the xmm compare result and
; assembles the final mask one bit at a time.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 to all lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed greater-than compare of two register operands, widened to i32.
; The four i32 lanes of %__a and %__b are compared; the <4 x i1> result occupies
; bits 0-3 of the returned i32 and all higher bits are zero.
; With AVX512VL the expected code is a single vpcmpgtd into k0 followed by kmovd.
; With AVX512F only, the expected code sets up a 32-byte-aligned frame, packs the
; compare results into bytes 0-3 of ymm0 with vpshufb, turns each 128-bit half
; into a k-mask via vpmovsxbd + vptestmd, stores both masks to the stack and
; returns the 32-bit value loaded from (%rsp).
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed greater-than compare with a full-vector memory operand, widened
; to i32. The four i32 lanes of %__a are compared with the <2 x i64> loaded from
; %__b (viewed as <4 x i32>); the <4 x i1> result occupies bits 0-3 of the
; returned i32 and all higher bits are zero.
; Both configurations are expected to fold the load into vpcmpgtd. With AVX512F
; only, the result is then converted to a mask through the stack using vpshufb,
; vpmovsxbd and vptestmd on each 128-bit half.
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of two register operands, widened to i32.
; The four i32 lanes of %__a and %__b are compared, the result is ANDed with the
; low four bits of %__u, and the <4 x i1> value is zero-extended to 32 lanes.
; With AVX512VL the expected code moves %edi into k1 and uses it as the write-mask
; of vpcmpgtd.
; With AVX512F only, the expected code rebuilds mask bits 0-3 in xmm1 (kshiftrw +
; kmovw + vpinsrb), ANDs them with the xmm compare result, and converts to a mask
; through the stack; the low half is shifted left by 31 (vpslld) before vptestmd.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare with a full-vector memory operand, widened
; to i32. The four i32 lanes of %__a are compared with the <2 x i64> loaded from
; %__b (viewed as <4 x i32>), the result is ANDed with the low four bits of %__u,
; and the <4 x i1> value is zero-extended to 32 lanes.
; With AVX512VL the expected code is a write-masked vpcmpgtd with the load folded.
; With AVX512F only, the expected code folds the load into an xmm vpcmpgtd,
; rebuilds mask bits 0-3 in xmm1, ANDs, and converts to a mask through the stack;
; the low half is shifted left by 31 (vpslld) before vptestmd.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed greater-than compare against a broadcast scalar, widened to i32.
; The four i32 lanes of %__a are compared with the i32 loaded from %__b (splatted
; to every lane); the <4 x i1> result occupies bits 0-3 of the returned i32 and
; all higher bits are zero.
; With AVX512VL the expected code is one vpcmpgtd using a {1to4} memory broadcast.
; With AVX512F only, the expected code uses vpbroadcastd + an xmm vpcmpgtd and
; converts the result to a mask through the stack (vpshufb, vpmovsxbd, vptestmd).
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 to all lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare against a broadcast scalar, widened to i32.
; The four i32 lanes of %__a are compared with the i32 loaded from %__b (splatted
; to every lane), the result is ANDed with the low four bits of %__u, and the
; <4 x i1> value is zero-extended to 32 lanes.
; With AVX512VL the expected code is a write-masked vpcmpgtd with a {1to4}
; memory broadcast.
; With AVX512F only, the expected code uses vpbroadcastd + an xmm vpcmpgtd,
; rebuilds mask bits 0-3 in xmm1, ANDs, and converts to a mask through the stack;
; the low half is shifted left by 31 (vpslld) before vptestmd.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 to all lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed greater-than compare of two register operands, widened to i64.
; The four i32 lanes of %__a and %__b are compared; the <4 x i1> result occupies
; bits 0-3 of the returned i64 and all higher bits are zero.
; With AVX512VL the expected code is a single vpcmpgtd into k0 followed by kmovq.
; With AVX512F only, the expected code reserves 64 bytes of 32-byte-aligned
; stack, stores a zeroed k-register to two slots, converts the compare result to
; two k-masks (vpshufb, vpmovsxbd, vptestmd) stored to two more slots, and forms
; the i64 from two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed greater-than compare with a full-vector memory operand, widened
; to i64. The four i32 lanes of %__a are compared with the <2 x i64> loaded from
; %__b (viewed as <4 x i32>); the <4 x i1> result occupies bits 0-3 of the
; returned i64 and all higher bits are zero.
; Both configurations are expected to fold the load into vpcmpgtd. With AVX512F
; only, the i64 is then assembled through four stack slots (two zeroed, two
; holding vptestmd masks) and two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of two register operands, widened to i64.
; The four i32 lanes of %__a and %__b are compared, the result is ANDed with the
; low four bits of %__u, and the <4 x i1> value is zero-extended to 64 lanes.
; With AVX512VL the expected code is a write-masked vpcmpgtd followed by kmovq.
; With AVX512F only, the expected code rebuilds mask bits 0-3 in xmm1, ANDs them
; with the xmm compare result, and assembles the i64 through four stack slots
; (two zeroed, two holding vptestmd masks; the low half is shifted left by 31
; first) and two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare with a full-vector memory operand, widened
; to i64. The four i32 lanes of %__a are compared with the <2 x i64> loaded from
; %__b (viewed as <4 x i32>), the result is ANDed with the low four bits of %__u,
; and the <4 x i1> value is zero-extended to 64 lanes.
; With AVX512VL the expected code is a write-masked vpcmpgtd with the load folded,
; followed by kmovq.
; With AVX512F only, the expected code folds the load into an xmm vpcmpgtd,
; rebuilds mask bits 0-3 in xmm1, ANDs, and assembles the i64 through four stack
; slots and two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare against a broadcast scalar, widened to i64.
; The four i32 lanes of %__a are compared with the i32 loaded from %__b (splatted
; to every lane); the <4 x i1> result occupies bits 0-3 of the returned i64 and
; all higher bits are zero.
; With AVX512VL the expected code is one vpcmpgtd using a {1to4} memory broadcast
; followed by kmovq.
; With AVX512F only, the expected code uses vpbroadcastd + an xmm vpcmpgtd and
; assembles the i64 through four stack slots (two zeroed, two holding vptestmd
; masks) and two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 to all lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare against a broadcast scalar, widened to i64.
; The four i32 lanes of %__a are compared with the i32 loaded from %__b (splatted
; to every lane), the result is ANDed with the low four bits of %__u, and the
; <4 x i1> value is zero-extended to 64 lanes.
; With AVX512VL the expected code is a write-masked vpcmpgtd with a {1to4}
; memory broadcast, followed by kmovq.
; With AVX512F only, the expected code uses vpbroadcastd + an xmm vpcmpgtd,
; rebuilds mask bits 0-3 in xmm1, ANDs, and assembles the i64 through four stack
; slots and two 32-bit loads combined with shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 to all lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 0-3 select the masked result; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 256-bit signed greater-than compare of two register operands, widened
; to i16. The eight i32 lanes of %__a and %__b are compared; the <8 x i1> result
; occupies bits 0-7 of the returned i16 and bits 8-15 are zero.
; With AVX512VL the expected code is a single ymm vpcmpgtd into k0.
; With AVX512F only, the expected code treats the ymm inputs as zmm, performs a
; 512-bit vpcmpgtd, and clears the upper eight mask bits with kshiftlw $8 /
; kshiftrw $8.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 0-7 select the compare result; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Same as the register form, but the right-hand operand is loaded from %__b:
; signed greater-than compare of <8 x i32> vectors, with the <8 x i1> result
; widened to 16 lanes with zeros and returned as i16.
; With AVX512VL the checks expect the load folded into vpcmpgtd as a memory
; operand. With AVX512F only, a separate vmovdqa load feeds a zmm compare
; followed by the kshiftlw/kshiftrw pair that clears mask bits 8-15.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked form: the <8 x i1> result of a signed greater-than compare of
; <8 x i32> vectors is ANDed with %__u (bitcast to <8 x i1>), widened to
; 16 lanes with zeros, and returned as i16.
; Both configurations are expected to move %__u into %k1 and apply it as the
; write-mask of vpcmpgtd. The AVX512F-only run additionally widens the operands
; to zmm and clears mask bits 8-15 with a kshiftlw/kshiftrw pair.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked form with the right-hand operand loaded from %__b: the <8 x i1>
; result of a signed greater-than compare of <8 x i32> vectors is ANDed with
; %__u, widened to 16 lanes with zeros, and returned as i16.
; With AVX512VL the checks expect the load folded into a write-masked
; vpcmpgtd. With AVX512F only, a separate vmovdqa load feeds a write-masked zmm
; compare followed by the kshiftlw/kshiftrw pair that clears mask bits 8-15.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Broadcast form: a single i32 loaded from %__b is splatted to all 8 lanes and
; used as the right-hand side of a signed greater-than compare against %__a
; (as <8 x i32>). The <8 x i1> result is widened to 16 lanes with zeros and
; returned as i16.
; With AVX512VL the checks expect an embedded {1to8} broadcast memory operand.
; With AVX512F only, an explicit vpbroadcastd feeds a zmm compare followed by
; the kshiftlw/kshiftrw pair that clears mask bits 8-15.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked broadcast form: a single i32 loaded from %__b is splatted to all 8
; lanes and compared (signed greater-than) against %__a as <8 x i32>. The
; result is ANDed with %__u, widened to 16 lanes with zeros, and returned as
; i16.
; With AVX512VL the checks expect a write-masked vpcmpgtd with an embedded
; {1to8} broadcast. With AVX512F only, an explicit vpbroadcastd feeds a
; write-masked zmm compare followed by the kshiftlw/kshiftrw pair that clears
; mask bits 8-15.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments). The <8 x i1> result is widened to 32 lanes with zeros
; and returned as an i32 bitmask.
; With AVX512VL (and BW/DQ, per the RUN line) the checks expect a single ymm
; vpcmpgtd plus kmovd. With AVX512F only, the checks expect a much longer
; sequence: each of the 8 mask bits is moved to a GPR (kshiftrw/kmovw), the
; bits are rebuilt as a byte vector with vpinsrb, turned back into a mask
; (vpmovsxbd/vpslld/vptestmd), and the result is read back from an aligned
; stack slot with a 32-bit load.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Memory-operand form: the right-hand side is loaded from %__b, compared
; (signed greater-than) against %__a as <8 x i32>, and the <8 x i1> result is
; widened to 32 lanes with zeros and returned as i32.
; With AVX512VL the checks expect the load folded into vpcmpgtd. With AVX512F
; only, a separate vmovdqa load feeds a zmm compare, after which the mask bits
; are moved one by one to GPRs, rebuilt with vpinsrb, converted back to a mask
; (vpmovsxbd/vpslld/vptestmd), and read back from the stack as 32 bits.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked form: the <8 x i1> result of a signed greater-than compare of
; <8 x i32> vectors is ANDed with %__u (bitcast to <8 x i1>), widened to
; 32 lanes with zeros, and returned as i32.
; Both configurations are expected to move %__u into %k1 and use it as the
; vpcmpgtd write-mask. The AVX512F-only run then extracts the mask bits one by
; one to GPRs, rebuilds them with vpinsrb, converts back to a mask
; (vpmovsxbd/vpslld/vptestmd), and reads the result from the stack as 32 bits.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked memory-operand form: the right-hand side is loaded from %__b and
; compared (signed greater-than) against %__a as <8 x i32>. The result is ANDed
; with %__u, widened to 32 lanes with zeros, and returned as i32.
; With AVX512VL the checks expect the load folded into a write-masked
; vpcmpgtd. With AVX512F only, a separate vmovdqa load feeds a write-masked zmm
; compare, followed by the bit-by-bit extract, vpinsrb rebuild,
; vpmovsxbd/vpslld/vptestmd conversion, and a 32-bit reload from the stack.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Broadcast form: a single i32 loaded from %__b is splatted to all 8 lanes and
; used as the right-hand side of a signed greater-than compare against %__a
; (as <8 x i32>). The <8 x i1> result is widened to 32 lanes with zeros and
; returned as i32.
; With AVX512VL the checks expect an embedded {1to8} broadcast memory operand.
; With AVX512F only, an explicit vpbroadcastd feeds a zmm compare, followed by
; the bit-by-bit extract, vpinsrb rebuild, vpmovsxbd/vpslld/vptestmd
; conversion, and a 32-bit reload from the stack.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked broadcast form: a single i32 loaded from %__b is splatted to all 8
; lanes and compared (signed greater-than) against %__a as <8 x i32>. The
; result is ANDed with %__u, widened to 32 lanes with zeros, and returned as
; i32.
; With AVX512VL the checks expect a write-masked vpcmpgtd with an embedded
; {1to8} broadcast. With AVX512F only, an explicit vpbroadcastd feeds a
; write-masked zmm compare, followed by the bit-by-bit extract, vpinsrb
; rebuild, vpmovsxbd/vpslld/vptestmd conversion, and a 32-bit stack reload.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-31 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments). The <8 x i1> result is widened to 64 lanes with zeros
; and returned as an i64 bitmask.
; With AVX512VL (and BW/DQ, per the RUN line) the checks expect a single ymm
; vpcmpgtd plus kmovq. With AVX512F only, the checks expect the mask bits to be
; extracted one by one to GPRs, rebuilt with vpinsrb, converted back to a mask
; (vpmovsxbd/vpslld/vptestmd), and stored to the stack; two 32-bit stack loads
; are then combined with shlq/orq into the 64-bit return value.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-63 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand form: the right-hand side is loaded from %__b, compared
; (signed greater-than) against %__a as <8 x i32>, and the <8 x i1> result is
; widened to 64 lanes with zeros and returned as i64.
; With AVX512VL the checks expect the load folded into vpcmpgtd, then kmovq.
; With AVX512F only, a separate vmovdqa load feeds a zmm compare, followed by
; the bit-by-bit extract, vpinsrb rebuild, vpmovsxbd/vpslld/vptestmd
; conversion, and two 32-bit stack loads merged with shlq/orq.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-63 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked form: the <8 x i1> result of a signed greater-than compare of
; <8 x i32> vectors is ANDed with %__u (bitcast to <8 x i1>), widened to
; 64 lanes with zeros, and returned as i64.
; Both configurations are expected to move %__u into %k1 and use it as the
; vpcmpgtd write-mask. The AVX512F-only run then extracts the mask bits one by
; one to GPRs, rebuilds them with vpinsrb, converts back to a mask
; (vpmovsxbd/vpslld/vptestmd), and merges two 32-bit stack loads with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the scalar mask argument as one bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result lanes 8-63 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b. The <8 x i1> compare result is ANDed with
; the bits of %__u, widened to <64 x i1> and returned as a zeroext i64.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8 and above pick lanes of the zeroinitializer operand, so
; only the low 8 bits of the 64-bit result can be set.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of the eight i32 lanes of %__a against a
; single i32 loaded from %__b and splatted to all lanes. The <8 x i1> result is
; widened to <64 x i1> and returned as a zeroext i64.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8 and above pick lanes of the zeroinitializer operand, so
; only the low 8 bits of the 64-bit result can be set.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; single i32 loaded from %__b and splatted to all lanes. The <8 x i1> result is
; ANDed with the bits of %__u, widened to <64 x i1> and returned as a zeroext
; i64.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 8 and above pick lanes of the zeroinitializer operand, so
; only the low 8 bits of the 64-bit result can be set.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b. The <16 x i1> result is widened to <32 x i1> and returned as a zeroext
; i32.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; an <8 x i64> vector loaded from %__b. The <16 x i1> result is widened to
; <32 x i1> and returned as a zeroext i32.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a and %__b.
; The <16 x i1> result is ANDed with the bits of %__u, widened to <32 x i1> and
; returned as a zeroext i32.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; an <8 x i64> vector loaded from %__b. The <16 x i1> result is ANDed with the
; bits of %__u, widened to <32 x i1> and returned as a zeroext i32.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; a single i32 loaded from %__b and splatted to all lanes. The <16 x i1> result
; is widened to <32 x i1> and returned as a zeroext i32.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against a
; single i32 loaded from %__b and splatted to all lanes. The <16 x i1> result
; is ANDed with the bits of %__u, widened to <32 x i1> and returned as a
; zeroext i32.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so only the
; low 16 bits of the 32-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b. The <16 x i1> result is widened to <64 x i1> and returned as a zeroext
; i64.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; an <8 x i64> vector loaded from %__b. The <16 x i1> result is widened to
; <64 x i1> and returned as a zeroext i64.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a and %__b.
; The <16 x i1> result is ANDed with the bits of %__u, widened to <64 x i1> and
; returned as a zeroext i64.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; an <8 x i64> vector loaded from %__b. The <16 x i1> result is ANDed with the
; bits of %__u, widened to <64 x i1> and returned as a zeroext i64.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; a single i32 loaded from %__b and splatted to all lanes. The <16 x i1> result
; is widened to <64 x i1> and returned as a zeroext i64.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against a
; single i32 loaded from %__b and splatted to all lanes. The <16 x i1> result
; is ANDed with the bits of %__u, widened to <64 x i1> and returned as a
; zeroext i64.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar by inserting it into lane 0 and shuffling with an
; all-zero index mask.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
; Reinterpret the i16 write mask as one i1 per lane and apply it to the compare.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Shuffle indices 16 and above pick lanes of the zeroinitializer operand, so
; only the low 16 bits of the 64-bit result can be set.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of the two i64 lanes of %__a and %__b.
; The <2 x i1> result is widened to <4 x i1> and returned as a zeroext i4.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts are to the identical type and therefore leave the value
; unchanged.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 pick lanes of the zeroinitializer operand, so only
; the low 2 bits of the 4-bit result can be set.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Same compare as the register form, but the second operand is a <2 x i64>
; loaded through %__b; the result is widened to 4 lanes and returned as i4.
; Expected output: the load is folded into vpcmpgtq as a memory operand in both
; configurations.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the two upper lanes are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked signed greater-than compare of two <2 x i64> register operands: the
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to 4 lanes and returned as an i4 bitmask.
; Expected output: with avx512vl the mask becomes the {%k1} write-mask of
; vpcmpgtq; the avx512f-only run materializes the two mask bits in an xmm
; register and applies them with vpand.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the two upper lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked signed greater-than compare where the second operand is a <2 x i64>
; loaded through %__b. The compare result is ANDed with lanes 0 and 1 of %__u
; (viewed as <8 x i1>), widened to 4 lanes and returned as an i4 bitmask.
; Expected output: with avx512vl a single write-masked vpcmpgtq with a folded
; memory operand; the avx512f-only run applies the mask with vpand.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the two upper lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Broadcast form: a scalar i64 is loaded through %__b, splatted to both lanes
; and used as the right-hand side of a signed greater-than compare against
; %__a. The <2 x i1> result is widened to 4 lanes and returned as i4.
; Expected output: with avx512vl the splat folds into the {1to2} embedded
; broadcast operand of vpcmpgtq; the avx512f-only run uses a separate
; vpbroadcastq.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the two upper lanes are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked broadcast form: a scalar i64 loaded through %__b is splatted and
; compared (signed greater-than) against %__a; the result is ANDed with lanes 0
; and 1 of %__u (viewed as <8 x i1>), widened to 4 lanes and returned as i4.
; Note the AND operands are in the opposite order from the non-broadcast masked
; tests, which is mirrored by the vpand operand order in the avx512f-only run.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the two upper lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Signed greater-than compare of two <2 x i64> register operands; the <2 x i1>
; result is widened to 8 lanes and returned as an i8 bitmask.
; Expected output: a single vpcmpgtq into a mask register when avx512vl is
; enabled; the avx512f-only run extracts the two result bytes with vpextrb and
; assembles the mask with k-register shifts, or and xor.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Same compare as the register form, but the second operand is a <2 x i64>
; loaded through %__b; the result is widened to 8 lanes and returned as i8.
; Expected output: the load is folded into vpcmpgtq as a memory operand in both
; configurations.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed greater-than compare of two <2 x i64> register operands: the
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to 8 lanes and returned as an i8 bitmask.
; Expected output: with avx512vl the mask becomes the {%k1} write-mask of
; vpcmpgtq; the avx512f-only run applies the mask with vpand and then assembles
; the result with vpextrb and k-register operations.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked signed greater-than compare where the second operand is a <2 x i64>
; loaded through %__b. The compare result is ANDed with lanes 0 and 1 of %__u
; (viewed as <8 x i1>), widened to 8 lanes and returned as an i8 bitmask.
; Expected output: with avx512vl a single write-masked vpcmpgtq with a folded
; memory operand; the avx512f-only run applies the mask with vpand.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Broadcast form: a scalar i64 is loaded through %__b, splatted to both lanes
; and used as the right-hand side of a signed greater-than compare against
; %__a. The <2 x i1> result is widened to 8 lanes and returned as i8.
; Expected output: with avx512vl the splat folds into the {1to2} embedded
; broadcast operand of vpcmpgtq; the avx512f-only run uses a separate
; vpbroadcastq.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked broadcast form: a scalar i64 loaded through %__b is splatted and
; compared (signed greater-than) against %__a; the result is ANDed with lanes 0
; and 1 of %__u (viewed as <8 x i1>), widened to 8 lanes and returned as i8.
; Note the AND operands are in the opposite order from the non-broadcast masked
; tests, which is mirrored by the vpand operand order in the avx512f-only run.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Signed greater-than compare of two <2 x i64> register operands; the <2 x i1>
; result is widened to 16 lanes and returned as an i16 bitmask.
; Expected output: a single vpcmpgtq into a mask register when avx512vl is
; enabled; the avx512f-only run extracts the two result bytes with vpextrb and
; assembles the mask with k-register shifts, or and xor.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Same compare as the register form, but the second operand is a <2 x i64>
; loaded through %__b; the result is widened to 16 lanes and returned as i16.
; Expected output: the load is folded into vpcmpgtq as a memory operand in both
; configurations.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of two <2 x i64> register operands: the
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to 16 lanes and returned as an i16 bitmask.
; Expected output: with avx512vl the mask becomes the {%k1} write-mask of
; vpcmpgtq; the avx512f-only run applies the mask with vpand and then assembles
; the result with vpextrb and k-register operations.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed greater-than compare where the second operand is a <2 x i64>
; loaded through %__b. The compare result is ANDed with lanes 0 and 1 of %__u
; (viewed as <8 x i1>), widened to 16 lanes and returned as an i16 bitmask.
; Expected output: with avx512vl a single write-masked vpcmpgtq with a folded
; memory operand; the avx512f-only run applies the mask with vpand.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Broadcast form: a scalar i64 is loaded through %__b, splatted to both lanes
; and used as the right-hand side of a signed greater-than compare against
; %__a. The <2 x i1> result is widened to 16 lanes and returned as i16.
; Expected output: with avx512vl the splat folds into the {1to2} embedded
; broadcast operand of vpcmpgtq; the avx512f-only run uses a separate
; vpbroadcastq.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked broadcast form: a scalar i64 loaded through %__b is splatted and
; compared (signed greater-than) against %__a; the result is ANDed with lanes 0
; and 1 of %__u (viewed as <8 x i1>), widened to 16 lanes and returned as i16.
; Note the AND operands are in the opposite order from the non-broadcast masked
; tests, which is mirrored by the vpand operand order in the avx512f-only run.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
; insertelement + all-zero shuffle mask is the splat of the loaded scalar.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of two <2 x i64> register operands; the <2 x i1>
; result is widened to 32 lanes and returned as an i32 bitmask.
; Expected output: a single vpcmpgtq into a mask register when avx512vl is
; enabled; the avx512f-only run sets up an aligned stack frame, stores two
; 16-bit masks into it with kmovw and reads the 32-bit result back with movl.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Same compare as the register form, but the second operand is a <2 x i64>
; loaded through %__b; the result is widened to 32 lanes and returned as i32.
; Expected output: the load is folded into vpcmpgtq as a memory operand in both
; configurations; the avx512f-only run builds the 32-bit result through an
; aligned stack slot.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of two <2 x i64> register operands: the
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to 32 lanes and returned as an i32 bitmask.
; Expected output: with avx512vl the mask becomes the {%k1} write-mask of
; vpcmpgtq; the avx512f-only run applies the mask with vpand and builds the
; 32-bit result through an aligned stack slot. Unlike the unmasked variant, the
; low half gets an extra vpslld $31 before vptestmd.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0 and 1 of the 8-lane mask are used; the other six are dropped.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so lanes 2..31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare of %__a against a <2 x i64> loaded from
; %__b. The <2 x i1> result is ANDed with the low two bits of %__u, widened to
; <32 x i1> with zero upper lanes, and returned as an i32 bitmask.
; With AVX512VL enabled the expected code is a single write-masked vpcmpgtq
; with a folded memory operand. The AVX512F-only run expects the result to be
; built in vector registers and written as two 16-bit mask stores to a
; 32-byte-aligned stack slot that is then reloaded as the return value.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0 and 1, one per
; compared element.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select elements of the zeroinitializer operand, so
; result lanes 2..31 are always zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is widened to <32 x i1>
; with zero upper lanes and returned as an i32 bitmask.
; With AVX512VL enabled the expected code folds the load as a {1to2} embedded
; broadcast of vpcmpgtq. The AVX512F-only run expects an explicit vpbroadcastq,
; a vector compare, and two 16-bit mask stores to an aligned stack slot.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with the
; all-zero index mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2 and 3 select elements of the zeroinitializer operand, so
; result lanes 2..31 are always zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two bits of %__u, widened to <32 x i1> with zero upper lanes, and returned as
; an i32 bitmask.
; With AVX512VL enabled the expected code is a single write-masked vpcmpgtq
; with a {1to2} embedded broadcast. The AVX512F-only run expects an explicit
; vpbroadcastq, a vector compare and AND, and two 16-bit mask stores to an
; aligned stack slot.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with the
; all-zero index mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0 and 1, one per
; compared element.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select elements of the zeroinitializer operand, so
; result lanes 2..31 are always zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <8 x i1> with zero upper lanes and returned as
; an i8 bitmask.
; With AVX512VL enabled the expected code is a single ymm vpcmpgtq into a mask
; register. The AVX512F-only run expects a vector compare narrowed with
; vpmovqd, after which each lane is pulled out with vpextrb and merged into a
; mask register one bit at a time through kshift/kxor sequences.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 4..7 select elements of the zeroinitializer operand, so
; result lanes 4..7 are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked signed greater-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> result is widened to <8 x i1> with zero upper lanes and
; returned as an i8 bitmask.
; With AVX512VL enabled the expected code is a single ymm vpcmpgtq with a
; folded memory operand. The AVX512F-only run expects a vector compare narrowed
; with vpmovqd, after which each lane is pulled out with vpextrb and merged
; into a mask register one bit at a time through kshift/kxor sequences.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 4..7 select elements of the zeroinitializer operand, so
; result lanes 4..7 are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is ANDed with the low four bits of %__u, widened to <8 x i1>
; with zero upper lanes, and returned as an i8 bitmask.
; With AVX512VL enabled the expected code is a single write-masked ymm
; vpcmpgtq. The AVX512F-only run expects the mask bits to be moved into a
; vector with vpinsrb and applied with vpand, after which each lane is pulled
; out with vpextrb and merged into a mask register one bit at a time through
; kshift/kxor sequences.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0..3, one per
; compared element.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select elements of the zeroinitializer operand, so
; result lanes 4..7 are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked signed greater-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> result is ANDed with the low four bits of %__u, widened to
; <8 x i1> with zero upper lanes, and returned as an i8 bitmask.
; With AVX512VL enabled the expected code is a single write-masked ymm
; vpcmpgtq with a folded memory operand. The AVX512F-only run expects the mask
; bits to be moved into a vector with vpinsrb and applied with vpand, after
; which each lane is pulled out with vpextrb and merged into a mask register
; one bit at a time through kshift/kxor sequences.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and keep lanes 0..3, one per
; compared element.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select elements of the zeroinitializer operand, so
; result lanes 4..7 are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; With AVX512VL enabled the expected code folds the load as a {1to4} embedded
; broadcast of vpcmpgtq. The AVX512F-only run expects an explicit vpbroadcastq
; and a vector compare narrowed with vpmovqd, after which each lane is pulled
; out with vpextrb and merged into a mask register one bit at a time through
; kshift/kxor sequences.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with the
; all-zero index mask <0, 0, 0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 4..7 select elements of the zeroinitializer operand, so
; result lanes 4..7 are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed greater-than compare of <4 x i64> %__a against one i64 loaded
; from %__b and splatted to all four lanes. The four result bits are ANDed with
; the low four bits of %__u, zero-extended to <8 x i1> and returned as an i8.
; With AVX512VL the assertions expect one masked vpcmpgtq using an embedded
; {1to4} broadcast; with plain AVX512F they expect an explicit vpbroadcastq, a
; ymm compare, and the mask rebuilt lane by lane with vpextrb/kshift/kxor.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar into every lane of the right-hand operand.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the mask is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so result bits 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Signed greater-than compare of two <4 x i64> register operands. The four
; result bits are zero-extended to <16 x i1> and returned as an i16.
; With AVX512VL the assertions expect one vpcmpgtq writing a k-register; with
; plain AVX512F they expect a ymm compare and the mask rebuilt lane by lane
; with vpextrb/kshift/kxor.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Signed greater-than compare of <4 x i64> %__a against a full <4 x i64> vector
; loaded from %__b. The four result bits are zero-extended to <16 x i1> and
; returned as an i16.
; Both configurations are expected to fold the load into vpcmpgtq; with plain
; AVX512F the result lands in ymm0 and the mask is rebuilt lane by lane with
; vpextrb/kshift/kxor.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of two <4 x i64> register operands. The
; four result bits are ANDed with the low four bits of %__u, zero-extended to
; <16 x i1> and returned as an i16.
; With AVX512VL the assertions expect one vpcmpgtq predicated on %k1; with
; plain AVX512F they expect the mask bits to be expanded into xmm1, combined
; with vpand, and the result mask rebuilt with vpextrb/kshift/kxor.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the compare result is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed greater-than compare of <4 x i64> %__a against a full
; <4 x i64> vector loaded from %__b. The four result bits are ANDed with the
; low four bits of %__u, zero-extended to <16 x i1> and returned as an i16.
; With AVX512VL the assertions expect one load-folded vpcmpgtq predicated on
; %k1; with plain AVX512F they expect a load-folded ymm compare, the mask bits
; expanded into xmm1 and combined with vpand, then the result mask rebuilt
; with vpextrb/kshift/kxor.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the compare result is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of <4 x i64> %__a against one i64 loaded from
; %__b and splatted to all four lanes. The four result bits are zero-extended
; to <16 x i1> and returned as an i16.
; With AVX512VL the assertions expect one vpcmpgtq using an embedded {1to4}
; broadcast; with plain AVX512F they expect an explicit vpbroadcastq, a ymm
; compare, and the mask rebuilt lane by lane with vpextrb/kshift/kxor.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar into every lane of the right-hand operand.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of <4 x i64> %__a against one i64 loaded
; from %__b and splatted to all four lanes. The four result bits are ANDed with
; the low four bits of %__u, zero-extended to <16 x i1> and returned as an i16.
; With AVX512VL the assertions expect one masked vpcmpgtq using an embedded
; {1to4} broadcast; with plain AVX512F they expect an explicit vpbroadcastq, a
; ymm compare, a vpand with the expanded mask, and the result mask rebuilt
; with vpextrb/kshift/kxor.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar into every lane of the right-hand operand.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the mask is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of two <4 x i64> register operands. The four
; result bits are zero-extended to <32 x i1> and returned as an i32.
; With AVX512VL the assertions expect one vpcmpgtq writing a k-register; with
; plain AVX512F they expect a realigned stack frame in which the two 16-bit
; mask halves are stored with kmovw and reloaded as a single 32-bit value.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed greater-than compare of <4 x i64> %__a against a full <4 x i64> vector
; loaded from %__b. The four result bits are zero-extended to <32 x i1> and
; returned as an i32.
; Both configurations are expected to fold the load into vpcmpgtq; with plain
; AVX512F the assertions additionally expect a realigned stack frame in which
; the two 16-bit mask halves are stored with kmovw and reloaded as one 32-bit
; value.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of two <4 x i64> register operands. The
; four result bits are ANDed with the low four bits of %__u, zero-extended to
; <32 x i1> and returned as an i32.
; With AVX512VL the assertions expect one vpcmpgtq predicated on %k1; with
; plain AVX512F they expect the mask bits expanded into xmm1 and combined with
; vpand, then the two 16-bit mask halves stored to a realigned stack frame and
; reloaded as one 32-bit value.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the compare result is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare of <4 x i64> %__a against a full
; <4 x i64> vector loaded from %__b. The four result bits are ANDed with the
; low four bits of %__u, zero-extended to <32 x i1> and returned as an i32.
; With AVX512VL the assertions expect one load-folded vpcmpgtq predicated on
; %k1; with plain AVX512F they expect a load-folded ymm compare, a vpand with
; the expanded mask, and the two 16-bit mask halves stored to a realigned
; stack frame and reloaded as one 32-bit value.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the compare result is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of <4 x i64> %__a against one i64 loaded from
; %__b and splatted to all four lanes. The four result bits are zero-extended
; to <32 x i1> and returned as an i32.
; With AVX512VL the assertions expect one vpcmpgtq using an embedded {1to4}
; broadcast; with plain AVX512F they expect an explicit vpbroadcastq, a ymm
; compare, and the two 16-bit mask halves stored to a realigned stack frame
; and reloaded as one 32-bit value.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar into every lane of the right-hand operand.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of <4 x i64> %__a against one i64 loaded
; from %__b and splatted to all four lanes. The four result bits are ANDed with
; the low four bits of %__u, zero-extended to <32 x i1> and returned as an i32.
; With AVX512VL the assertions expect one masked vpcmpgtq using an embedded
; {1to4} broadcast; with plain AVX512F they expect an explicit vpbroadcastq, a
; ymm compare, a vpand with the expanded mask, and the two 16-bit mask halves
; stored to a realigned stack frame and reloaded as one 32-bit value.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar into every lane of the right-hand operand.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u take part; the mask is the first 'and' operand here.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of two <4 x i64> register operands. The four
; result bits are zero-extended to <64 x i1> and returned as an i64.
; With AVX512VL the assertions expect one vpcmpgtq and a kmovq of the result;
; with plain AVX512F they expect a 64-byte realigned stack area that receives
; two zeroed 16-bit mask words plus the two computed ones, after which the i64
; is assembled from two 32-bit loads with shlq/orq.
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Every index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed greater-than compare of four i64 lanes where the right-hand operand
; is a full <4 x i64> vector loaded through %__b. The <4 x i1> result is
; widened to <64 x i1> with zero lanes and returned as an i64 bitmask.
; The assertions below pin the llc output for the AVX512VL-enabled run
; (compare straight into a mask register) and for the AVX512F-only run
; (result assembled through stack slots).
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below convert a type to itself, i.e. they are no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 0-3 select the compare results; every index >= 4 selects a
; lane of the zeroinitializer operand, so bits 4-63 of the result are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of four i64 lanes, both operands in
; registers. The compare result is ANDed with the low four bits of %__u,
; widened to <64 x i1> with zero lanes and returned as an i64 bitmask.
; The AVX512VL-enabled run is expected to fold the mask into the compare as
; a {%k1} write-mask; the AVX512F-only run is expected to rebuild the mask
; as a vector and apply it with vpand.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %__b convert a type to itself, i.e. they are no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 only.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked compare results; every index >= 4
; selects a lane of the zeroinitializer operand, so bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of four i64 lanes where the right-hand
; operand is a full <4 x i64> vector loaded through %__b. The compare result
; is ANDed with the low four bits of %__u, widened to <64 x i1> with zero
; lanes and returned as an i64 bitmask.
; The AVX512VL-enabled run is expected to fold both the load and the mask
; into one vpcmpgtq; the AVX512F-only run is expected to rebuild the mask as
; a vector and apply it with vpand.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert a type to itself, i.e. they are no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 only.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 0-3 select the masked compare results; every index >= 4
; selects a lane of the zeroinitializer operand, so bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed greater-than compare of four i64 lanes against a single i64 loaded
; through %__b and splatted to all lanes. The <4 x i1> result is widened to
; <64 x i1> with zero lanes and returned as an i64 bitmask.
; The AVX512VL-enabled run is expected to use an embedded {1to4} broadcast
; memory operand; the AVX512F-only run is expected to emit a separate
; vpbroadcastq before the compare.
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 0-3 select the compare results; every index >= 4 selects a
; lane of the zeroinitializer operand, so bits 4-63 of the result are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed greater-than compare of four i64 lanes against a single i64
; loaded through %__b and splatted to all lanes. The compare result is ANDed
; with the low four bits of %__u, widened to <64 x i1> with zero lanes and
; returned as an i64 bitmask.
; The AVX512VL-enabled run is expected to fold the broadcast load and the
; mask into one vpcmpgtq; the AVX512F-only run is expected to emit a
; separate vpbroadcastq and to apply the mask with vpand.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 only.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Here the mask is the first operand of the 'and' and the compare result the
; second.
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 0-3 select the masked compare results; every index >= 4
; selects a lane of the zeroinitializer operand, so bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed greater-than compare of eight i64 lanes, both operands in
; registers. The <8 x i1> result is widened to <16 x i1> with zero lanes and
; returned as an i16 bitmask.
; Both runs are expected to compare directly into a mask register; the
; expected output differs only in the mask-to-GPR move (kmovd vs. kmovw).
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; indices 8-15 select lanes
; of the zeroinitializer operand, so bits 8-15 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Signed greater-than compare of eight i64 lanes where the right-hand
; operand is a full <8 x i64> vector loaded through %__b. The <8 x i1>
; result is widened to <16 x i1> with zero lanes and returned as an i16
; bitmask.
; Both runs are expected to fold the load into vpcmpgtq as a memory operand.
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; indices 8-15 select lanes
; of the zeroinitializer operand, so bits 8-15 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of eight i64 lanes, both operands in
; registers. The compare result is ANDed with the eight bits of %__u,
; widened to <16 x i1> with zero lanes and returned as an i16 bitmask.
; Both runs are expected to fold the mask into the compare as a {%k1}
; write-mask.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %__b convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare results; indices 8-15 select
; lanes of the zeroinitializer operand, so bits 8-15 of the result are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed greater-than compare of eight i64 lanes where the right-hand
; operand is a full <8 x i64> vector loaded through %__b. The compare result
; is ANDed with the eight bits of %__u, widened to <16 x i1> with zero lanes
; and returned as an i16 bitmask.
; Both runs are expected to fold the load and the mask into one vpcmpgtq.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare results; indices 8-15 select
; lanes of the zeroinitializer operand, so bits 8-15 of the result are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of eight i64 lanes against a single i64 loaded
; through %__b and splatted to all lanes. The <8 x i1> result is widened to
; <16 x i1> with zero lanes and returned as an i16 bitmask.
; Both runs are expected to use an embedded {1to8} broadcast memory operand.
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; indices 8-15 select lanes
; of the zeroinitializer operand, so bits 8-15 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed greater-than compare of eight i64 lanes against a single i64
; loaded through %__b and splatted to all lanes. The compare result is ANDed
; with the eight bits of %__u, widened to <16 x i1> with zero lanes and
; returned as an i16 bitmask.
; Both runs are expected to fold the {1to8} broadcast load and the {%k1}
; write-mask into one vpcmpgtq.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes; here the mask is the first operand of
; the 'and' and the compare result the second.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 select the masked compare results; indices 8-15 select
; lanes of the zeroinitializer operand, so bits 8-15 of the result are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed greater-than compare of eight i64 lanes, both operands in
; registers. The <8 x i1> result is widened to <32 x i1> with zero lanes and
; returned as an i32 bitmask.
; The AVX512VL-enabled run is expected to move the compare mask straight to
; %eax. The AVX512F-only run is expected to pull the eight mask bits out one
; at a time (kshiftrw/kmovw), rebuild them as bytes with vpinsrb, convert
; back to a mask and read the 32-bit result from the stack.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; every index >= 8 selects a
; lane of the zeroinitializer operand, so bits 8-31 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed greater-than compare of eight i64 lanes where the right-hand
; operand is a full <8 x i64> vector loaded through %__b. The <8 x i1>
; result is widened to <32 x i1> with zero lanes and returned as an i32
; bitmask.
; Both runs are expected to fold the load into vpcmpgtq. The AVX512F-only
; run is additionally expected to pull the eight mask bits out one at a time
; (kshiftrw/kmovw), rebuild them as bytes with vpinsrb, convert back to a
; mask and read the 32-bit result from the stack.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; every index >= 8 selects a
; lane of the zeroinitializer operand, so bits 8-31 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed greater-than compare of eight i64 lanes, both operands in
; registers. The compare result is ANDed with the eight bits of %__u,
; widened to <32 x i1> with zero lanes and returned as an i32 bitmask.
; Both runs are expected to fold the mask into the compare as a {%k1}
; write-mask. The AVX512F-only run is additionally expected to pull the
; eight mask bits out one at a time (kshiftrw/kmovw), rebuild them as bytes
; with vpinsrb, convert back to a mask and read the 32-bit result from the
; stack.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %__b convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare results; every index >= 8
; selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-than compare of eight i64 lanes where the right-hand
; operand is a full <8 x i64> vector loaded through %__b. The compare result
; is ANDed with the eight bits of %__u, widened to <32 x i1> with zero lanes
; and returned as an i32 bitmask.
; Both runs are expected to fold the load and the {%k1} write-mask into one
; vpcmpgtq. The AVX512F-only run is additionally expected to pull the eight
; mask bits out one at a time (kshiftrw/kmovw), rebuild them as bytes with
; vpinsrb, convert back to a mask and read the 32-bit result from the stack.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of %__a and %load convert a type to itself, i.e. they are no-ops.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare results; every index >= 8
; selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed greater-than compare of eight i64 lanes against a single i64 loaded
; through %__b and splatted to all lanes. The <8 x i1> result is widened to
; <32 x i1> with zero lanes and returned as an i32 bitmask.
; Both runs are expected to use an embedded {1to8} broadcast memory operand.
; The AVX512F-only run is additionally expected to pull the eight mask bits
; out one at a time (kshiftrw/kmovw), rebuild them as bytes with vpinsrb,
; convert back to a mask and read the 32-bit result from the stack.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare results; every index >= 8 selects a
; lane of the zeroinitializer operand, so bits 8-31 of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked variant of the broadcast-from-memory signed greater-than test: the
; <8 x i1> compare result is ANDed with the i8 mask %__u before being widened to
; <32 x i1> (zero upper lanes) and returned as an i32.
; In both configurations the expected code moves %__u into %k1 and uses it as
; the write-mask of a vpcmpgtq with a 1to8 broadcast operand. With only avx512f,
; the resulting mask is additionally rebuilt bit by bit and stored/reloaded
; through the stack.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-31 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked register-register signed greater-than compare of two <8 x i64>
; vectors. The <8 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64 bitmask.
; With avx512vl/bw/dq enabled, the expected code is one vpcmpgtq plus a kmovq.
; With only avx512f, the expected code rebuilds the mask bit by bit, zeroes
; three further 16-bit stack words, stores the low word, and assembles the i64
; from two 32-bit stack loads (shlq $32 / orq).
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts are no-ops: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Indices 0-7 select the compare result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed greater-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; In both configurations the load is expected to fold into vpcmpgtq as a memory
; operand. With only avx512f, the mask is then rebuilt bit by bit and the i64 is
; assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts below are no-ops: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Indices 0-7 select the compare result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked register-register signed greater-than compare of two <8 x i64>
; vectors: the <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <64 x i1> with zero upper lanes, and returned as an i64.
; In both configurations %__u is expected to be moved into %k1 and used as the
; write-mask of vpcmpgtq. With only avx512f, the mask is then rebuilt bit by bit
; and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts are no-ops: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed greater-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b: the <8 x i1> result is ANDed with the i8
; mask %__u, widened to <64 x i1> with zero upper lanes, and returned as an i64.
; In both configurations the load is expected to fold into a write-masked
; vpcmpgtq. With only avx512f, the mask is then rebuilt bit by bit and the i64
; is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Both bitcasts below are no-ops: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed greater-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; In both configurations the scalar load is expected to fold into vpcmpgtq as a
; 1to8 broadcast operand. With only avx512f, the mask is then rebuilt bit by bit
; and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; Indices 0-7 select the compare result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant of the broadcast-from-memory signed greater-than test with a
; 64-bit result: the <8 x i1> compare result is ANDed with the i8 mask %__u,
; widened to <64 x i1> with zero upper lanes, and returned as an i64.
; In both configurations %__u is expected in %k1 as the write-mask of a
; vpcmpgtq with a 1to8 broadcast operand. With only avx512f, the mask is then
; rebuilt bit by bit and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked register-register signed greater-or-equal compare of two 128-bit
; vectors viewed as <16 x i8>. The <16 x i1> result is widened to <32 x i1>
; with zero upper lanes and returned as an i32 bitmask.
; With avx512vl/bw/dq enabled, the expected code expresses a >= b as b <= a
; (vpcmpleb with swapped operands). With only avx512f, it computes b > a with
; vpcmpgtb, inverts it by XOR with all-ones, and converts the byte vector to a
; mask via vpmovsxbd / vpslld / vptestmd before a stack round-trip.
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Indices 0-15 select the compare result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-31 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is widened
; to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; With avx512vl/bw/dq enabled, the load is expected to fold into vpcmpnltb
; ("not less than"). With only avx512f, the operand is first loaded with
; vmovdqa, then b > a is computed with vpcmpgtb and inverted by XOR with
; all-ones before conversion to a mask.
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Indices 0-15 select the compare result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-31 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked register-register signed greater-or-equal compare of two 128-bit
; vectors viewed as <16 x i8>: the <16 x i1> result is ANDed with the i16 mask
; %__u, widened to <32 x i1> with zero upper lanes, and returned as an i32.
; With avx512vl/bw/dq enabled, the expected code is a write-masked vpcmpleb
; with swapped operands. With only avx512f, the compare is done as an inverted
; vpcmpgtb, %__u is applied as the write-mask of vptestmd, and the masked result
; is expanded to a vector (vpternlogd with zeroing), narrowed, and re-tested
; before the stack round-trip.
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-31 of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>: the <16 x i1> result is ANDed
; with the i16 mask %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32.
; With avx512vl/bw/dq enabled, the load is expected to fold into a write-masked
; vpcmpnltb. With only avx512f, the operand is loaded with vmovdqa, the compare
; is done as an inverted vpcmpgtb, and %__u is applied as the write-mask of
; vptestmd before the result is expanded, narrowed, and re-tested.
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and AND it with the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-31 of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked register-register signed greater-or-equal compare of two 128-bit
; vectors viewed as <16 x i8>. The <16 x i1> result is widened to <64 x i1>
; with zero upper lanes and returned as an i64 bitmask.
; With avx512vl/bw/dq enabled, the expected code is vpcmpleb with swapped
; operands plus a kmovq. With only avx512f, the compare is an inverted vpcmpgtb
; converted to a mask via vpmovsxbd / vpslld / vptestmd, and the i64 is
; assembled from two 32-bit stack loads (shlq $32 / orq).
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Indices 0-15 select the compare result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-63 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is widened
; to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; With avx512vl/bw/dq enabled, the load is expected to fold into vpcmpnltb
; followed by a kmovq. With only avx512f, the operand is loaded with vmovdqa,
; the compare is an inverted vpcmpgtb, and the i64 is assembled from two 32-bit
; stack loads.
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as sixteen bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
; Indices 0-15 select the compare result; indices 16-31 select lanes of the
; zeroinitializer operand, so bits 16-63 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed `>=` compare of sixteen i8 lanes (both operands in registers);
; the compare bits are ANDed with the i16 mask %__u, widened with zeros to
; 64 bits and returned as i64.
; With +avx512bw/+avx512vl the expected code is a `vpcmpleb` with swapped
; operands under write-mask %k1; with +avx512f only the mask is applied through
; a masked `vptestmd` and the result goes through zeroed stack slots.
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <16 x i8> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 64 lanes: indices 0-15 select the masked compare bits, every index
; >= 16 selects from the zeroinitializer operand, so the upper 48 lanes are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Pack the 64 x i1 vector into the scalar return value.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed `>=` compare of sixteen i8 lanes, second operand loaded from
; memory; the compare bits are ANDed with the i16 mask %__u, widened with zeros
; to 64 bits and returned as i64.
; With +avx512bw/+avx512vl the expected code is a `vpcmpnltb` with the load
; folded in and write-mask %k1; with +avx512f only the load is a separate
; `vmovdqa` and the mask is applied through a masked `vptestmd`.
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 128-bit register argument as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <16 x i8> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 64 lanes: indices 0-15 select the masked compare bits, every index
; >= 16 selects from the zeroinitializer operand, so the upper 48 lanes are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Pack the 64 x i1 vector into the scalar return value.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed `>=` compare of thirty-two i8 lanes (both operands in ymm
; registers); the 32 result bits are widened with zeros to 64 bits and returned
; as i64.
; With +avx512bw/+avx512vl the expected code is one `vpcmpleb` with swapped
; operands; with +avx512f only the 256-bit result is split into two 128-bit
; halves, each converted to a 16-bit mask via `vptestmd` and merged on the stack.
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit arguments as thirty-two i8 lanes.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <32 x i8> %0, %1
; Widen to 64 lanes: indices 0-31 select the compare bits, indices 32-63 select
; from the zeroinitializer operand, so the upper 32 lanes are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Pack the 64 x i1 vector into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed `>=` compare of thirty-two i8 lanes, second operand loaded
; from memory; the 32 result bits are widened with zeros to 64 bits and
; returned as i64.
; With +avx512bw/+avx512vl the expected code is one `vpcmpnltb` with the load
; folded in; with +avx512f only the load is a separate `vmovdqa` and the
; 256-bit result is processed as two 128-bit halves merged on the stack.
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 256-bit register argument as thirty-two i8 lanes.
%0 = bitcast <4 x i64> %__a to <32 x i8>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <32 x i8> %0, %1
; Widen to 64 lanes: indices 0-31 select the compare bits, indices 32-63 select
; from the zeroinitializer operand, so the upper 32 lanes are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Pack the 64 x i1 vector into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed `>=` compare of thirty-two i8 lanes (both operands in ymm
; registers); the compare bits are ANDed with the i32 mask %__u, widened with
; zeros to 64 bits and returned as i64.
; With +avx512bw/+avx512vl the expected code is a `vpcmpleb` with swapped
; operands under write-mask %k1; with +avx512f only the mask is spilled to the
; stack, reloaded as two 16-bit k-registers, expanded to byte vectors and
; applied with `vpand` to each 128-bit half of the compare result.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit arguments as thirty-two i8 lanes.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <32 x i8> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Widen to 64 lanes: indices 0-31 select the masked compare bits, indices 32-63
; select from the zeroinitializer operand, so the upper 32 lanes are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Pack the 64 x i1 vector into the scalar return value.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed `>=` compare of thirty-two i8 lanes, second operand loaded from
; memory; the compare bits are ANDed with the i32 mask %__u, widened with zeros
; to 64 bits and returned as i64.
; With +avx512bw/+avx512vl the expected code is a `vpcmpnltb` with the load
; folded in and write-mask %k1; with +avx512f only the mask is spilled to the
; stack, reloaded as two 16-bit k-registers, expanded to byte vectors and
; applied with `vpand` to each 128-bit half of the compare result.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 256-bit register argument as thirty-two i8 lanes.
%0 = bitcast <4 x i64> %__a to <32 x i8>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
; Lane-wise signed a >= b.
%2 = icmp sge <32 x i8> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Widen to 64 lanes: indices 0-31 select the masked compare bits, indices 32-63
; select from the zeroinitializer operand, so the upper 32 lanes are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Pack the 64 x i1 vector into the scalar return value.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed `>=` compare of eight i16 lanes (both operands in registers);
; the 8 result bits are widened with zeros to 16 bits and returned as i16.
; With +avx512bw/+avx512vl the expected code is one `vpcmplew` with swapped
; operands; with +avx512f only it is `vpcmpgtw` plus an all-ones XOR, then a
; sign-extend to eight i64 lanes and `vptestmq` to produce the mask directly.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Widen to 16 lanes: indices 0-7 select the compare bits, indices 8-15 select
; from the zeroinitializer operand, so the upper 8 lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 16 x i1 vector into the scalar return value.
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked signed `>=` compare of eight i16 lanes, second operand loaded from
; memory; the 8 result bits are widened with zeros to 16 bits and returned as i16.
; With +avx512bw/+avx512vl the expected code is one `vpcmpnltw` with the load
; folded in; with +avx512f only the load is a separate `vmovdqa` followed by
; `vpcmpgtw`, an all-ones XOR, a sign-extend to i64 lanes and `vptestmq`.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 128-bit register argument as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Widen to 16 lanes: indices 0-7 select the compare bits, indices 8-15 select
; from the zeroinitializer operand, so the upper 8 lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 16 x i1 vector into the scalar return value.
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed `>=` compare of eight i16 lanes (both operands in registers);
; the compare bits are ANDed with the i8 mask %__u, widened with zeros to
; 16 bits and returned as i16.
; With +avx512bw/+avx512vl the expected code is a `vpcmplew` with swapped
; operands under write-mask %k1; with +avx512f only the mask is applied as the
; write-mask of the final `vptestmq`.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 16 lanes: indices 0-7 select the masked compare bits, indices 8-15
; select from the zeroinitializer operand, so the upper 8 lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 16 x i1 vector into the scalar return value.
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed `>=` compare of eight i16 lanes, second operand loaded from
; memory; the compare bits are ANDed with the i8 mask %__u, widened with zeros
; to 16 bits and returned as i16.
; With +avx512bw/+avx512vl the expected code is a `vpcmpnltw` with the load
; folded in and write-mask %k1; with +avx512f only the load is a separate
; `vmovdqa` and the mask is applied as the write-mask of the final `vptestmq`.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 128-bit register argument as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 16 lanes: indices 0-7 select the masked compare bits, indices 8-15
; select from the zeroinitializer operand, so the upper 8 lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 16 x i1 vector into the scalar return value.
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed `>=` compare of eight i16 lanes (both operands in registers);
; the 8 result bits are widened with zeros to 32 bits and returned as i32.
; With +avx512bw/+avx512vl the expected code is one `vpcmplew` with swapped
; operands; with +avx512f only the 8 mask bits are extracted one at a time with
; `kshiftrw`/`kmovw`, reassembled into a byte vector with `vpinsrb`, turned
; back into a mask with `vptestmd`, and read from a zeroed stack slot.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Widen to 32 lanes: indices 0-7 select the compare bits, every index >= 8
; selects from the zeroinitializer operand, so the upper 24 lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 32 x i1 vector into the scalar return value.
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed `>=` compare of eight i16 lanes, second operand loaded from
; memory; the 8 result bits are widened with zeros to 32 bits and returned as i32.
; With +avx512bw/+avx512vl the expected code is one `vpcmpnltw` with the load
; folded in; with +avx512f only the load is a separate `vmovdqa`, and the
; 8 mask bits are extracted with `kshiftrw`/`kmovw`, reassembled with
; `vpinsrb`, and converted back to a mask via `vptestmd` and a stack slot.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 128-bit register argument as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Widen to 32 lanes: indices 0-7 select the compare bits, every index >= 8
; selects from the zeroinitializer operand, so the upper 24 lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 32 x i1 vector into the scalar return value.
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed `>=` compare of eight i16 lanes (both operands in registers);
; the compare bits are ANDed with the i8 mask %__u, widened with zeros to
; 32 bits and returned as i32.
; With +avx512bw/+avx512vl the expected code is a `vpcmplew` with swapped
; operands under write-mask %k1; with +avx512f only the mask is applied as the
; write-mask of `vptestmq`, after which the 8 bits are extracted with
; `kshiftrw`/`kmovw`, reassembled with `vpinsrb`, and re-tested with `vptestmd`.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 32 lanes: indices 0-7 select the masked compare bits, every index
; >= 8 selects from the zeroinitializer operand, so the upper 24 lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 32 x i1 vector into the scalar return value.
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed `>=` compare of eight i16 lanes, second operand loaded from
; memory; the compare bits are ANDed with the i8 mask %__u, widened with zeros
; to 32 bits and returned as i32.
; With +avx512bw/+avx512vl the expected code is a `vpcmpnltw` with the load
; folded in and write-mask %k1; with +avx512f only the load is a separate
; `vmovdqa`, the mask is the write-mask of `vptestmq`, and the 8 bits are then
; extracted with `kshiftrw`/`kmovw` and reassembled with `vpinsrb`.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the 128-bit register argument as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second operand comes from memory so the load can be folded into the compare.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Expand the scalar mask to one bit per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 32 lanes: indices 0-7 select the masked compare bits, every index
; >= 8 selects from the zeroinitializer operand, so the upper 24 lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 32 x i1 vector into the scalar return value.
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed `>=` compare of eight i16 lanes (both operands in registers);
; the 8 result bits are widened with zeros to 64 bits and returned as i64.
; With +avx512bw/+avx512vl the expected code is one `vpcmplew` with swapped
; operands followed by `kmovq`; with +avx512f only the 8 mask bits are
; extracted with `kshiftrw`/`kmovw`, reassembled with `vpinsrb`, re-tested with
; `vptestmd`, and the 64-bit value is built from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as eight i16 lanes.
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
; Lane-wise signed a >= b.
%2 = icmp sge <8 x i16> %0, %1
; Widen to 64 lanes: indices 0-7 select the compare bits, every index >= 8
; selects from the zeroinitializer operand, so the upper 56 lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Pack the 64 x i1 vector into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand variant: signed >= compare of %__a against a vector loaded
; from %__b, both viewed as <8 x i16>. The 8-element result is widened to 64
; elements with zero lanes and returned as an i64.
; With AVX512BW+VL the expected code folds the load into vpcmpnltw. With only
; AVX512F the expected code loads with vmovdqa, computes b > a with vpcmpgtw
; and inverts the result.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
; Shuffle indices 0-7 select the compare results; indices 8-15 select lanes of
; the zeroinitializer operand, so elements 8-63 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant: signed >= compare of two 128-bit vectors viewed as
; <8 x i16>, AND-ed with the 8-bit mask %__u. The 8-element result is widened
; to 64 elements with zero lanes and returned as an i64.
; With AVX512BW+VL the expected code moves %__u into k1 and uses it as the
; write-mask of a single vpcmplew. With only AVX512F the mask is applied on
; the vptestmq that converts the inverted vpcmpgtw result into a k-register.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked results; indices 8-15 select lanes of
; the zeroinitializer operand, so elements 8-63 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked memory-operand variant: signed >= compare of %__a against a vector
; loaded from %__b (both viewed as <8 x i16>), AND-ed with the 8-bit mask
; %__u. The 8-element result is widened to 64 elements with zero lanes and
; returned as an i64.
; With AVX512BW+VL the expected code is a write-masked vpcmpnltw with the load
; folded in. With only AVX512F the expected code loads with vmovdqa and
; applies the mask on the vptestmq that follows the inverted vpcmpgtw.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked results; indices 8-15 select lanes of
; the zeroinitializer operand, so elements 8-63 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed >= compare of two 256-bit vectors viewed as <16 x i16>. The
; 16-element i1 result is widened to 32 elements with zero lanes and returned
; as an i32.
; With AVX512BW+VL the expected code is one vpcmplew on ymm registers with
; swapped operands (b <= a). With only AVX512F the expected code computes
; b > a with vpcmpgtw and inverts the result by xor-ing with all-ones.
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both operands as sixteen 16-bit lanes before comparing.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Shuffle indices 0-15 select the compare results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-31 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Memory-operand variant: signed >= compare of %__a against a vector loaded
; from %__b, both viewed as <16 x i16>. The 16-element result is widened to 32
; elements with zero lanes and returned as an i32.
; With AVX512BW+VL the expected code folds the load into vpcmpnltw. With only
; AVX512F the expected code loads with vmovdqa, computes b > a with vpcmpgtw
; and inverts the result.
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Shuffle indices 0-15 select the compare results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-31 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked variant: signed >= compare of two 256-bit vectors viewed as
; <16 x i16>, AND-ed with the 16-bit mask %__u. The 16-element result is
; widened to 32 elements with zero lanes and returned as an i32.
; With AVX512BW+VL the expected code moves %__u into k1 and uses it as the
; write-mask of a single vpcmplew. With only AVX512F the mask is applied on
; the vptestmd that converts the inverted vpcmpgtw result into a k-register.
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-31 of the result are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked memory-operand variant: signed >= compare of %__a against a vector
; loaded from %__b (both viewed as <16 x i16>), AND-ed with the 16-bit mask
; %__u. The 16-element result is widened to 32 elements with zero lanes and
; returned as an i32.
; With AVX512BW+VL the expected code is a write-masked vpcmpnltw with the load
; folded in. With only AVX512F the expected code loads with vmovdqa and
; applies the mask on the vptestmd that follows the inverted vpcmpgtw.
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-31 of the result are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed >= compare of two 256-bit vectors viewed as <16 x i16>. The
; 16-element i1 result is widened to 64 elements with zero lanes and returned
; as an i64.
; With AVX512BW+VL the expected code is one vpcmplew on ymm registers with
; swapped operands (b <= a), read out with kmovq. With only AVX512F the
; expected code computes b > a with vpcmpgtw and inverts the result.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both operands as sixteen 16-bit lanes before comparing.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Shuffle indices 0-15 select the compare results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-63 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand variant: signed >= compare of %__a against a vector loaded
; from %__b, both viewed as <16 x i16>. The 16-element result is widened to 64
; elements with zero lanes and returned as an i64.
; With AVX512BW+VL the expected code folds the load into vpcmpnltw. With only
; AVX512F the expected code loads with vmovdqa, computes b > a with vpcmpgtw
; and inverts the result.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Shuffle indices 0-15 select the compare results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-63 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant: signed >= compare of two 256-bit vectors viewed as
; <16 x i16>, AND-ed with the 16-bit mask %__u. The 16-element result is
; widened to 64 elements with zero lanes and returned as an i64.
; With AVX512BW+VL the expected code moves %__u into k1 and uses it as the
; write-mask of a single vpcmplew. With only AVX512F the mask is applied on
; the vptestmd that converts the inverted vpcmpgtw result into a k-register.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-63 of the result are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked memory-operand variant: signed >= compare of %__a against a vector
; loaded from %__b (both viewed as <16 x i16>), AND-ed with the 16-bit mask
; %__u. The 16-element result is widened to 64 elements with zero lanes and
; returned as an i64.
; With AVX512BW+VL the expected code is a write-masked vpcmpnltw with the load
; folded in. With only AVX512F the expected code loads with vmovdqa and
; applies the mask on the vptestmd that follows the inverted vpcmpgtw.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
; The right-hand operand comes from memory rather than a register.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
; Treat the scalar mask as one i1 per lane and AND it into the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked results; indices 16-31 select lanes
; of the zeroinitializer operand, so elements 16-63 of the result are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed >= compare of 32 x i16 lanes where the second operand is loaded from
; memory (%__b). The 32-lane result is widened to 64 lanes with zeros and
; returned as an i64.
; Expected code with AVX512BW/VL/DQ: a single vpcmpnltw with the load folded
; into the compare. Expected code with AVX512F only: %__a is rebuilt word by
; word (vpinsrw) into two ymm halves, each half is compared with vpcmpgtw
; against the loaded data and inverted with an all-ones vpxor, and the
; resulting 16-bit masks are written to the stack and recombined into the
; 64-bit return value with shlq/orq.
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
; Indices 32-63 select from the zeroinitializer operand, so the upper 32
; lanes of the widened mask are always zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked variant: signed >= compare of 32 x i16 lanes (both operands in
; registers), ANDed lane-wise with the 32-bit mask %__u, widened to 64 lanes
; with zeros and returned as an i64.
; Expected code with AVX512BW/VL/DQ: %__u is moved into %k1 and a single
; masked vpcmplew with swapped operands (b <= a) produces the result.
; Expected code with AVX512F only: both operands are rebuilt word by word
; (vpinsrw) into ymm halves, each half is compared with vpcmpgtw and inverted
; with an all-ones vpxor, %__u is spilled and reloaded as two 16-bit masks
; that are expanded to byte vectors (vpternlogd + vpmovdb), and those are
; ANDed with the narrowed compare results before conversion back to masks.
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 512-bit operands as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
; Apply the caller-supplied mask: one bit of %__u per compared lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 32-63 select from the zeroinitializer operand, so the upper 32
; lanes of the widened mask are always zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked variant with a memory operand: signed >= compare of 32 x i16 lanes
; where the second operand is loaded from %__b, ANDed lane-wise with the
; 32-bit mask %__u, widened to 64 lanes with zeros and returned as an i64.
; Expected code with AVX512BW/VL/DQ: %__u is moved into %k1 and a single
; masked vpcmpnltw with the load folded in produces the result.
; Expected code with AVX512F only: %__a is rebuilt word by word (vpinsrw)
; into two ymm halves, each half is compared with vpcmpgtw against data
; loaded from (%rsi) / 32(%rsi) and inverted with an all-ones vpxor, %__u is
; spilled and reloaded as two 16-bit masks that are expanded to byte vectors
; (vpternlogd + vpmovdb), and those are ANDed with the narrowed compare
; results before conversion back to masks.
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
; Apply the caller-supplied mask: one bit of %__u per compared lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 32-63 select from the zeroinitializer operand, so the upper 32
; lanes of the widened mask are always zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed >= compare of 4 x i32 lanes (both operands in registers). The 4-lane
; result is widened to 8 lanes with zeros and returned as an i8.
; Expected code with AVX512BW/VL/DQ: a single vpcmpled with swapped operands
; (b <= a). Expected code with AVX512F only: vpcmpgtd with swapped operands
; followed by an all-ones vpxor gives the >= result as a vector, then each
; lane is pulled out with vpextrb and merged into a mask register one bit at
; a time using kshift/kxor sequences.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select from the zeroinitializer operand, so the upper 4 lanes
; of the widened mask are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Signed >= compare of 4 x i32 lanes where the second operand is loaded from
; memory (%__b). The 4-lane result is widened to 8 lanes with zeros and
; returned as an i8.
; Expected code with AVX512BW/VL/DQ: a single vpcmpnltd with the load folded
; into the compare. Expected code with AVX512F only: the operand is loaded
; with vmovdqa, vpcmpgtd with swapped operands followed by an all-ones vpxor
; gives the >= result as a vector, then each lane is pulled out with vpextrb
; and merged into a mask register one bit at a time using kshift/kxor
; sequences.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select from the zeroinitializer operand, so the upper 4 lanes
; of the widened mask are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked variant: signed >= compare of 4 x i32 lanes (both operands in
; registers), ANDed lane-wise with the low 4 bits of the 8-bit mask %__u,
; widened to 8 lanes with zeros and returned as an i8.
; Expected code with AVX512BW/VL/DQ: %__u is moved into %k1 and a single
; masked vpcmpled with swapped operands (b <= a) produces the result.
; Expected code with AVX512F only: vpcmpgtd with swapped operands computes
; the opposite predicate, the low 4 mask bits are extracted with kshiftrw and
; placed into a vector with vmovd/vpinsrb, and one vpandn performs both the
; inversion of the compare and the AND with the mask. The lanes are then
; pulled out with vpextrb and merged into a mask register one bit at a time.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit operands as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only the low 4 bits of %__u take part: lanes 0-3 of the 8-lane mask are
; extracted to match the 4-lane compare result.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select from the zeroinitializer operand, so the upper 4 lanes
; of the widened mask are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked variant with a memory operand: signed >= compare of 4 x i32 lanes
; where the second operand is loaded from %__b, ANDed lane-wise with the low
; 4 bits of the 8-bit mask %__u, widened to 8 lanes with zeros and returned
; as an i8.
; Expected code with AVX512BW/VL/DQ: %__u is moved into %k1 and a single
; masked vpcmpnltd with the load folded in produces the result.
; Expected code with AVX512F only: the operand is loaded with vmovdqa,
; vpcmpgtd with swapped operands computes the opposite predicate, the low 4
; mask bits are extracted with kshiftrw and placed into a vector with
; vmovd/vpinsrb, and one vpandn performs both the inversion of the compare
; and the AND with the mask. The lanes are then pulled out with vpextrb and
; merged into a mask register one bit at a time.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only the low 4 bits of %__u take part: lanes 0-3 of the 8-lane mask are
; extracted to match the 4-lane compare result.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select from the zeroinitializer operand, so the upper 4 lanes
; of the widened mask are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The 4 compare bits are zero-padded to
; 8 lanes and returned as an i8 bitmask.
; With AVX512VL the expectations use the embedded-broadcast form
; (%rdi){1to4}; with AVX512F only they use vpbroadcastd, then invert vpcmpgtd
; via vpcmpeqd/vpxor and rebuild the mask with vpextrb/kshift/kxor sequences.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select from the zeroinitializer operand, so result bits 4-7 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed >= compare of four i32 lanes against a scalar i32 loaded from
; %__b and splatted to all lanes. The 4 compare bits are ANDed with the low
; 4 bits of %__u, zero-padded to 8 lanes, and returned as an i8 bitmask.
; With AVX512VL the expectations use a masked embedded-broadcast vpcmpnltd;
; with AVX512F only they use vpbroadcastd + vpcmpgtd + vpandn and rebuild the
; mask with vpextrb/kshift/kxor sequences.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here (mask first) differs from the non-broadcast
; masked variants in this file, which put the compare result first.
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select from the zeroinitializer operand, so result bits 4-7 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of four i32 lanes, both operands in registers.
; The 4 compare bits are zero-padded to 16 lanes and returned as an i16
; bitmask.
; With AVX512VL the expectations use vpcmpled with swapped operands (b <= a);
; with AVX512F only they invert vpcmpgtd via vpcmpeqd/vpxor and rebuild the
; mask with vpextrb/kshift/kxor sequences.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked signed >= compare of four i32 lanes; the second operand is a full
; 128-bit vector loaded from %__b. The 4 compare bits are zero-padded to 16
; lanes and returned as an i16 bitmask.
; With AVX512VL the expectations fold the load into vpcmpnltd; with AVX512F
; only they load with vmovdqa, invert vpcmpgtd via vpcmpeqd/vpxor and rebuild
; the mask with vpextrb/kshift/kxor sequences.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret %__a and the vector loaded from %__b as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of four i32 lanes, both operands in registers.
; The 4 compare bits are ANDed with the low 4 bits of %__u, zero-padded to
; 16 lanes, and returned as an i16 bitmask.
; With AVX512VL the expectations use a masked vpcmpled with swapped operands;
; with AVX512F only they use vpcmpgtd + vpandn and rebuild the mask with
; vpextrb/kshift/kxor sequences.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed >= compare of four i32 lanes; the second operand is a full
; 128-bit vector loaded from %__b. The 4 compare bits are ANDed with the low
; 4 bits of %__u, zero-padded to 16 lanes, and returned as an i16 bitmask.
; With AVX512VL the expectations fold load and mask into one vpcmpnltd; with
; AVX512F only they use vmovdqa + vpcmpgtd + vpandn and rebuild the mask with
; vpextrb/kshift/kxor sequences.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret %__a and the vector loaded from %__b as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed >= compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The 4 compare bits are zero-padded to
; 16 lanes and returned as an i16 bitmask.
; With AVX512VL the expectations use the embedded-broadcast form
; (%rdi){1to4}; with AVX512F only they use vpbroadcastd, invert vpcmpgtd via
; vpcmpeqd/vpxor and rebuild the mask with vpextrb/kshift/kxor sequences.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of four i32 lanes against a scalar i32 loaded from
; %__b and splatted to all lanes. The 4 compare bits are ANDed with the low
; 4 bits of %__u, zero-padded to 16 lanes, and returned as an i16 bitmask.
; With AVX512VL the expectations use a masked embedded-broadcast vpcmpnltd;
; with AVX512F only they use vpbroadcastd + vpcmpgtd + vpandn and rebuild the
; mask with vpextrb/kshift/kxor sequences.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here (mask first) differs from the non-broadcast
; masked variants in this file, which put the compare result first.
%4 = and <4 x i1> %extract.i, %2
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed >= compare of four i32 lanes, both operands in registers.
; The 4 compare bits are zero-padded to 32 lanes and returned as an i32
; bitmask.
; With AVX512VL the expectations use vpcmpled with swapped operands (b <= a);
; with AVX512F only they invert vpcmpgtd via vpcmpeqd/vpxor, then form the
; 32-bit result in two 16-bit halves (vpshufb/vpmovsxbd/vptestmd) stored to a
; 32-byte-aligned stack slot and reloaded with movl.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed >= compare of four i32 lanes; the second operand is a full
; 128-bit vector loaded from %__b. The 4 compare bits are zero-padded to 32
; lanes and returned as an i32 bitmask.
; With AVX512VL the expectations fold the load into vpcmpnltd; with AVX512F
; only they load with vmovdqa, invert vpcmpgtd via vpcmpeqd/vpxor, then form
; the 32-bit result in two 16-bit halves stored to a 32-byte-aligned stack
; slot and reloaded with movl.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret %__a and the vector loaded from %__b as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of four i32 lanes, both operands in registers.
; The 4 compare bits are ANDed with the low 4 bits of %__u, zero-padded to
; 32 lanes, and returned as an i32 bitmask.
; With AVX512VL the expectations use a masked vpcmpled with swapped operands;
; with AVX512F only they use vpcmpgtd + vpandn against a vector built from
; the mask bits, then form the 32-bit result in two 16-bit halves stored to a
; 32-byte-aligned stack slot and reloaded with movl.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed >= compare of four i32 lanes; the second operand is a full
; 128-bit vector loaded from %__b. The 4 compare bits are ANDed with the low
; 4 bits of %__u, zero-padded to 32 lanes, and returned as an i32 bitmask.
; With AVX512VL the expectations fold load and mask into one vpcmpnltd; with
; AVX512F only they use vmovdqa + vpcmpgtd + vpandn, then form the 32-bit
; result in two 16-bit halves stored to a 32-byte-aligned stack slot and
; reloaded with movl.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret %__a and the vector loaded from %__b as four i32 lanes each.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed >= compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The 4 compare bits are zero-padded to
; 32 lanes and returned as an i32 bitmask.
; With AVX512VL the expectations use the embedded-broadcast form
; (%rdi){1to4}; with AVX512F only they use vpbroadcastd, invert vpcmpgtd via
; vpcmpeqd/vpxor, then form the 32-bit result in two 16-bit halves stored to
; a 32-byte-aligned stack slot and reloaded with movl.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of four i32 lanes against a scalar i32 loaded from
; %__b and splatted to all lanes. The 4 compare bits are ANDed with the low
; 4 bits of %__u, zero-padded to 32 lanes, and returned as an i32 bitmask.
; With AVX512VL the expectations use a masked embedded-broadcast vpcmpnltd;
; with AVX512F only they use vpbroadcastd + vpcmpgtd + vpandn, then form the
; 32-bit result in two 16-bit halves stored to a 32-byte-aligned stack slot
; and reloaded with movl.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the scalar loaded from %__b: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Only bits 0-3 of %__u take part: one mask bit per compared lane.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here (mask first) differs from the non-broadcast
; masked variants in this file, which put the compare result first.
%4 = and <4 x i1> %extract.i, %2
; Every index >= 4 selects from the zeroinitializer operand, so result bits
; 4-31 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed >= compare of <4 x i32> views of %__a and %__b; the <4 x i1> result
; is zero-extended to 64 lanes and returned as an i64.
; With AVX512VL the expected code is vpcmpled with the operands swapped
; (b <= a). Without it the expected code inverts a vpcmpgtd result and
; assembles the 64-bit value from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed >= compare of a <4 x i32> view of %__a against a full vector loaded
; from %__b; the <4 x i1> result is zero-extended to 64 lanes and returned
; as an i64.
; With AVX512VL the expected code folds the load into vpcmpnltd. Without it
; the expected code loads with vmovdqa, inverts a vpcmpgtd result and
; assembles the 64-bit value from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; The right-hand operand is a whole <2 x i64> vector read from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of <4 x i32> views of %__a and %__b. The
; <4 x i1> result is ANDed with elements 0-3 of %__u (viewed as <8 x i1>),
; zero-extended to 64 lanes and returned as an i64.
; With AVX512VL the expected code is a masked vpcmpled with the operands
; swapped (b <= a). Without it the expected code moves the four mask bits
; into a vector with vpinsrb, combines them with the vpcmpgtd result via
; vpandn, and assembles the 64-bit value from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Keep only elements 0-3 of the 8-element mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed >= compare of a <4 x i32> view of %__a against a full vector
; loaded from %__b. The <4 x i1> result is ANDed with elements 0-3 of %__u
; (viewed as <8 x i1>), zero-extended to 64 lanes and returned as an i64.
; With AVX512VL the expected code folds the load into a masked vpcmpnltd.
; Without it the expected code loads with vmovdqa, moves the four mask bits
; into a vector with vpinsrb, combines them with the vpcmpgtd result via
; vpandn, and assembles the 64-bit value from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; The right-hand operand is a whole <2 x i64> vector read from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
; Keep only elements 0-3 of the 8-element mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick the masked compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed >= compare of a <4 x i32> view of %__a against the i32 at %__b
; splatted to all four lanes; the <4 x i1> result is zero-extended to 64
; lanes and returned as an i64.
; With AVX512VL the expected code is a vpcmpnltd with an embedded {1to4}
; broadcast. Without it the expected code broadcasts with vpbroadcastd,
; inverts a vpcmpgtd result and assembles the 64-bit value from two 32-bit
; stack loads.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the i32 loaded from %__b across all four lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of a <4 x i32> view of %__a against the i32 at
; %__b splatted to all four lanes. The <4 x i1> result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>), zero-extended to 64 lanes and
; returned as an i64.
; With AVX512VL the expected code is a masked vpcmpnltd with an embedded
; {1to4} broadcast. Without it the expected code broadcasts with
; vpbroadcastd, moves the four mask bits into a vector with vpinsrb,
; combines them with the vpcmpgtd result via vpandn, and assembles the
; 64-bit value from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the i32 loaded from %__b across all four lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
; Keep only elements 0-3 of the 8-element mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 pick the masked compare bits; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed >= compare of <8 x i32> views of %__a and %__b; the <8 x i1> result
; is zero-extended to 16 lanes and returned as an i16.
; With AVX512VL the expected code is a ymm vpcmpled with the operands swapped
; (b <= a). Without it the expected code runs the compare on the full zmm
; registers and clears mask bits 8-15 with a kshiftlw/kshiftrw pair.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Signed >= compare of an <8 x i32> view of %__a against a full vector loaded
; from %__b; the <8 x i1> result is zero-extended to 16 lanes and returned
; as an i16.
; With AVX512VL the expected code folds the load into a ymm vpcmpnltd.
; Without it the expected code loads with vmovdqa, compares on the full zmm
; registers and clears mask bits 8-15 with a kshiftlw/kshiftrw pair.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; The right-hand operand is a whole <4 x i64> vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of <8 x i32> views of %__a and %__b. The
; <8 x i1> result is ANDed with %__u (viewed as <8 x i1>), zero-extended to
; 16 lanes and returned as an i16.
; With AVX512VL the expected code is a masked ymm vpcmpled with the operands
; swapped (b <= a). Without it the expected code runs the masked compare on
; the full zmm registers and clears mask bits 8-15 with a kshiftlw/kshiftrw
; pair.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; All eight bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed >= compare of an <8 x i32> view of %__a against a full vector
; loaded from %__b. The <8 x i1> result is ANDed with %__u (viewed as
; <8 x i1>), zero-extended to 16 lanes and returned as an i16.
; With AVX512VL the expected code folds the load into a masked ymm
; vpcmpnltd. Without it the expected code loads with vmovdqa, runs the
; masked compare on the full zmm registers and clears mask bits 8-15 with a
; kshiftlw/kshiftrw pair.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; The right-hand operand is a whole <4 x i64> vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; All eight bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed >= compare of an <8 x i32> view of %__a against the i32 at %__b
; splatted to all eight lanes; the <8 x i1> result is zero-extended to 16
; lanes and returned as an i16.
; With AVX512VL the expected code is a ymm vpcmpnltd with an embedded {1to8}
; broadcast. Without it the expected code broadcasts with vpbroadcastd,
; compares on the full zmm registers and clears mask bits 8-15 with a
; kshiftlw/kshiftrw pair.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the i32 loaded from %__b across all eight lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of an <8 x i32> view of %__a against the i32 at
; %__b splatted to all eight lanes. The <8 x i1> result is ANDed with %__u
; (viewed as <8 x i1>), zero-extended to 16 lanes and returned as an i16.
; With AVX512VL the expected code is a masked ymm vpcmpnltd with an embedded
; {1to8} broadcast. Without it the expected code broadcasts with
; vpbroadcastd, runs the masked compare on the full zmm registers and clears
; mask bits 8-15 with a kshiftlw/kshiftrw pair.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the i32 loaded from %__b across all eight lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; All eight bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed >= compare of <8 x i32> views of %__a and %__b; the <8 x i1> result
; is zero-extended to 32 lanes and returned as an i32.
; With AVX512VL the expected code is a ymm vpcmpled with the operands swapped
; (b <= a). Without it the expected code compares on the full zmm registers,
; moves each of the low eight mask bits to a general-purpose register,
; rebuilds them as bytes with vpinsrb, and reads the final 32-bit value back
; from a 32-byte-aligned stack slot.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed >= compare of an <8 x i32> view of %__a against a full vector loaded
; from %__b; the <8 x i1> result is zero-extended to 32 lanes and returned
; as an i32.
; With AVX512VL the expected code folds the load into a ymm vpcmpnltd.
; Without it the expected code loads with vmovdqa, compares on the full zmm
; registers, moves each of the low eight mask bits to a general-purpose
; register, rebuilds them as bytes with vpinsrb, and reads the final 32-bit
; value back from a 32-byte-aligned stack slot.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; The right-hand operand is a whole <4 x i64> vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of <8 x i32> views of %__a and %__b. The
; <8 x i1> result is ANDed with %__u (viewed as <8 x i1>), zero-extended to
; 32 lanes and returned as an i32.
; With AVX512VL the expected code is a masked ymm vpcmpled with the operands
; swapped (b <= a). Without it the expected code runs the masked compare on
; the full zmm registers, moves each of the low eight mask bits to a
; general-purpose register, rebuilds them as bytes with vpinsrb, and reads
; the final 32-bit value back from a 32-byte-aligned stack slot.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; All eight bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed ">=" compare of the eight i32 lanes of %__a against eight i32
; lanes loaded from %__b. The 8-bit result is ANDed with the mask %__u, padded
; with zero lanes to 32 bits, and returned as an i32 bitmask.
; Expected code, VLX run: one masked ymm compare with the load folded in.
; Expected code, NoVLX run: a zmm compare whose mask bits are moved one at a
; time into a vector, re-tested into a mask, and read back through the stack.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 256-bit operands as eight i32 lanes; the second one comes from memory.
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; every index >= 8 picks a lane of the
; zero vector, so bits 8-31 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed ">=" compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all eight lanes. The 8-bit result is
; padded with zero lanes to 32 bits and returned as an i32 bitmask.
; Expected code, VLX run: one ymm compare using an embedded {1to8} broadcast.
; Expected code, NoVLX run: a separate vpbroadcastd, a zmm compare, and a
; bit-by-bit rebuild of the mask that is read back through the stack.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into all lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; every index >= 8 picks a lane of the zero
; vector, so bits 8-31 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed ">=" compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all eight lanes. The 8-bit result is
; ANDed with the mask %__u, padded with zero lanes to 32 bits, and returned as
; an i32 bitmask.
; Expected code, VLX run: one masked ymm compare with an embedded {1to8} broadcast.
; Expected code, NoVLX run: a separate vpbroadcastd, a masked zmm compare, and a
; bit-by-bit rebuild of the mask that is read back through the stack.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into all lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result
; (the mask is the first operand of the "and" here).
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 pick the masked compare bits; every index >= 8 picks a lane of the
; zero vector, so bits 8-31 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed ">=" compare of the eight i32 lanes of %__a against the eight
; i32 lanes of %__b. The 8-bit result is padded with zero lanes to 64 bits and
; returned as an i64 bitmask.
; Expected code, VLX run: one ymm vpcmpled with the operands swapped, then kmovq.
; Expected code, NoVLX run: a zmm compare, a bit-by-bit rebuild of the mask, and
; two 32-bit stack words combined with shlq/orq into the 64-bit return value.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 256-bit arguments as eight i32 lanes.
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; every index >= 8 picks a lane of the zero
; vector, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed ">=" compare of the eight i32 lanes of %__a against eight i32
; lanes loaded from %__b. The 8-bit result is padded with zero lanes to 64 bits
; and returned as an i64 bitmask.
; Expected code, VLX run: one ymm compare with the load folded in, then kmovq.
; Expected code, NoVLX run: an explicit vmovdqa load, a zmm compare, a
; bit-by-bit rebuild of the mask, and two 32-bit stack words combined with
; shlq/orq into the 64-bit return value.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 256-bit operands as eight i32 lanes; the second one comes from memory.
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; every index >= 8 picks a lane of the zero
; vector, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed ">=" compare of the eight i32 lanes of %__a against the eight
; i32 lanes of %__b. The 8-bit result is ANDed with the mask %__u, padded with
; zero lanes to 64 bits, and returned as an i64 bitmask.
; Expected code, VLX run: one masked ymm vpcmpled with the operands swapped.
; Expected code, NoVLX run: a masked zmm compare, a bit-by-bit rebuild of the
; mask, and two 32-bit stack words combined with shlq/orq into the result.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 256-bit arguments as eight i32 lanes.
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; every index >= 8 picks a lane of the
; zero vector, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed ">=" compare of the eight i32 lanes of %__a against eight i32
; lanes loaded from %__b. The 8-bit result is ANDed with the mask %__u, padded
; with zero lanes to 64 bits, and returned as an i64 bitmask.
; Expected code, VLX run: one masked ymm compare with the load folded in.
; Expected code, NoVLX run: an explicit vmovdqa load, a masked zmm compare, a
; bit-by-bit rebuild of the mask, and two 32-bit stack words combined with
; shlq/orq into the 64-bit return value.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 256-bit operands as eight i32 lanes; the second one comes from memory.
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; every index >= 8 picks a lane of the
; zero vector, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed ">=" compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all eight lanes. The 8-bit result is
; padded with zero lanes to 64 bits and returned as an i64 bitmask.
; Expected code, VLX run: one ymm compare using an embedded {1to8} broadcast.
; Expected code, NoVLX run: a separate vpbroadcastd, a zmm compare, a bit-by-bit
; rebuild of the mask, and two 32-bit stack words combined with shlq/orq.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into all lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; Indices 0-7 pick the compare bits; every index >= 8 picks a lane of the zero
; vector, so bits 8-63 of the returned mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed ">=" compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all eight lanes. The 8-bit result is
; ANDed with the mask %__u, padded with zero lanes to 64 bits, and returned as
; an i64 bitmask.
; Expected code, VLX run: one masked ymm compare with an embedded {1to8} broadcast.
; Expected code, NoVLX run: a separate vpbroadcastd, a masked zmm compare, a
; bit-by-bit rebuild of the mask, and two 32-bit stack words combined with
; shlq/orq into the 64-bit return value.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into all lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result
; (the mask is the first operand of the "and" here).
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 pick the masked compare bits; every index >= 8 picks a lane of the
; zero vector, so bits 8-63 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed ">=" compare of the sixteen i32 lanes of %__a against the
; sixteen i32 lanes of %__b. The 16-bit result is padded with zero lanes to
; 32 bits and returned as an i32 bitmask.
; Expected code, VLX run: one zmm vpcmpled with the operands swapped, then kmovd.
; Expected code, NoVLX run: the same zmm compare, after which the mask is
; expanded to a vector (vpternlogd/vpmovdb/vpmovsxbd), re-tested, and read back
; through the stack.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 512-bit arguments as sixteen i32 lanes.
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare bits; indices 16-31 pick the lanes of the zero
; vector, so bits 16-31 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed ">=" compare of the sixteen i32 lanes of %__a against sixteen
; i32 lanes loaded from %__b. The 16-bit result is padded with zero lanes to
; 32 bits and returned as an i32 bitmask.
; Expected code, VLX run: one zmm vpcmpnltd with the load folded in, then kmovd.
; Expected code, NoVLX run: the same folded compare, after which the mask is
; expanded to a vector (vpternlogd/vpmovdb/vpmovsxbd), re-tested, and read back
; through the stack.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 512-bit operands as sixteen i32 lanes; the second one comes from memory.
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare bits; indices 16-31 pick the lanes of the zero
; vector, so bits 16-31 of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed ">=" compare of the sixteen i32 lanes of %__a against the
; sixteen i32 lanes of %__b. The 16-bit result is ANDed with the mask %__u,
; padded with zero lanes to 32 bits, and returned as an i32 bitmask.
; Expected code, VLX run: one masked zmm vpcmpled with the operands swapped.
; Expected code, NoVLX run: the same masked compare, after which the mask is
; expanded to a vector (vpternlogd/vpmovdb/vpmovsxbd), re-tested, and read back
; through the stack.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 512-bit arguments as sixteen i32 lanes.
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 pick the masked compare bits; indices 16-31 pick the lanes of the
; zero vector, so bits 16-31 of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed ">=" compare of the sixteen i32 lanes of %__a against sixteen
; i32 lanes loaded from %__b. The 16-bit result is ANDed with the mask %__u,
; padded with zero lanes to 32 bits, and returned as an i32 bitmask.
; Expected code, VLX run: one masked zmm vpcmpnltd with the load folded in.
; Expected code, NoVLX run: the same masked folded compare, after which the mask
; is expanded to a vector (vpternlogd/vpmovdb/vpmovsxbd), re-tested, and read
; back through the stack.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; View both 512-bit operands as sixteen i32 lanes; the second one comes from memory.
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Expand the scalar mask into one i1 per lane and apply it to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 pick the masked compare bits; indices 16-31 pick the lanes of the
; zero vector, so bits 16-31 of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed >= compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to every lane. The 16 result bits are padded
; with zero lanes to 32 bits and returned as an i32.
; Both runs are expected to fold the scalar load as a {1to16} broadcast operand
; of vpcmpnltd; the AVX512F-only run then widens the mask through a stack slot.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar across all sixteen lanes.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare lanes; 16-31 pick lanes of the zero vector.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to every lane. The result is ANDed with %__u,
; padded with zero lanes to 32 bits and returned as an i32.
; Both runs are expected to use a masked vpcmpnltd with a {1to16} broadcast
; memory operand; the AVX512F-only run then widens the mask through a stack slot.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar across all sixteen lanes.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
; Apply the caller-supplied lane mask (mask is the first operand of the and here).
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Indices 0-15 pick the masked compare lanes; 16-31 pick lanes of the zero vector.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed >= compare of sixteen i32 lanes, register vs. register.
; The 16 result bits are padded with zero lanes to 64 bits and returned as i64.
; With register operands the expected instruction is vpcmpled with the operands
; swapped (b <= a). The AVX512F-only run builds the 64-bit value from two
; 32-bit stack loads combined with shlq/orq.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare lanes; all remaining indices (16-31, repeated)
; pick lanes of the zero vector, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked signed >= compare of sixteen i32 lanes: %__a against a 512-bit vector
; loaded from %__b. The 16 result bits are padded with zero lanes to 64 bits
; and returned as i64.
; Both runs are expected to fold the load into vpcmpnltd; the AVX512F-only run
; builds the 64-bit value from two 32-bit stack loads combined with shlq/orq.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare lanes; all remaining indices (16-31, repeated)
; pick lanes of the zero vector, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of sixteen i32 lanes, register vs. register.
; The result is ANDed with %__u, padded with zero lanes to 64 bits and
; returned as i64.
; With register operands the expected instruction is a masked vpcmpled with the
; operands swapped (b <= a). The AVX512F-only run builds the 64-bit value from
; two 32-bit stack loads combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Apply the caller-supplied 16-bit lane mask to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 pick the masked compare lanes; all remaining indices (16-31,
; repeated) pick lanes of the zero vector, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed >= compare of sixteen i32 lanes: %__a against a 512-bit vector
; loaded from %__b. The result is ANDed with %__u, padded with zero lanes to
; 64 bits and returned as i64.
; Both runs are expected to fold the load into a masked vpcmpnltd; the
; AVX512F-only run builds the 64-bit value from two 32-bit stack loads
; combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
; Apply the caller-supplied 16-bit lane mask to the compare result.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 pick the masked compare lanes; all remaining indices (16-31,
; repeated) pick lanes of the zero vector, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed >= compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to every lane. The 16 result bits are padded
; with zero lanes to 64 bits and returned as i64.
; Both runs are expected to use vpcmpnltd with a {1to16} broadcast memory
; operand; the AVX512F-only run builds the 64-bit value from two 32-bit stack
; loads combined with shlq/orq.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar across all sixteen lanes.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
; Indices 0-15 pick the compare lanes; all remaining indices (16-31, repeated)
; pick lanes of the zero vector, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to every lane. The result is ANDed with %__u,
; padded with zero lanes to 64 bits and returned as i64.
; Both runs are expected to use a masked vpcmpnltd with a {1to16} broadcast
; memory operand; the AVX512F-only run builds the 64-bit value from two 32-bit
; stack loads combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Splat the loaded scalar across all sixteen lanes.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
; Apply the caller-supplied lane mask (mask is the first operand of the and here).
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Indices 0-15 pick the masked compare lanes; all remaining indices (16-31,
; repeated) pick lanes of the zero vector, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed >= compare of two i64 lanes, register vs. register. The two
; result bits are padded with two zero lanes and returned as an i4.
; With AVX512VL the expected instruction is vpcmpleq with swapped operands
; (b <= a). The AVX512F-only run has no 128-bit mask compare, so it computes
; b > a with vpcmpgtq, inverts it by xor with all-ones, and then converts the
; vector result to a mask with vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Lanes 0-1 come from the compare, lanes 2-3 from the zero vector.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Unmasked signed >= compare of two i64 lanes: %__a against a 128-bit vector
; loaded from %__b. The two result bits are padded with two zero lanes and
; returned as an i4.
; With AVX512VL the load is folded into vpcmpnltq. The AVX512F-only run loads
; the operand separately (vmovdqa), computes b > a with vpcmpgtq, inverts it,
; and converts the vector result to a mask with vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this function are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Lanes 0-1 come from the compare, lanes 2-3 from the zero vector.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked signed >= compare of two i64 lanes, register vs. register. The result
; is ANDed with the low two bits of %__u, padded with two zero lanes and
; returned as an i4.
; With AVX512VL the expected instruction is a masked vpcmpleq with swapped
; operands. The AVX512F-only run computes b > a with vpcmpgtq, rebuilds the two
; mask bits as a vector (kshiftrw/vmovd/vpinsrb) and combines them with vpandn,
; i.e. NOT(b > a) AND mask.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Only the low two bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Lanes 0-1 come from the masked compare, lanes 2-3 from the zero vector.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked signed >= compare of two i64 lanes: %__a against a 128-bit vector
; loaded from %__b. The result is ANDed with the low two bits of %__u, padded
; with two zero lanes and returned as an i4.
; With AVX512VL the load is folded into a masked vpcmpnltq. The AVX512F-only
; run loads the operand separately, computes b > a with vpcmpgtq, rebuilds the
; mask bits as a vector and combines them with vpandn.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The <2 x i64> bitcasts in this function are identity casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Only the low two bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Lanes 0-1 come from the masked compare, lanes 2-3 from the zero vector.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Unmasked signed >= compare of two i64 lanes of %__a against a single i64
; loaded from %__b and splatted to both lanes. The two result bits are padded
; with two zero lanes and returned as an i4.
; With AVX512VL the scalar load is folded as a {1to2} broadcast operand of
; vpcmpnltq. The AVX512F-only run materialises the splat with vpbroadcastq,
; computes b > a with vpcmpgtq, inverts it, and converts to a mask with vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar across both lanes.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Lanes 0-1 come from the compare, lanes 2-3 from the zero vector.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked signed >= compare of two i64 lanes of %__a against a single i64
; loaded from %__b and splatted to both lanes. The result is ANDed with the low
; two bits of %__u, padded with two zero lanes and returned as an i4.
; With AVX512VL the expected instruction is a masked vpcmpnltq with a {1to2}
; broadcast operand. The AVX512F-only run uses vpbroadcastq + vpcmpgtq, rebuilds
; the mask bits as a vector and combines them with vpandn.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar across both lanes.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Only the low two bits of %__u are used as the lane mask (mask is the first
; operand of the and here).
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Lanes 0-1 come from the masked compare, lanes 2-3 from the zero vector.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Unmasked signed >= compare of two i64 lanes, register vs. register. The two
; result bits are padded with six zero lanes and returned as an i8.
; With AVX512VL the expected instruction is vpcmpleq with swapped operands
; (b <= a). The AVX512F-only run computes b > a with vpcmpgtq, inverts it, then
; extracts each lane with vpextrb and inserts the bits into a k-register using
; shift/xor/or sequences.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 0-1 pick the compare lanes; indices 2-3 (repeated) pick zero lanes.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked signed >= compare of two i64 lanes: %__a against a 128-bit vector
; loaded from %__b. The two result bits are padded with six zero lanes and
; returned as an i8.
; With AVX512VL the load is folded into vpcmpnltq. The AVX512F-only run loads
; the operand separately, computes b > a with vpcmpgtq, inverts it, then
; extracts each lane with vpextrb and inserts the bits into a k-register.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The bitcasts in this function are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 0-1 pick the compare lanes; indices 2-3 (repeated) pick zero lanes.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed >= compare of two i64 lanes, register vs. register. The result
; is ANDed with the low two bits of %__u, padded with six zero lanes and
; returned as an i8.
; With AVX512VL the expected instruction is a masked vpcmpleq with swapped
; operands. The AVX512F-only run computes b > a with vpcmpgtq, combines it with
; a vectorised copy of the mask bits via vpandn, then extracts each lane with
; vpextrb and inserts the bits into a k-register.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The two bitcasts below are identity casts (same source and destination type).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Only the low two bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 pick the masked compare lanes; indices 2-3 (repeated) pick zero lanes.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked signed >= compare of two i64 lanes: %__a against a 128-bit vector
; loaded from %__b. The result is ANDed with the low two bits of %__u, padded
; with six zero lanes and returned as an i8.
; With AVX512VL the load is folded into a masked vpcmpnltq. The AVX512F-only
; run loads the operand separately, computes b > a with vpcmpgtq, combines it
; with a vectorised copy of the mask bits via vpandn, then extracts each lane
; with vpextrb and inserts the bits into a k-register.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The <2 x i64> bitcasts in this function are identity casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Only the low two bits of %__u are used as the lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 pick the masked compare lanes; indices 2-3 (repeated) pick zero lanes.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of two i64 lanes of %__a against a single i64
; loaded from %__b and splatted to both lanes. The two result bits are padded
; with six zero lanes and returned as an i8.
; With AVX512VL the scalar load is folded as a {1to2} broadcast operand of
; vpcmpnltq. The AVX512F-only run materialises the splat with vpbroadcastq,
; computes b > a with vpcmpgtq, inverts it, then extracts each lane with
; vpextrb and inserts the bits into a k-register.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar across both lanes.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Indices 0-1 pick the compare lanes; indices 2-3 (repeated) pick zero lanes.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b
; and splatted to both lanes. The compare result is ANDed with the low two bits
; of %__u, widened to <8 x i1> with zero upper lanes, and returned as i8.
; The VLX assertions expect one vpcmpnltq with a {1to2} broadcast and a {%k1}
; write mask. The NoVLX assertions expect b > a via vpcmpgtq, with the
; inversion folded into a vpandn against the mask bits placed in a vector.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast; the operand is already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it into both lanes (shuffle indices 0, 0).
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is widened to <16 x i1> with zero upper lanes and returned as i16.
; The VLX assertions expect a single vpcmpleq with the operands swapped
; (b <= a). The NoVLX assertions expect b > a via vpcmpgtq inverted with an
; all-ones vpxor, with the two result bits assembled in mask registers.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both operands are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked signed >= compare of <2 x i64> %__a against a <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <16 x i1> with zero upper
; lanes and returned as i16.
; The VLX assertions expect the load folded into a single vpcmpnltq. The NoVLX
; assertions expect a separate vmovdqa load, then b > a via vpcmpgtq inverted
; with an all-ones vpxor, with the result bits assembled in mask registers.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both values are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of two <2 x i64> register operands. The compare
; result is ANDed with the low two bits of %__u, widened to <16 x i1> with zero
; upper lanes, and returned as i16.
; The VLX assertions expect a single vpcmpleq with swapped operands and a
; {%k1} write mask. The NoVLX assertions expect b > a via vpcmpgtq, with the
; inversion folded into a vpandn against the mask bits placed in a vector.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both operands are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed >= compare of <2 x i64> %__a against a <2 x i64> vector loaded
; from %__b. The compare result is ANDed with the low two bits of %__u, widened
; to <16 x i1> with zero upper lanes, and returned as i16.
; The VLX assertions expect the load folded into a single vpcmpnltq with a
; {%k1} write mask. The NoVLX assertions expect a vmovdqa load, b > a via
; vpcmpgtq, and the inversion folded into a vpandn against the mask bits.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both values are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is widened to <16 x i1> with
; zero upper lanes and returned as i16.
; The VLX assertions expect one vpcmpnltq with an embedded {1to2} broadcast.
; The NoVLX assertions expect vpbroadcastq, then b > a via vpcmpgtq inverted
; with an all-ones vpxor, with the two result bits assembled in mask registers.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast; the operand is already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it into both lanes (shuffle indices 0, 0).
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b
; and splatted to both lanes. The compare result is ANDed with the low two bits
; of %__u, widened to <16 x i1> with zero upper lanes, and returned as i16.
; The VLX assertions expect one vpcmpnltq with a {1to2} broadcast and a {%k1}
; write mask. The NoVLX assertions expect b > a via vpcmpgtq, with the
; inversion folded into a vpandn against the mask bits placed in a vector.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast; the operand is already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it into both lanes (shuffle indices 0, 0).
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as i32.
; The VLX assertions expect a single vpcmpleq with the operands swapped
; (b <= a). The NoVLX assertions expect b > a via vpcmpgtq inverted with an
; all-ones vpxor, then two 16-bit mask halves stored to a 32-byte-aligned
; stack slot and reloaded as one 32-bit integer.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both operands are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked signed >= compare of <2 x i64> %__a against a <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <32 x i1> with zero upper
; lanes and returned as i32.
; The VLX assertions expect the load folded into a single vpcmpnltq. The NoVLX
; assertions expect a vmovdqa load, b > a via vpcmpgtq inverted with an
; all-ones vpxor, then two 16-bit mask halves stored to a 32-byte-aligned
; stack slot and reloaded as one 32-bit integer.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both values are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of two <2 x i64> register operands. The compare
; result is ANDed with the low two bits of %__u, widened to <32 x i1> with zero
; upper lanes, and returned as i32.
; The VLX assertions expect a single vpcmpleq with swapped operands and a
; {%k1} write mask. The NoVLX assertions expect b > a via vpcmpgtq with the
; inversion folded into a vpandn against the mask bits, then two 16-bit mask
; halves stored to a 32-byte-aligned stack slot and reloaded as 32 bits.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both operands are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed >= compare of <2 x i64> %__a against a <2 x i64> vector loaded
; from %__b. The compare result is ANDed with the low two bits of %__u, widened
; to <32 x i1> with zero upper lanes, and returned as i32.
; The VLX assertions expect the load folded into a single vpcmpnltq with a
; {%k1} write mask. The NoVLX assertions expect a vmovdqa load, b > a via
; vpcmpgtq with the inversion folded into a vpandn against the mask bits, then
; two 16-bit mask halves stored to an aligned stack slot and reloaded as 32 bits.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts; both values are already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is widened to <32 x i1> with
; zero upper lanes and returned as i32.
; The VLX assertions expect one vpcmpnltq with an embedded {1to2} broadcast.
; The NoVLX assertions expect vpbroadcastq, b > a via vpcmpgtq inverted with an
; all-ones vpxor, then two 16-bit mask halves stored to a 32-byte-aligned
; stack slot and reloaded as one 32-bit integer.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast; the operand is already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it into both lanes (shuffle indices 0, 0).
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b
; and splatted to both lanes. The compare result is ANDed with the low two bits
; of %__u, widened to <32 x i1> with zero upper lanes, and returned as i32.
; The VLX assertions expect one vpcmpnltq with a {1to2} broadcast and a {%k1}
; write mask. The NoVLX assertions expect vpbroadcastq, b > a via vpcmpgtq with
; the inversion folded into a vpandn against the mask bits, then two 16-bit
; mask halves stored to an aligned stack slot and reloaded as 32 bits.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast; the operand is already <2 x i64>.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it into both lanes (shuffle indices 0, 0).
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
; Take bits 0 and 1 of %__u as the per-lane mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 pick lanes of the zeroinitializer operand, so bits 2..31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed >= compare of two <4 x i64> register operands. The 4-bit
; result is zero-extended to 8 lanes and returned as an i8 bitmask.
; Expected output: with AVX512VL a single vpcmpleq with the operands swapped; in
; the AVX512F-only run vpcmpgtq followed by an all-ones xor (vpcmpeqd/vpxor),
; then the mask is built one bit at a time with vpextrb and k-register
; shift/xor sequences.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 select the compare bits, indices 4-7 select the
; lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The 4-bit result is zero-extended to 8 lanes and returned as an i8.
; Expected output: with AVX512VL a single vpcmpnltq with a memory operand; in
; the AVX512F-only run an explicit vmovdqa load, vpcmpgtq and an all-ones xor
; (vpcmpeqd/vpxor), then the mask is built one bit at a time with vpextrb and
; k-register shift/xor sequences.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 select the compare bits, indices 4-7 select the
; lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed >= compare of two <4 x i64> register operands. The low four
; bits of %__u gate the four result lanes, and the 4-bit result is zero-extended
; to 8 lanes and returned as an i8 bitmask.
; Expected output: with AVX512VL a single write-masked vpcmpleq with the
; operands swapped; in the AVX512F-only run the compare is inverted with
; vpcmpeqd/vpxor, the mask bits are moved into a vector with vpinsrb and
; combined by vpand, then the result mask is built one bit at a time.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; View the mask byte as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 8 lanes: indices 0-3 select the masked compare bits, indices 4-7
; select the lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The low four bits of %__u gate the four result lanes, and the 4-bit
; result is zero-extended to 8 lanes and returned as an i8 bitmask.
; Expected output: with AVX512VL a single write-masked vpcmpnltq with a memory
; operand; in the AVX512F-only run an explicit vmovdqa load, an inverted
; vpcmpgtq, a vpand with the mask bits inserted via vpinsrb, and then a
; bit-by-bit construction of the result mask.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; View the mask byte as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 8 lanes: indices 0-3 select the masked compare bits, indices 4-7
; select the lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of <4 x i64> %__a against an i64 loaded from %__b
; and splatted to all four lanes. The 4-bit result is zero-extended to 8 lanes
; and returned as an i8 bitmask.
; Expected output: with AVX512VL a single vpcmpnltq with a {1to4} broadcast
; memory operand; in the AVX512F-only run vpbroadcastq + vpcmpgtq with an
; all-ones xor (vpcmpeqd/vpxor), then a bit-by-bit construction of the mask.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; Widen to 8 lanes: indices 0-3 select the compare bits, indices 4-7 select the
; lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked signed >= compare of <4 x i64> %__a against an i64 loaded from %__b and
; splatted to all four lanes. The low four bits of %__u gate the four result
; lanes, and the 4-bit result is zero-extended to 8 lanes and returned as an i8.
; Expected output: with AVX512VL a single write-masked vpcmpnltq with a {1to4}
; broadcast memory operand; in the AVX512F-only run vpbroadcastq, an inverted
; vpcmpgtq, a vpand with the mask bits inserted via vpinsrb, and then a
; bit-by-bit construction of the result mask.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; View the mask byte as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Here the mask is the first `and` operand and the compare result the second;
; the expected vpand above lists %xmm0 before %xmm1.
%4 = and <4 x i1> %extract.i, %2
; Widen to 8 lanes: indices 0-3 select the masked compare bits, indices 4-7
; select the lanes of the zeroinitializer operand, so lanes 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked signed >= compare of two <4 x i64> register operands. The 4-bit
; result is zero-extended to 16 lanes and returned as an i16 bitmask.
; Expected output: with AVX512VL a single vpcmpleq with the operands swapped; in
; the AVX512F-only run vpcmpgtq followed by an all-ones xor (vpcmpeqd/vpxor),
; then the mask is built one bit at a time with vpextrb and k-register
; shift/xor sequences.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; Widen to 16 lanes: indices 0-3 select the compare bits, and the repeated
; indices 4-7 select lanes of the zeroinitializer operand, so lanes 4-15 are
; zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The 4-bit result is zero-extended to 16 lanes and returned as an i16.
; Expected output: with AVX512VL a single vpcmpnltq with a memory operand; in
; the AVX512F-only run an explicit vmovdqa load, vpcmpgtq and an all-ones xor
; (vpcmpeqd/vpxor), then the mask is built one bit at a time with vpextrb and
; k-register shift/xor sequences.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; Widen to 16 lanes: indices 0-3 select the compare bits, and the repeated
; indices 4-7 select lanes of the zeroinitializer operand, so lanes 4-15 are
; zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of two <4 x i64> register operands. The low four
; bits of %__u gate the four result lanes, and the 4-bit result is zero-extended
; to 16 lanes and returned as an i16 bitmask.
; Expected output: with AVX512VL a single write-masked vpcmpleq with the
; operands swapped; in the AVX512F-only run the compare is inverted with
; vpcmpeqd/vpxor, the mask bits are moved into a vector with vpinsrb and
; combined by vpand, then the result mask is built one bit at a time.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; View the mask byte as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 16 lanes: indices 0-3 select the masked compare bits, and the
; repeated indices 4-7 select lanes of the zeroinitializer operand, so lanes
; 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The low four bits of %__u gate the four result lanes, and the 4-bit
; result is zero-extended to 16 lanes and returned as an i16 bitmask.
; Expected output: with AVX512VL a single write-masked vpcmpnltq with a memory
; operand; in the AVX512F-only run an explicit vmovdqa load, an inverted
; vpcmpgtq, a vpand with the mask bits inserted via vpinsrb, and then a
; bit-by-bit construction of the result mask.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts below are to the same type, i.e. no-ops.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
; Lane-wise signed a >= b.
%2 = icmp sge <4 x i64> %0, %1
; View the mask byte as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 16 lanes: indices 0-3 select the masked compare bits, and the
; repeated indices 4-7 select lanes of the zeroinitializer operand, so lanes
; 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Lowering test: signed `icmp sge` on <4 x i64> where the right-hand side is a
; single i64 loaded from %__b and splatted to all four lanes. The 4-bit compare
; result is widened to 16 bits and returned as an i16 mask.
; Expected with AVX512VL enabled: the load is folded into vpcmpnltq as a
; {1to4} broadcast memory operand.
; Expected with only AVX512F: vpbroadcastq, then vpcmpgtq computing (b > a),
; inverted by xor with all-ones to obtain (a >= b); the four lanes are then
; moved into a k-register one bit at a time.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0 everywhere.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 16 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Lowering test: masked signed `icmp sge` on <4 x i64> where the right-hand
; side is a single i64 loaded from %__b and splatted to all four lanes. The
; compare result is ANDed with the low four bits of %__u, widened to 16 bits
; and returned as an i16 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpnltq whose memory operand is a {1to4} broadcast.
; Expected with only AVX512F: vpbroadcastq + vpcmpgtq (b > a) inverted by xor
; with all-ones; the bits of %__u are expanded into a vector, combined with
; vpand, and the four lanes are moved into a k-register one bit at a time.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0 everywhere.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here (mask first); the expected vpand operand order in
; the AVX512F-only assertions follows it.
%4 = and <4 x i1> %extract.i, %2
; Widen to 16 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Lowering test: signed `icmp sge` on two <4 x i64> register operands, with
; the 4-bit result widened to 32 bits and returned as an i32 mask.
; Expected with AVX512VL enabled: a single vpcmpleq with the operands swapped
; (b <= a) writing a k-register, read back with kmovd.
; Expected with only AVX512F: vpcmpgtq (b > a) inverted by xor with all-ones,
; then the result is shuffled to bytes, converted to two 16-bit k-masks via
; vptestmd, spilled to a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 32 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Lowering test: signed `icmp sge` on <4 x i64> where the right-hand side is a
; full <4 x i64> vector loaded from %__b. The 4-bit result is widened to 32
; bits and returned as an i32 mask.
; Expected with AVX512VL enabled: the load is folded into vpcmpnltq as a
; memory operand.
; Expected with only AVX512F: vmovdqa load, vpcmpgtq (b > a) inverted by xor
; with all-ones, then the result is converted to two 16-bit k-masks via
; vptestmd, spilled to a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
; No-op bitcast of the loaded vector.
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 32 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Lowering test: masked signed `icmp sge` on two <4 x i64> register operands.
; The compare result is ANDed with the low four bits of %__u, widened to 32
; bits and returned as an i32 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpleq with swapped operands (b <= a).
; Expected with only AVX512F: vpcmpgtq (b > a) inverted by xor with all-ones;
; the bits of %__u are expanded into a vector and combined with vpand, then
; the result goes through vptestmd into two 16-bit k-masks that are spilled to
; a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 32 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Lowering test: masked signed `icmp sge` on <4 x i64> where the right-hand
; side is a full <4 x i64> vector loaded from %__b. The compare result is
; ANDed with the low four bits of %__u, widened to 32 bits and returned as an
; i32 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpnltq with the load folded as its memory operand.
; Expected with only AVX512F: vmovdqa load, vpcmpgtq (b > a) inverted by xor
; with all-ones; the bits of %__u are expanded into a vector and combined with
; vpand, then the result goes through vptestmd into two 16-bit k-masks that
; are spilled to a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
; No-op bitcast of the loaded vector.
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 32 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Lowering test: signed `icmp sge` on <4 x i64> where the right-hand side is a
; single i64 loaded from %__b and splatted to all four lanes. The 4-bit result
; is widened to 32 bits and returned as an i32 mask.
; Expected with AVX512VL enabled: the load is folded into vpcmpnltq as a
; {1to4} broadcast memory operand.
; Expected with only AVX512F: vpbroadcastq, vpcmpgtq (b > a) inverted by xor
; with all-ones, then the result is converted to two 16-bit k-masks via
; vptestmd, spilled to a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0 everywhere.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 32 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Lowering test: masked signed `icmp sge` on <4 x i64> where the right-hand
; side is a single i64 loaded from %__b and splatted to all four lanes. The
; compare result is ANDed with the low four bits of %__u, widened to 32 bits
; and returned as an i32 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpnltq whose memory operand is a {1to4} broadcast.
; Expected with only AVX512F: vpbroadcastq, vpcmpgtq (b > a) inverted by xor
; with all-ones; the bits of %__u are expanded into a vector and combined with
; vpand, then the result goes through vptestmd into two 16-bit k-masks that
; are spilled to a 32-byte-aligned stack slot and reloaded as one i32.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0 everywhere.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here (mask first); the expected vpand operand order in
; the AVX512F-only assertions follows it.
%4 = and <4 x i1> %extract.i, %2
; Widen to 32 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Lowering test: signed `icmp sge` on two <4 x i64> register operands, with
; the 4-bit result widened to 64 bits and returned as an i64 mask.
; Expected with AVX512VL enabled: a single vpcmpleq with the operands swapped
; (b <= a) writing a k-register, read back with kmovq.
; Expected with only AVX512F: vpcmpgtq (b > a) inverted by xor with all-ones;
; the mask is assembled on a 32-byte-aligned stack frame from 16-bit k-register
; stores (two of them zero), then the two 32-bit halves are reloaded and merged
; with shlq/orq.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 64 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Lowering test: signed `icmp sge` on <4 x i64> where the right-hand side is a
; full <4 x i64> vector loaded from %__b. The 4-bit result is widened to 64
; bits and returned as an i64 mask.
; Expected with AVX512VL enabled: the load is folded into vpcmpnltq as a
; memory operand and the mask is read back with kmovq.
; Expected with only AVX512F: vmovdqa load, vpcmpgtq (b > a) inverted by xor
; with all-ones; the mask is assembled on a 32-byte-aligned stack frame from
; 16-bit k-register stores (two of them zero), then the two 32-bit halves are
; reloaded and merged with shlq/orq.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
; No-op bitcast of the loaded vector.
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 64 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Lowering test: masked signed `icmp sge` on two <4 x i64> register operands.
; The compare result is ANDed with the low four bits of %__u, widened to 64
; bits and returned as an i64 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpleq with swapped operands (b <= a); the result is read with kmovq.
; Expected with only AVX512F: vpcmpgtq (b > a) inverted by xor with all-ones;
; the bits of %__u are expanded into a vector and combined with vpand; the
; mask is assembled on a 32-byte-aligned stack frame from 16-bit k-register
; stores (two of them zero) and the two 32-bit halves are merged with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 64 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Lowering test: masked signed `icmp sge` on <4 x i64> where the right-hand
; side is a full <4 x i64> vector loaded from %__b. The compare result is
; ANDed with the low four bits of %__u, widened to 64 bits and returned as an
; i64 mask.
; Expected with AVX512VL enabled: %__u is moved into %k1 and used as the mask
; of a vpcmpnltq with the load folded as its memory operand; the result is
; read with kmovq.
; Expected with only AVX512F: vmovdqa load, vpcmpgtq (b > a) inverted by xor
; with all-ones; the bits of %__u are expanded into a vector and combined with
; vpand; the mask is assembled on a 32-byte-aligned stack frame from 16-bit
; k-register stores (two of them zero) and the two 32-bit halves are merged
; with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
; No-op bitcast of the loaded vector.
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Widen to 64 lanes: indices 0-3 take the masked compare lanes, indices 4-7
; take lanes of the all-zero second operand, so bits above bit 3 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Lowering test: signed `icmp sge` on <4 x i64> where the right-hand side is a
; single i64 loaded from %__b and splatted to all four lanes. The 4-bit result
; is widened to 64 bits and returned as an i64 mask.
; Expected with AVX512VL enabled: the load is folded into vpcmpnltq as a
; {1to4} broadcast memory operand and the mask is read back with kmovq.
; Expected with only AVX512F: vpbroadcastq, vpcmpgtq (b > a) inverted by xor
; with all-ones; the mask is assembled on a 32-byte-aligned stack frame from
; 16-bit k-register stores (two of them zero), then the two 32-bit halves are
; reloaded and merged with shlq/orq.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0 everywhere.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; Widen to 64 lanes: indices 0-3 take the compare lanes, indices 4-7 take lanes
; of the all-zero second operand, so every result bit above bit 3 is 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The low four bits of %__u gate the
; result, which is widened to 64 lanes with zeros and returned as an i64.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Broadcast: load one i64, insert it into lane 0, then replicate lane 0 with an
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3 to match the
; four-lane compare result.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so lanes 4-63 of the
; widened vector are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to 16 lanes with zeros and returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Signed >= compare of <8 x i64> %__a against a full vector loaded from %__b.
; The <8 x i1> result is widened to 16 lanes with zeros and returned as an i16.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; The right-hand operand comes from memory as a whole <8 x i64> vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with the eight bits of %__u, widened to 16 lanes with zeros,
; and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked signed >= compare of <8 x i64> %__a against a full vector loaded from
; %__b. The compare result is ANDed with the eight bits of %__u, widened to 16
; lanes with zeros, and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; The right-hand operand comes from memory as a whole <8 x i64> vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed >= compare of <8 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is widened to 16 lanes with
; zeros and returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Broadcast: load one i64, insert it into lane 0, then replicate lane 0 with an
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked signed >= compare of <8 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The result is ANDed with the eight bits
; of %__u, widened to 16 lanes with zeros, and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Broadcast: load one i64, insert it into lane 0, then replicate lane 0 with an
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight
; lanes of the widened vector are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to 32 lanes with zeros and returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Signed >= compare of <8 x i64> %__a against a full vector loaded from %__b.
; The <8 x i1> result is widened to 32 lanes with zeros and returned as an i32.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; The right-hand operand comes from memory as a whole <8 x i64> vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with the eight bits of %__u, widened to 32 lanes with zeros,
; and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked signed >= compare of <8 x i64> %__a against a full vector loaded from
; %__b. The compare result is ANDed with the eight bits of %__u, widened to 32
; lanes with zeros, and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; The right-hand operand comes from memory as a whole <8 x i64> vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed >= compare of <8 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is widened to 32 lanes with
; zeros and returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Broadcast: load one i64, insert it into lane 0, then replicate lane 0 with an
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked signed >= compare of <8 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The result is ANDed with the eight bits
; of %__u, widened to 32 lanes with zeros, and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Broadcast: load one i64, insert it into lane 0, then replicate lane 0 with an
; all-zero shuffle mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31 of the
; widened vector are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to 64 lanes with zeros and returned as an i64 bitmask.
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63 of the
; widened vector are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Signed >= compare of <8 x i64> %__a against a full vector loaded from %__b.
; The <8 x i1> result is widened to 64 lanes with zeros and returned as an i64.
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; The right-hand operand comes from memory as a whole <8 x i64> vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63 of the
; widened vector are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with the eight bits of %__u, widened to 64 lanes with zeros,
; and returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcasts: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; View the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63 of the
; widened vector are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> vector loaded from %__b. The icmp result is ANDed with the i8
; mask %__u, widened to 64 lanes with zeros, and returned as an i64.
; Both run configurations expect the load to be folded into the compare as a
; memory operand (vpcmpnltq (%rsi), ...).
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Second compare operand comes from memory.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
; Reinterpret the i8 mask argument as one bit per lane and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen 8 -> 64 lanes: indices 0-7 select from %4, indices 8-15 select from
; the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked signed >= compare of an <8 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all 8 lanes. The 8 result bits
; are widened to 64 lanes with zeros and returned as an i64.
; Both run configurations expect the splat to be folded into the compare as
; an embedded broadcast memory operand ((%rdi){1to8}).
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one i64 and splat it: insert into lane 0, then shuffle lane 0 into
; every lane with an all-zero index mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; Widen 8 -> 64 lanes: indices 0-7 select from %2, indices 8-15 select from
; the zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all 8 lanes. The icmp result is ANDed
; with the i8 mask %__u, widened to 64 lanes with zeros, and returned as an
; i64. Both run configurations expect an embedded broadcast memory operand
; ((%rsi){1to8}) with the mask applied via {%k1}.
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity bitcast: source and destination types are the same.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one i64 and splat it: insert into lane 0, then shuffle lane 0 into
; every lane with an all-zero index mask.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
; Reinterpret the i8 mask argument as one bit per lane and apply it
; (the mask is the first operand of the and in this variant).
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Widen 8 -> 64 lanes: indices 0-7 select from %4, indices 8-15 select from
; the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned < compare of two <16 x i8> vectors (passed as <2 x i64>).
; The 16 result bits are widened to 32 lanes with zeros and returned as i32.
; The run with AVX512BW/VL/DQ enabled expects a single vpcmpltub into a mask
; register. The AVX512F-only run expects both inputs to be XORed with a
; splat of 128 followed by a signed vpcmpgtb.
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Widen 16 -> 32 lanes: indices 0-15 select from %2, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked unsigned < compare of a <16 x i8> register operand against a
; vector loaded from %__b (both typed as <2 x i64> at the interface).
; The 16 result bits are widened to 32 lanes with zeros and returned as i32.
; The run with AVX512BW/VL/DQ enabled expects the load folded into vpcmpltub.
; The AVX512F-only run expects the load folded into the vpxor that applies
; the 128 bias, followed by a signed vpcmpgtb.
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Widen 16 -> 32 lanes: indices 0-15 select from %2, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned < compare of two <16 x i8> vectors (passed as <2 x i64>).
; The icmp result is ANDed with the i16 mask %__u, widened to 32 lanes with
; zeros, and returned as i32.
; The run with AVX512BW/VL/DQ enabled expects a single vpcmpltub predicated
; on {%k1}. The AVX512F-only run expects the biased signed compare
; (xor with 128, vpcmpgtb) with the mask applied in the first vptestmd.
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Reinterpret the i16 mask argument as one bit per lane and apply it.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen 16 -> 32 lanes: indices 0-15 select from %4, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-31 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned < compare of a <16 x i8> register operand against a vector
; loaded from %__b (both typed as <2 x i64> at the interface). The icmp
; result is ANDed with the i16 mask %__u, widened to 32 lanes with zeros,
; and returned as i32.
; The run with AVX512BW/VL/DQ enabled expects the load folded into a
; predicated vpcmpltub. The AVX512F-only run expects the load folded into
; the biasing vpxor and the mask applied in the first vptestmd.
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Reinterpret the i16 mask argument as one bit per lane and apply it.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen 16 -> 32 lanes: indices 0-15 select from %4, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-31 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked unsigned < compare of two <16 x i8> vectors (passed as <2 x i64>).
; The 16 result bits are widened to 64 lanes with zeros and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects a single vpcmpltub followed by
; kmovq. The AVX512F-only run expects the biased signed compare
; (xor with 128, vpcmpgtb) and a result assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Widen 16 -> 64 lanes: indices 0-15 select from %2, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked unsigned < compare of a <16 x i8> register operand against a
; vector loaded from %__b (both typed as <2 x i64> at the interface).
; The 16 result bits are widened to 64 lanes with zeros and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects the load folded into vpcmpltub.
; The AVX512F-only run expects the load folded into the biasing vpxor and a
; result assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Widen 16 -> 64 lanes: indices 0-15 select from %2, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned < compare of two <16 x i8> vectors (passed as <2 x i64>).
; The icmp result is ANDed with the i16 mask %__u, widened to 64 lanes with
; zeros, and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects a single vpcmpltub predicated
; on {%k1}. The AVX512F-only run expects the biased signed compare
; (xor with 128, vpcmpgtb) with the mask applied in the first vptestmd.
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Reinterpret the i16 mask argument as one bit per lane and apply it.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen 16 -> 64 lanes: indices 0-15 select from %4, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned < compare of a <16 x i8> register operand against a vector
; loaded from %__b (both typed as <2 x i64> at the interface). The icmp
; result is ANDed with the i16 mask %__u, widened to 64 lanes with zeros,
; and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects the load folded into a
; predicated vpcmpltub. The AVX512F-only run expects the load folded into
; the biasing vpxor and the mask applied in the first vptestmd.
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as 16 bytes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
; Reinterpret the i16 mask argument as one bit per lane and apply it.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen 16 -> 64 lanes: indices 0-15 select from %4, indices 16-31 select
; from the zeroinitializer operand, so result lanes 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned < compare of two <32 x i8> vectors (passed as <4 x i64>).
; The 32 result bits are widened to 64 lanes with zeros and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects a single ymm vpcmpltub.
; The AVX512F-only run expects the biased signed compare (xor with 128,
; vpcmpgtb) on ymm, after which each 128-bit half is converted to a 16-bit
; mask separately.
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit arguments as 32 bytes each.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
; Widen 32 -> 64 lanes: indices 0-31 select from %2, indices 32-63 select
; from the zeroinitializer operand, so result lanes 32-63 are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked unsigned < compare of a <32 x i8> register operand against a
; vector loaded from %__b (both typed as <4 x i64> at the interface).
; The 32 result bits are widened to 64 lanes with zeros and returned as i64.
; The run with AVX512BW/VL/DQ enabled expects the load folded into a ymm
; vpcmpltub. The AVX512F-only run expects the load folded into the biasing
; vpxor, after which each 128-bit half of the vpcmpgtb result is converted
; to a 16-bit mask separately.
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as 32 bytes each.
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
; Widen 32 -> 64 lanes: indices 0-31 select from %2, indices 32-63 select
; from the zeroinitializer operand, so result lanes 32-63 are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of two <32 x i8> vectors held in registers.
; The 32 compare bits are ANDed with the i32 mask %__u, widened to 64 mask
; lanes (upper 32 lanes zero) and returned as an i64.
; Expected codegen with AVX512VL+BW is a single masked vpcmpltub into a
; k-register. With only AVX512F the unsigned compare is emulated by flipping
; the sign bit of both inputs (vpxor with 128) and using signed vpcmpgtb; the
; mask is expanded to byte vectors, ANDed in, and the result is reassembled
; from 16-bit pieces through stack slots.
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 32-63 select from the zeroinitializer operand, so the upper 32 bits of the result are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Memory-operand variant of the masked <32 x i8> unsigned less-than test: the
; right-hand side is loaded through %__b. The 32 compare bits are ANDed with
; the i32 mask %__u, widened to 64 lanes (upper 32 zero) and returned as i64.
; Expected codegen with AVX512VL+BW folds the load into a masked vpcmpltub.
; With only AVX512F the load is folded into the sign-flipping vpxor that
; precedes the signed vpcmpgtb, and the result is reassembled through stack
; slots.
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Indices 32-63 select from the zeroinitializer operand, so the upper 32 bits of the result are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are widened to 16 mask lanes (upper 8 zero) and returned
; as an i16.
; Expected codegen with AVX512VL+BW is a single vpcmpltuw into a k-register.
; With only AVX512F the unsigned compare is emulated by flipping the sign bit
; of both inputs (vpxor with 32768) and using signed vpcmpgtw, then the result
; is sign-extended to 64-bit lanes and converted to a mask with vptestmq.
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so the upper 8 bits of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Memory-operand variant of the <8 x i16> unsigned less-than test: the
; right-hand side is loaded through %__b. The 8 compare bits are widened to 16
; mask lanes (upper 8 zero) and returned as an i16.
; Expected codegen with AVX512VL+BW folds the load into vpcmpltuw. With only
; AVX512F the load is folded into the sign-flipping vpxor that precedes the
; signed vpcmpgtw.
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so the upper 8 bits of the result are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are ANDed with the i8 mask %__u, widened to 16 mask lanes
; (upper 8 zero) and returned as an i16.
; Expected codegen with AVX512VL+BW is a single masked vpcmpltuw. With only
; AVX512F the compare is emulated with vpxor (sign-bit flip) + vpcmpgtw and
; the mask is applied as the write mask of the final vptestmq.
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so the upper 8 bits of the result are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Memory-operand variant of the masked <8 x i16> unsigned less-than test: the
; right-hand side is loaded through %__b. The 8 compare bits are ANDed with
; the i8 mask %__u, widened to 16 mask lanes (upper 8 zero) and returned as an
; i16.
; Expected codegen with AVX512VL+BW folds the load into a masked vpcmpltuw.
; With only AVX512F the load is folded into the sign-flipping vpxor and the
; mask is applied as the write mask of the final vptestmq.
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so the upper 8 bits of the result are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are widened to 32 mask lanes (upper 24 zero) and returned
; as an i32.
; Expected codegen with AVX512VL+BW is a single vpcmpltuw into a k-register.
; With only AVX512F the compare is emulated with vpxor (sign-bit flip) +
; vpcmpgtw; the 8 mask bits are then extracted one by one (kshiftrw/kmovw),
; re-packed into a byte vector with vpinsrb, turned back into a 16-bit mask
; and stored to the stack, from which the 32-bit result is loaded.
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 0-7 take the compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 24 bits are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Memory-operand variant of the <8 x i16> unsigned less-than test with a
; 32-bit result: the right-hand side is loaded through %__b. The 8 compare
; bits are widened to 32 mask lanes (upper 24 zero) and returned as an i32.
; Expected codegen with AVX512VL+BW folds the load into vpcmpltuw. With only
; AVX512F the load is folded into the sign-flipping vpxor, and the 8 mask bits
; are extracted, re-packed with vpinsrb and rebuilt into a mask that is read
; back from the stack.
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 0-7 take the compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 24 bits are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are ANDed with the i8 mask %__u, widened to 32 mask lanes
; (upper 24 zero) and returned as an i32.
; Expected codegen with AVX512VL+BW is a single masked vpcmpltuw. With only
; AVX512F the compare is emulated with vpxor (sign-bit flip) + vpcmpgtw, the
; mask is applied as the write mask of vptestmq, and the 8 result bits are
; extracted, re-packed with vpinsrb and rebuilt into a mask that is read back
; from the stack.
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 24 bits are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Memory-operand variant of the masked <8 x i16> unsigned less-than test with
; a 32-bit result: the right-hand side is loaded through %__b. The 8 compare
; bits are ANDed with the i8 mask %__u, widened to 32 mask lanes (upper 24
; zero) and returned as an i32.
; Expected codegen with AVX512VL+BW folds the load into a masked vpcmpltuw.
; With only AVX512F the load is folded into the sign-flipping vpxor, the mask
; is applied as the write mask of vptestmq, and the result bits are re-packed
; with vpinsrb and read back from the stack.
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 24 bits are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are widened to 64 mask lanes (upper 56 zero) and returned
; as an i64.
; Expected codegen with AVX512VL+BW is a single vpcmpltuw followed by kmovq.
; With only AVX512F the compare is emulated with vpxor (sign-bit flip) +
; vpcmpgtw; the 8 mask bits are extracted, re-packed with vpinsrb and rebuilt
; into a 16-bit mask, and the 64-bit result is assembled from two 32-bit stack
; loads combined with shlq/orq.
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 0-7 take the compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 56 bits are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Memory-operand variant of the <8 x i16> unsigned less-than test with a
; 64-bit result: the right-hand side is loaded through %__b. The 8 compare
; bits are widened to 64 mask lanes (upper 56 zero) and returned as an i64.
; Expected codegen with AVX512VL+BW folds the load into vpcmpltuw. With only
; AVX512F the load is folded into the sign-flipping vpxor, the mask bits are
; re-packed with vpinsrb, and the 64-bit result is assembled from two 32-bit
; stack loads combined with shlq/orq.
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Indices 0-7 take the compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 56 bits are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of two <8 x i16> vectors held in registers.
; The 8 compare bits are ANDed with the i8 mask %__u, widened to 64 mask lanes
; (upper 56 zero) and returned as an i64.
; Expected codegen with AVX512VL+BW is a single masked vpcmpltuw followed by
; kmovq. With only AVX512F the compare is emulated with vpxor (sign-bit flip)
; + vpcmpgtw, the mask is applied as the write mask of vptestmq, the result
; bits are re-packed with vpinsrb, and the 64-bit value is assembled from two
; 32-bit stack loads combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 56 bits are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Memory-operand variant of the masked <8 x i16> unsigned less-than test with
; a 64-bit result: the right-hand side is loaded through %__b. The 8 compare
; bits are ANDed with the i8 mask %__u, widened to 64 mask lanes (upper 56
; zero) and returned as an i64.
; Expected codegen with AVX512VL+BW folds the load into a masked vpcmpltuw.
; With only AVX512F the load is folded into the sign-flipping vpxor, the mask
; is applied as the write mask of vptestmq, the result bits are re-packed with
; vpinsrb, and the 64-bit value is assembled from two 32-bit stack loads
; combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
; The second compare operand comes from memory so that load folding is exercised.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
; Apply the caller-supplied write mask lane by lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every later index is in 8-15, i.e. the zeroinitializer operand, so the upper 56 bits are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of sixteen i16 lanes held in two 256-bit register
; operands; the 16-bit result is zero-extended into a 32-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw on ymm registers.
; The NoVLX checks (avx512f only) expect both operands to be XORed with 32768
; and compared with the signed vpcmpgtw; the vector result is then converted
; to a 16-bit k-mask with vptestmd and read back from the stack as 32 bits.
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as sixteen 16-bit lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Widen to 32 lanes. Indices 0-15 pick the compare bits; indices 16-31 pick
; lanes of the zeroinitializer operand, so result bits 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned less-than compare of sixteen i16 lanes with the second operand
; loaded from memory through %__b; the 16-bit result is zero-extended into a
; 32-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw with the load
; folded in as (%rdi).
; The NoVLX checks (avx512f only) expect the register operand and the memory
; operand to be XORed with 32768 and compared with the signed vpcmpgtw; the
; result is converted to a k-mask with vptestmd and read back from the stack.
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as sixteen 16-bit
; lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Widen to 32 lanes. Indices 0-15 pick the compare bits; indices 16-31 pick
; lanes of the zeroinitializer operand, so result bits 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of sixteen i16 lanes held in two 256-bit
; register operands; the 16-bit result is zero-extended into a 32-bit mask.
; %__u supplies the per-lane mask bits.
; With the VLX prefix the checks expect a single vpcmpltuw under write mask
; k1, which is loaded from %edi.
; The NoVLX checks (avx512f only) expect both operands to be XORed with 32768
; and compared with the signed vpcmpgtw; the mask from %edi is applied by the
; masked vptestmd, and the final k-mask is read back from the stack.
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as sixteen 16-bit lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Apply the caller-provided mask, one bit per lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 32 lanes. Indices 0-15 pick the masked compare bits; indices 16-31
; pick lanes of the zeroinitializer operand, so result bits 16-31 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than compare of sixteen i16 lanes with the second
; operand loaded from memory through %__b; the 16-bit result is zero-extended
; into a 32-bit mask. %__u supplies the per-lane mask bits.
; With the VLX prefix the checks expect a single masked vpcmpltuw with the
; load folded in as (%rsi).
; The NoVLX checks (avx512f only) expect the register operand and the memory
; operand to be XORed with 32768 and compared with the signed vpcmpgtw; the
; mask from %edi is applied by the masked vptestmd, and the final k-mask is
; read back from the stack.
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as sixteen 16-bit
; lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Apply the caller-provided mask, one bit per lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 32 lanes. Indices 0-15 pick the masked compare bits; indices 16-31
; pick lanes of the zeroinitializer operand, so result bits 16-31 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of sixteen i16 lanes held in two 256-bit register
; operands; the 16-bit result is zero-extended into a 64-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw on ymm registers
; followed by kmovq.
; The NoVLX checks (avx512f only) expect both operands to be XORed with 32768
; and compared with the signed vpcmpgtw; the resulting k-mask is stored to the
; stack next to zeroed 16-bit slots, and the i64 is assembled from two 32-bit
; stack loads.
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as sixteen 16-bit lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Widen to 64 lanes. Indices 0-15 pick the compare bits; indices 16-31 pick
; lanes of the zeroinitializer operand, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than compare of sixteen i16 lanes with the second operand
; loaded from memory through %__b; the 16-bit result is zero-extended into a
; 64-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw with the load
; folded in as (%rdi), followed by kmovq.
; The NoVLX checks (avx512f only) expect the register operand and the memory
; operand to be XORed with 32768 and compared with the signed vpcmpgtw; the
; resulting k-mask is stored to the stack next to zeroed 16-bit slots, and the
; i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as sixteen 16-bit
; lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Widen to 64 lanes. Indices 0-15 pick the compare bits; indices 16-31 pick
; lanes of the zeroinitializer operand, so result bits 16-63 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of sixteen i16 lanes held in two 256-bit
; register operands; the 16-bit result is zero-extended into a 64-bit mask.
; %__u supplies the per-lane mask bits.
; With the VLX prefix the checks expect a single vpcmpltuw under write mask
; k1 (loaded from %edi), followed by kmovq.
; The NoVLX checks (avx512f only) expect both operands to be XORed with 32768
; and compared with the signed vpcmpgtw; the mask from %edi is applied by the
; masked vptestmd, the final k-mask is stored to the stack next to zeroed
; 16-bit slots, and the i64 is assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 256-bit operands as sixteen 16-bit lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Apply the caller-provided mask, one bit per lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 64 lanes. Indices 0-15 pick the masked compare bits; indices 16-31
; pick lanes of the zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of sixteen i16 lanes with the second
; operand loaded from memory through %__b; the 16-bit result is zero-extended
; into a 64-bit mask. %__u supplies the per-lane mask bits.
; With the VLX prefix the checks expect a single masked vpcmpltuw with the
; load folded in as (%rsi), followed by kmovq.
; The NoVLX checks (avx512f only) expect the register operand and the memory
; operand to be XORed with 32768 and compared with the signed vpcmpgtw; the
; mask from %edi is applied by the masked vptestmd, the final k-mask is stored
; to the stack next to zeroed 16-bit slots, and the i64 is assembled from two
; 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as sixteen 16-bit
; lanes and compare them.
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
; Apply the caller-provided mask, one bit per lane.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Widen to 64 lanes. Indices 0-15 pick the masked compare bits; indices 16-31
; pick lanes of the zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of thirty-two i16 lanes held in two 512-bit
; register operands; the 32-bit result is zero-extended into a 64-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw on zmm registers
; followed by kmovq.
; The NoVLX checks (avx512f only) expect a much longer sequence. Each operand
; is split into 128-bit pieces, every piece is moved through general-purpose
; registers and reassembled word by word with vpinsrw, and the pieces are
; joined into 256-bit halves with vinserti128. Each pair of halves is then
; XORed with 32768 and compared with the signed vpcmpgtw, giving one 16-bit
; k-mask per half. Both masks go through stack slots and the i64 result is
; built with shlq/orq.
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 512-bit operands as thirty-two 16-bit lanes and compare.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
; Widen to 64 lanes. Indices 0-31 pick the compare bits; indices 32-63 pick
; lanes of the zeroinitializer operand, so result bits 32-63 are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than compare of thirty-two i16 lanes with the second operand
; loaded from memory through %__b; the 32-bit result is zero-extended into a
; 64-bit mask.
; With the VLX prefix the checks expect a single vpcmpltuw with the load
; folded in as (%rdi), followed by kmovq.
; The NoVLX checks (avx512f only) expect the register operand to be split into
; 128-bit pieces, reassembled word by word with vpinsrw and joined into two
; 256-bit halves. The memory operand is read directly as (%rdi) and 32(%rdi).
; Each pair of halves is XORed with 32768 and compared with the signed
; vpcmpgtw, giving one 16-bit k-mask per half. Both masks go through stack
; slots and the i64 result is built with shlq/orq.
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register operand and the loaded operand as thirty-two
; 16-bit lanes and compare them.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
; Widen to 64 lanes. Indices 0-31 pick the compare bits; indices 32-63 pick
; lanes of the zeroinitializer operand, so result bits 32-63 are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of 32 x i16 lanes, register/register form.
; The compare result is ANDed with the 32-bit mask %__u, padded with 32 zero
; lanes and returned as an i64 bitmask.
; The first RUN line (avx512bw/vl/dq enabled) is expected to emit one masked
; vpcmpltuw. The second RUN line (avx512f only) is expected to emit the long
; expansion pinned below: the inputs are rebuilt word by word, each 256-bit
; half is XORed with 32768 and compared with the signed vpcmpgtw.
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 512-bit arguments as 32 x i16 and compare them unsigned.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
; Each bit of %__u enables the corresponding compare lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper
; 32 bits of the returned mask are always zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of 32 x i16 lanes where the second operand
; is loaded from the pointer %__b. The compare result is ANDed with the 32-bit
; mask %__u, padded with 32 zero lanes and returned as an i64 bitmask.
; The first RUN line (avx512bw/vl/dq enabled) is expected to fold the load into
; one masked vpcmpltuw. The second RUN line (avx512f only) is expected to emit
; the expansion pinned below, where the two 256-bit halves of the memory
; operand are folded into the vpxor instructions at (%rsi) and 32(%rsi).
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm5, %xmm5
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The right-hand operand comes from memory; both sides are viewed as 32 x i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
; Each bit of %__u enables the corresponding compare lane.
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper
; 32 bits of the returned mask are always zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned less-than compare of 4 x i32 lanes, register/register form.
; The 4-bit result is padded with four zero lanes and returned as an i8.
; The first RUN line (avx512vl enabled) is expected to emit a single vpcmpltud
; into a mask register. The second RUN line (avx512f only) is expected to XOR
; both inputs with 2147483648, use the signed vpcmpgtd, and then assemble the
; mask one bit at a time through k-registers.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 4 x i32 and compare them unsigned.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked unsigned less-than compare of 4 x i32 lanes where the second operand
; is loaded from the pointer %__b. The 4-bit result is padded with four zero
; lanes and returned as an i8.
; The first RUN line (avx512vl enabled) is expected to fold the load into a
; single vpcmpltud. The second RUN line (avx512f only) is expected to fold the
; load into the sign-bit vpxor, use the signed vpcmpgtd, and then assemble the
; mask one bit at a time through k-registers.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The right-hand operand comes from memory; both sides are viewed as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked unsigned less-than compare of 4 x i32 lanes, register/register form.
; The compare result is ANDed with the low four bits of %__u, padded with four
; zero lanes and returned as an i8.
; The first RUN line (avx512vl enabled) is expected to emit one masked
; vpcmpltud. The second RUN line (avx512f only) is expected to expand the mask
; bits into a vector with vpinsrb, AND it with the vpcmpgtd result, and then
; assemble the final mask one bit at a time through k-registers.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 4 x i32 and compare them unsigned.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; Only the low four bits of the i8 mask take part; the compare result is the
; first operand of the `and`.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked unsigned less-than compare of 4 x i32 lanes where the second operand
; is loaded from the pointer %__b. The compare result is ANDed with the low
; four bits of %__u, padded with four zero lanes and returned as an i8.
; The first RUN line (avx512vl enabled) is expected to fold the load into one
; masked vpcmpltud. The second RUN line (avx512f only) is expected to fold the
; load into the sign-bit vpxor, expand the mask bits with vpinsrb, AND them
; with the vpcmpgtd result, and assemble the final mask through k-registers.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The right-hand operand comes from memory; both sides are viewed as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; Only the low four bits of the i8 mask take part; the compare result is the
; first operand of the `and`.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all four lanes. The 4-bit result is padded
; with four zero lanes and returned as an i8.
; The first RUN line (avx512vl enabled) is expected to use the embedded
; broadcast form, vpcmpltud with a {1to4} memory operand. The second RUN line
; (avx512f only) is expected to use a separate vpbroadcastd load, the
; sign-bit vpxor plus vpcmpgtd, and per-bit mask assembly through k-registers.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one i32 and splat it to all four lanes (insert into lane 0, then
; shuffle with an all-zero index mask).
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all four lanes. The compare result is ANDed
; with the low four bits of %__u, padded with four zero lanes and returned as
; an i8.
; The first RUN line (avx512vl enabled) is expected to emit one masked
; vpcmpltud with a {1to4} broadcast memory operand. The second RUN line
; (avx512f only) is expected to use a separate vpbroadcastd load, the
; sign-bit vpxor plus vpcmpgtd, a vpinsrb-built mask vector, and per-bit mask
; assembly through k-registers.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Load one i32 and splat it to all four lanes (insert into lane 0, then
; shuffle with an all-zero index mask).
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
; Only the low four bits of the i8 mask take part. Here the mask is the first
; operand of the `and` and the compare result the second; the expected vpand
; above lists %xmm0 (compare result) before %xmm1 (mask vector).
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
; four bits of the returned mask are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked unsigned less-than compare of 4 x i32 lanes, register/register form,
; with the 4-bit result widened to 16 lanes and returned as an i16.
; The first RUN line (avx512vl enabled) is expected to emit a single vpcmpltud
; into a mask register. The second RUN line (avx512f only) is expected to XOR
; both inputs with 2147483648, use the signed vpcmpgtd, and then assemble the
; mask one bit at a time through k-registers.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit arguments as 4 x i32 and compare them unsigned.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; With two 4-lane inputs only indices 0-7 exist, so the twelve upper lanes
; reuse indices 4-7. Those all select from the zeroinitializer operand, which
; makes the upper twelve bits of the returned mask zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked unsigned less-than compare of 4 x i32 lanes where the second operand
; is loaded from the pointer %__b, with the 4-bit result widened to 16 lanes
; and returned as an i16.
; The first RUN line (avx512vl enabled) is expected to fold the load into a
; single vpcmpltud. The second RUN line (avx512f only) is expected to fold the
; load into the sign-bit vpxor, use the signed vpcmpgtd, and then assemble the
; mask one bit at a time through k-registers.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; The right-hand operand comes from memory; both sides are viewed as 4 x i32.
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; With two 4-lane inputs only indices 0-7 exist, so the twelve upper lanes
; reuse indices 4-7. Those all select from the zeroinitializer operand, which
; makes the upper twelve bits of the returned mask zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than (icmp ult) on two <4 x i32> register operands.
; The i8 argument %__u is bitcast to <8 x i1>, its low four lanes are extracted
; and and'ed with the compare result. The 4-lane result is then widened to
; <16 x i1> (shuffle indices 4-7 select lanes of the zeroinitializer operand)
; and returned as an i16.
; The VLX run expects %edi to be moved into %k1 and used as the write-mask of
; one vpcmpltud. The NoVLX run expects the xor-with-2147483648 + vpcmpgtd
; expansion, the mask bits to be rebuilt into an xmm register with
; kshiftrw/kmovw/vpinsrb and applied with vpand, and then element-by-element
; assembly of the result mask in k-registers.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked unsigned less-than (icmp ult) on <4 x i32>, where the right-hand
; operand is loaded as a full <2 x i64> vector through %__b. The low four lanes
; of %__u (bitcast to <8 x i1>) are and'ed with the compare result, which is
; then widened to <16 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and returned as an i16.
; The VLX run expects one write-masked vpcmpltud with the load from (%rsi)
; folded in. The NoVLX run expects the xor-with-2147483648 + vpcmpgtd
; expansion (load folded into the second vpxor), the mask rebuilt in an xmm
; register via kshiftrw/kmovw/vpinsrb and applied with vpand, followed by
; element-by-element assembly of the result mask in k-registers.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than (icmp ult) on <4 x i32>, where the right-hand operand is a
; single i32 loaded through %__b and splatted to all four lanes (insertelement
; into lane 0 followed by a shufflevector with all-zero indices). The 4-lane
; result is widened to <16 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and returned as an i16.
; The VLX run expects one vpcmpltud using the {1to4} embedded-broadcast memory
; operand. The NoVLX run expects an explicit vpbroadcastd from (%rdi), the
; xor-with-2147483648 + vpcmpgtd expansion, and element-by-element assembly of
; the result mask in k-registers.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than (icmp ult) on <4 x i32>, where the right-hand
; operand is a single i32 loaded through %__b and splatted to all four lanes.
; The low four lanes of %__u (bitcast to <8 x i1>) are and'ed with the compare
; result; note the and here takes the mask as its first operand and the compare
; as its second. The result is widened to <16 x i1> (shuffle indices 4-7 select
; lanes of the zeroinitializer operand) and returned as an i16.
; The VLX run expects one write-masked vpcmpltud with a {1to4} broadcast memory
; operand. The NoVLX run expects an explicit vpbroadcastd from (%rsi), the
; xor-with-2147483648 + vpcmpgtd expansion, the mask rebuilt in an xmm register
; and applied with vpand, then element-by-element assembly in k-registers.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than (icmp ult) on two <4 x i32> register operands, with the
; 4-lane result widened to <32 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so all lanes above the low four are zero) and
; returned as an i32.
; The VLX run expects a single vpcmpltud followed by kmovd. The NoVLX run
; expects a 32-byte-aligned stack frame, the xor-with-2147483648 + vpcmpgtd
; expansion, a vpshufb that packs bytes 0/4/8/12 and zeroes the rest, and two
; vpmovsxbd/vptestmd pairs whose 16-bit masks are stored to the stack and
; reloaded together with one 32-bit movl.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned less-than (icmp ult) on <4 x i32>, where the right-hand operand is
; loaded as a full <2 x i64> vector through %__b. The 4-lane result is widened
; to <32 x i1> (shuffle indices 4-7 select lanes of the zeroinitializer
; operand) and returned as an i32.
; The VLX run expects one vpcmpltud with the load from (%rdi) folded in. The
; NoVLX run expects a 32-byte-aligned stack frame, the xor-with-2147483648 +
; vpcmpgtd expansion (load folded into the second vpxor), a vpshufb that packs
; bytes 0/4/8/12, and two vpmovsxbd/vptestmd pairs whose 16-bit masks are
; stored to the stack and reloaded with one 32-bit movl.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than (icmp ult) on two <4 x i32> register operands.
; The low four lanes of %__u (bitcast to <8 x i1>) are and'ed with the compare
; result, which is then widened to <32 x i1> (shuffle indices 4-7 select lanes
; of the zeroinitializer operand) and returned as an i32.
; The VLX run expects one write-masked vpcmpltud followed by kmovd. The NoVLX
; run expects a 32-byte-aligned stack frame, the xor-with-2147483648 +
; vpcmpgtd expansion, the mask rebuilt in an xmm register via
; kshiftrw/kmovw/vpinsrb and applied with vpand, a vpshufb packing bytes
; 0/4/8/12, and two mask halves produced with vpmovsxbd/vptestmd (the low half
; additionally goes through vpslld $31), stored to the stack and reloaded with
; one 32-bit movl.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than (icmp ult) on <4 x i32>, where the right-hand
; operand is loaded as a full <2 x i64> vector through %__b. The low four lanes
; of %__u (bitcast to <8 x i1>) are and'ed with the compare result, which is
; then widened to <32 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and returned as an i32.
; The VLX run expects one write-masked vpcmpltud with the load from (%rsi)
; folded in. The NoVLX run expects a 32-byte-aligned stack frame, the
; xor-with-2147483648 + vpcmpgtd expansion (load folded into the second vpxor),
; the mask rebuilt in an xmm register and applied with vpand, a vpshufb packing
; bytes 0/4/8/12, and two mask halves produced with vpmovsxbd/vptestmd (the
; low half additionally goes through vpslld $31), stored to the stack and
; reloaded with one 32-bit movl.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than (icmp ult) on <4 x i32>, where the right-hand operand is a
; single i32 loaded through %__b and splatted to all four lanes (insertelement
; into lane 0 followed by a shufflevector with all-zero indices). The 4-lane
; result is widened to <32 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and returned as an i32.
; The VLX run expects one vpcmpltud using the {1to4} embedded-broadcast memory
; operand. The NoVLX run expects a 32-byte-aligned stack frame, an explicit
; vpbroadcastd from (%rdi), the xor-with-2147483648 + vpcmpgtd expansion, a
; vpshufb packing bytes 0/4/8/12, and two vpmovsxbd/vptestmd pairs whose
; 16-bit masks are stored to the stack and reloaded with one 32-bit movl.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than (icmp ult) on <4 x i32>, where the right-hand
; operand is a single i32 loaded through %__b and splatted to all four lanes.
; The low four lanes of %__u (bitcast to <8 x i1>) are and'ed with the compare
; result; note the and here takes the mask as its first operand and the compare
; as its second. The result is widened to <32 x i1> (shuffle indices 4-7 select
; lanes of the zeroinitializer operand) and returned as an i32.
; The VLX run expects one write-masked vpcmpltud with a {1to4} broadcast memory
; operand. The NoVLX run expects a 32-byte-aligned stack frame, an explicit
; vpbroadcastd from (%rsi), the xor-with-2147483648 + vpcmpgtd expansion, the
; mask rebuilt in an xmm register and applied with vpand, a vpshufb packing
; bytes 0/4/8/12, and two mask halves produced with vpmovsxbd/vptestmd (the
; low half additionally goes through vpslld $31), stored to the stack and
; reloaded with one 32-bit movl.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than (icmp ult) on two <4 x i32> register operands, with the
; 4-lane result widened to <64 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so all lanes above the low four are zero) and
; returned as an i64.
; The VLX run expects a single vpcmpltud followed by kmovq into %rax. The NoVLX
; run expects a 32-byte-aligned frame with 64 bytes of stack, the
; xor-with-2147483648 + vpcmpgtd expansion, two stores of a kxorw-zeroed mask
; to the stack, a vpshufb packing bytes 0/4/8/12, two vpmovsxbd/vptestmd pairs
; stored to the stack, and the final value formed from two 32-bit loads
; combined with shlq $32 / orq.
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than (icmp ult) on <4 x i32>, where the right-hand operand is
; loaded as a full <2 x i64> vector through %__b. The 4-lane result is widened
; to <64 x i1> (shuffle indices 4-7 select lanes of the zeroinitializer
; operand) and returned as an i64.
; The VLX run expects one vpcmpltud with the load from (%rdi) folded in,
; followed by kmovq into %rax. The NoVLX run expects a 32-byte-aligned frame
; with 64 bytes of stack, the xor-with-2147483648 + vpcmpgtd expansion (load
; folded into the second vpxor), two stores of a kxorw-zeroed mask to the
; stack, a vpshufb packing bytes 0/4/8/12, two vpmovsxbd/vptestmd pairs stored
; to the stack, and the final value formed from two 32-bit loads combined with
; shlq $32 / orq.
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than (icmp ult) on two <4 x i32> register operands.
; The low four lanes of %__u (bitcast to <8 x i1>) are and'ed with the compare
; result, which is then widened to <64 x i1> (shuffle indices 4-7 select lanes
; of the zeroinitializer operand) and returned as an i64.
; The VLX run expects one write-masked vpcmpltud followed by kmovq into %rax.
; The NoVLX run expects a 32-byte-aligned frame with 64 bytes of stack, the
; xor-with-2147483648 + vpcmpgtd expansion, the mask rebuilt in an xmm register
; via kshiftrw/kmovw/vpinsrb and applied with vpand, two stores of a
; kxorw-zeroed mask to the stack, a vpshufb packing bytes 0/4/8/12, two mask
; halves produced with vpmovsxbd/vptestmd (the low half additionally goes
; through vpslld $31), and the final value formed from two 32-bit loads
; combined with shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of a <4 x i32> register vector against a
; full vector loaded from %__b. The <4 x i1> result is ANDed with the low 4
; bits of %__u, zero-extended to 64 mask bits and returned as i64.
; With +avx512vl the load is expected to fold into the masked vpcmpltud. The
; +avx512f-only run expects it to fold into the sign-flipping vpxor that
; precedes the signed vpcmpgtd.
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Second compare operand comes from memory as a whole <2 x i64> vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
; Reinterpret the i8 mask as 8 lane bits and keep only the low 4 of them.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4..63 are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned less-than compare of a <4 x i32> register vector against a
; scalar i32 loaded from %__b and splatted to all four lanes. The <4 x i1>
; result is zero-extended to 64 mask bits and returned as i64.
; With +avx512vl the splat is expected to become an embedded {1to4} broadcast
; operand of vpcmpltud. The +avx512f-only run expects a separate vpbroadcastd
; followed by the sign-flip (xor with 2147483648) + signed vpcmpgtd emulation.
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4..63 are always zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of a <4 x i32> register vector against a
; scalar i32 loaded from %__b and splatted to all four lanes. The <4 x i1>
; result is ANDed with the low 4 bits of %__u, zero-extended to 64 mask bits
; and returned as i64.
; With +avx512vl the expected code is one masked vpcmpltud with an embedded
; {1to4} broadcast operand. The +avx512f-only run expects vpbroadcastd plus
; the sign-flip (xor with 2147483648) + signed vpcmpgtd emulation.
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
; Reinterpret the i8 mask as 8 lane bits and keep only the low 4 of them.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Note the operand order here is (mask, compare), the reverse of the
; non-broadcast masked variants in this file.
%4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4..63 are always zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned less-than compare of two <8 x i32> vectors passed in
; registers. The <8 x i1> result is zero-extended to 16 mask bits and
; returned as i16.
; With +avx512vl the expected code is a ymm vpcmpltud. The +avx512f-only run
; expects the operands to be treated as zmm, compared with a zmm vpcmpltud,
; and the upper 8 mask bits cleared with a kshiftlw/kshiftrw $8 pair.
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked unsigned less-than compare of an <8 x i32> register vector against
; a full vector loaded from %__b. The <8 x i1> result is zero-extended to 16
; mask bits and returned as i16.
; With +avx512vl the load is expected to fold into the ymm vpcmpltud. The
; +avx512f-only run expects an explicit vmovdqa load, a zmm vpcmpltud, and a
; kshiftlw/kshiftrw $8 pair that clears the upper 8 mask bits.
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Second compare operand comes from memory as a whole <4 x i64> vector.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than compare of two <8 x i32> vectors passed in
; registers. The <8 x i1> result is ANDed with the 8 bits of %__u,
; zero-extended to 16 mask bits and returned as i16.
; With +avx512vl the expected code is a masked ymm vpcmpltud. The
; +avx512f-only run expects a masked zmm vpcmpltud followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper 8 mask bits.
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked unsigned less-than compare of an <8 x i32> register vector against a
; full vector loaded from %__b. The <8 x i1> result is ANDed with the 8 bits
; of %__u, zero-extended to 16 mask bits and returned as i16.
; With +avx512vl the load is expected to fold into the masked ymm vpcmpltud.
; The +avx512f-only run expects an explicit vmovdqa load, a masked zmm
; vpcmpltud, and a kshiftlw/kshiftrw $8 pair clearing the upper 8 mask bits.
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Second compare operand comes from memory as a whole <4 x i64> vector.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked unsigned less-than compare of an <8 x i32> register vector against
; a scalar i32 loaded from %__b and splatted to all eight lanes. The <8 x i1>
; result is zero-extended to 16 mask bits and returned as i16.
; With +avx512vl the splat is expected to become an embedded {1to8} broadcast
; operand of vpcmpltud. The +avx512f-only run expects a separate vpbroadcastd,
; a zmm vpcmpltud, and a kshiftlw/kshiftrw $8 pair clearing the upper 8 bits.
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than compare of an <8 x i32> register vector against a
; scalar i32 loaded from %__b and splatted to all eight lanes. The <8 x i1>
; result is ANDed with the 8 bits of %__u, zero-extended to 16 mask bits and
; returned as i16.
; With +avx512vl the expected code is one masked vpcmpltud with an embedded
; {1to8} broadcast operand. The +avx512f-only run expects vpbroadcastd, a
; masked zmm vpcmpltud, and a kshiftlw/kshiftrw $8 pair clearing the upper
; 8 mask bits.
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..15 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked unsigned less-than compare of two <8 x i32> vectors passed in
; registers. The <8 x i1> result is zero-extended to 32 mask bits and
; returned as i32.
; With +avx512vl the expected code is a ymm vpcmpltud plus kmovd. The
; +avx512f-only run expects a zmm vpcmpltud whose low 8 mask bits are moved
; out one at a time (kshiftrw/kmovw), rebuilt into a byte vector with vpinsrb,
; converted back to a mask, and read as a 32-bit value from the stack.
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked unsigned less-than compare of an <8 x i32> register vector against
; a full vector loaded from %__b. The <8 x i1> result is zero-extended to 32
; mask bits and returned as i32.
; With +avx512vl the load is expected to fold into the ymm vpcmpltud. The
; +avx512f-only run expects an explicit vmovdqa load and a zmm vpcmpltud whose
; low 8 mask bits are moved out one at a time (kshiftrw/kmovw), rebuilt into a
; byte vector with vpinsrb, converted back to a mask, and read as a 32-bit
; value from the stack.
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Second compare operand comes from memory as a whole <4 x i64> vector.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of two <8 x i32> vectors passed in
; registers. The <8 x i1> result is ANDed with the 8 bits of %__u,
; zero-extended to 32 mask bits and returned as i32.
; With +avx512vl the expected code is a masked ymm vpcmpltud plus kmovd. The
; +avx512f-only run expects a masked zmm vpcmpltud whose low 8 mask bits are
; moved out one at a time (kshiftrw/kmovw), rebuilt into a byte vector with
; vpinsrb, converted back to a mask, and read as a 32-bit value from the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than compare of an <8 x i32> register vector against a
; full vector loaded from %__b. The <8 x i1> result is ANDed with the 8 bits
; of %__u, zero-extended to 32 mask bits and returned as i32.
; With +avx512vl the load is expected to fold into the masked ymm vpcmpltud.
; The +avx512f-only run expects an explicit vmovdqa load and a masked zmm
; vpcmpltud whose low 8 mask bits are moved out one at a time
; (kshiftrw/kmovw), rebuilt into a byte vector with vpinsrb, converted back to
; a mask, and read as a 32-bit value from the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Second compare operand comes from memory as a whole <4 x i64> vector.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked unsigned less-than compare of an <8 x i32> register vector against
; a scalar i32 loaded from %__b and splatted to all eight lanes. The <8 x i1>
; result is zero-extended to 32 mask bits and returned as i32.
; With +avx512vl the splat is expected to become an embedded {1to8} broadcast
; operand of vpcmpltud. The +avx512f-only run expects a separate vpbroadcastd
; and a zmm vpcmpltud whose low 8 mask bits are moved out one at a time
; (kshiftrw/kmovw), rebuilt into a byte vector with vpinsrb, converted back to
; a mask, and read as a 32-bit value from the stack.
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of an <8 x i32> register vector against a
; scalar i32 loaded from %__b and splatted to all eight lanes. The <8 x i1>
; result is ANDed with the 8 bits of %__u, zero-extended to 32 mask bits and
; returned as i32.
; With +avx512vl the expected code is one masked vpcmpltud with an embedded
; {1to8} broadcast operand. The +avx512f-only run expects vpbroadcastd and a
; masked zmm vpcmpltud whose low 8 mask bits are moved out one at a time
; (kshiftrw/kmovw), rebuilt into a byte vector with vpinsrb, converted back to
; a mask, and read as a 32-bit value from the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle lane 0 into
; every lane.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
; All 8 bits of the i8 mask are used, one per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8..31 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of the 8 x i32 lanes of %__a against those of
; %__b. The 8-bit compare result is widened to 64 mask bits with zeros in the
; upper positions and returned as an i64.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than compare of the 8 x i32 lanes of %__a against a full
; vector loaded from %__b. The 8-bit compare result is widened to 64 mask bits
; with zeros in the upper positions and returned as an i64.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of the 8 x i32 lanes of %__a against those
; of %__b. The 8-bit compare result is ANDed with the bits of %__u, widened to
; 64 mask bits with zeros in the upper positions, and returned as an i64.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of the 8 x i32 lanes of %__a against a full
; vector loaded from %__b. The 8-bit compare result is ANDed with the bits of
; %__u, widened to 64 mask bits with zeros in the upper positions, and returned
; as an i64.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of the 8 x i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to every lane. The 8-bit compare result is
; widened to 64 mask bits with zeros in the upper positions and returned as an
; i64.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of the 8 x i32 lanes of %__a against a
; scalar i32 loaded from %__b and splatted to every lane. The 8-bit compare
; result is ANDed with the bits of %__u, widened to 64 mask bits with zeros in
; the upper positions, and returned as an i64.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result bit above bit 7 is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of the 16 x i32 lanes of %__a against those of
; %__b. The 16-bit compare result is widened to 32 mask bits with zeros in the
; upper half and returned as an i32.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned less-than compare of the 16 x i32 lanes of %__a against a full
; vector loaded from %__b. The 16-bit compare result is widened to 32 mask bits
; with zeros in the upper half and returned as an i32.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of the 16 x i32 lanes of %__a against those
; of %__b. The 16-bit compare result is ANDed with the bits of %__u, widened to
; 32 mask bits with zeros in the upper half, and returned as an i32.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than compare of the 16 x i32 lanes of %__a against a
; full vector loaded from %__b. The 16-bit compare result is ANDed with the
; bits of %__u, widened to 32 mask bits with zeros in the upper half, and
; returned as an i32.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of the 16 x i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to every lane. The 16-bit compare result is
; widened to 32 mask bits with zeros in the upper half and returned as an i32.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of the 16 x i32 lanes of %__a against a
; scalar i32 loaded from %__b and splatted to every lane. The 16-bit compare
; result is ANDed with the bits of %__u, widened to 32 mask bits with zeros in
; the upper half, and returned as an i32.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 result bits are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of the 16 x i32 lanes of %__a against those of
; %__b. The 16-bit compare result is widened to 64 mask bits with zeros in the
; upper 48 positions and returned as an i64.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so every
; result bit above bit 15 is zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than compare of a <16 x i32> register value against a full
; 512-bit vector loaded from %__b. The 16-lane result is widened to 64 lanes with
; zero upper lanes and returned as i64. Both configurations are expected to fold
; the load into the memory operand of vpcmpltud.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Right-hand operand comes from memory rather than a register.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Indices 0-15 select the compare lanes; every index >= 16 selects a lane of the
; zeroinitializer operand, so result lanes 16-63 are all false.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of two <16 x i32> register values. The i16
; argument %__u is reinterpreted as 16 mask bits and ANDed with the compare
; result before it is widened to 64 lanes (upper lanes zero) and returned as i64.
; Both configurations are expected to fold the AND into a {%k1} write-mask on
; vpcmpltud.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Apply the caller-supplied mask: compare result is the first `and` operand.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked compare lanes; every index >= 16 selects a lane
; of the zeroinitializer operand, so result lanes 16-63 are all false.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of a <16 x i32> register value against a
; full 512-bit vector loaded from %__b. The i16 argument %__u supplies 16 mask
; bits that are ANDed with the compare result; the result is widened to 64 lanes
; (upper lanes zero) and returned as i64. Both configurations are expected to use
; a memory operand and a {%k1} write-mask on vpcmpltud.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Right-hand operand comes from memory rather than a register.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
; Apply the caller-supplied mask: compare result is the first `and` operand.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Indices 0-15 select the masked compare lanes; every index >= 16 selects a lane
; of the zeroinitializer operand, so result lanes 16-63 are all false.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of a <16 x i32> register value against a single i32
; loaded from %__b and splatted to all 16 lanes. The 16-lane result is widened to
; 64 lanes with zero upper lanes and returned as i64. Both configurations are
; expected to fold the splat into a {1to16} embedded-broadcast memory operand.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
; Indices 0-15 select the compare lanes; every index >= 16 selects a lane of the
; zeroinitializer operand, so result lanes 16-63 are all false.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of a <16 x i32> register value against a
; single i32 loaded from %__b and splatted to all 16 lanes. The i16 argument
; %__u supplies 16 mask bits ANDed with the compare result; the result is widened
; to 64 lanes (upper lanes zero) and returned as i64. Both configurations are
; expected to use a {1to16} broadcast operand and a {%k1} write-mask.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
; Apply the caller-supplied mask; here the mask is the first `and` operand and
; the compare result the second.
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
; Indices 0-15 select the masked compare lanes; every index >= 16 selects a lane
; of the zeroinitializer operand, so result lanes 16-63 are all false.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of two <2 x i64> register values. The 2-lane result
; is widened to 4 lanes with zero upper lanes and returned as i4.
; With AVX512BW/VL/DQ enabled the expected code is vpcmpltuq into a k-register
; plus kmovb. With only AVX512F the unsigned compare is emulated: both operands
; are XORed with the sign-bit constant and compared with the signed vpcmpgtq.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; These bitcasts are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Unsigned less-than compare of a <2 x i64> register value against a 128-bit
; vector loaded from %__b. The 2-lane result is widened to 4 lanes with zero
; upper lanes and returned as i4.
; With AVX512BW/VL/DQ enabled the load is expected to fold into vpcmpltuq. With
; only AVX512F the load folds into the sign-bit vpxor that precedes the signed
; vpcmpgtq used to emulate the unsigned compare.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked unsigned less-than compare of two <2 x i64> register values. Bits 0 and
; 1 of the i8 argument %__u are ANDed with the 2-lane compare result, which is
; then widened to 4 lanes (upper lanes zero) and returned as i4.
; With AVX512BW/VL/DQ enabled the AND is expected to fold into a {%k1}
; write-mask on vpcmpltuq. With only AVX512F the two mask bits are moved into an
; xmm register and combined with the emulated compare by vpand.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; These bitcasts are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, mask bits the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked unsigned less-than compare of a <2 x i64> register value against a
; 128-bit vector loaded from %__b. Bits 0 and 1 of the i8 argument %__u are ANDed
; with the 2-lane compare result, which is then widened to 4 lanes (upper lanes
; zero) and returned as i4.
; With AVX512BW/VL/DQ enabled the expected code is a masked vpcmpltuq with a
; memory operand. With only AVX512F the compare is emulated with sign-bit vpxor
; plus vpcmpgtq, and the mask is applied with vpand.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The bitcasts of the vector operands are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, mask bits the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Unsigned less-than compare of a <2 x i64> register value against a single i64
; loaded from %__b and splatted to both lanes. The 2-lane result is widened to 4
; lanes with zero upper lanes and returned as i4.
; With AVX512BW/VL/DQ enabled the splat is expected to fold into a {1to2}
; embedded-broadcast operand of vpcmpltuq. With only AVX512F an explicit
; vpbroadcastq is emitted, followed by the sign-bit vpxor + vpcmpgtq emulation.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity cast (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked unsigned less-than compare of a <2 x i64> register value against a
; single i64 loaded from %__b and splatted to both lanes. Bits 0 and 1 of the i8
; argument %__u are ANDed with the 2-lane compare result, which is then widened
; to 4 lanes (upper lanes zero) and returned as i4.
; With AVX512BW/VL/DQ enabled the expected code is a masked vpcmpltuq with a
; {1to2} broadcast operand. With only AVX512F an explicit vpbroadcastq, the
; sign-bit vpxor + vpcmpgtq emulation and a vpand with the mask are emitted.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Identity cast (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask bits are the first `and` operand and the compare result the
; second; the expected vpand operand order in the AVX512F-only output depends
; on this.
%4 = and <2 x i1> %extract.i, %2
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2-3 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Unsigned less-than compare of two <2 x i64> register values. The 2-lane result
; is widened to 8 lanes with zero upper lanes and returned as i8.
; With AVX512BW/VL/DQ enabled the expected code is vpcmpltuq into a k-register
; plus kmovd. With only AVX512F the compare is emulated with sign-bit vpxor +
; vpcmpgtq, then bytes 0 and 8 of the vector result are extracted and merged
; into a mask using k-register shift/xor/or operations.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; These bitcasts are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2 and 3 (repeated) select lanes
; of the zeroinitializer operand, so result lanes 2-7 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unsigned less-than compare of a <2 x i64> register value against a 128-bit
; vector loaded from %__b. The 2-lane result is widened to 8 lanes with zero
; upper lanes and returned as i8.
; With AVX512BW/VL/DQ enabled the load is expected to fold into vpcmpltuq. With
; only AVX512F the load folds into the sign-bit vpxor of the vpcmpgtq emulation,
; and the mask is rebuilt from extracted bytes with k-register operations.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2 and 3 (repeated) select lanes
; of the zeroinitializer operand, so result lanes 2-7 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked unsigned less-than compare of two <2 x i64> register values. Bits 0 and
; 1 of the i8 argument %__u are ANDed with the 2-lane compare result, which is
; then widened to 8 lanes (upper lanes zero) and returned as i8.
; With AVX512BW/VL/DQ enabled the AND is expected to fold into a {%k1}
; write-mask on vpcmpltuq. With only AVX512F the compare is emulated with
; sign-bit vpxor + vpcmpgtq, the mask is applied with vpand, and the final mask
; is rebuilt from extracted bytes with k-register operations.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; These bitcasts are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, mask bits the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select the masked compare lanes; indices 2 and 3 (repeated) select
; lanes of the zeroinitializer operand, so result lanes 2-7 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked unsigned less-than compare of a <2 x i64> register value against a
; 128-bit vector loaded from %__b. Bits 0 and 1 of the i8 argument %__u are ANDed
; with the 2-lane compare result, which is then widened to 8 lanes (upper lanes
; zero) and returned as i8.
; With AVX512BW/VL/DQ enabled the expected code is a masked vpcmpltuq with a
; memory operand. With only AVX512F the compare is emulated with sign-bit vpxor
; + vpcmpgtq, the mask is applied with vpand, and the final mask is rebuilt from
; extracted bytes with k-register operations.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; The bitcasts of the vector operands are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Compare result is the first `and` operand, mask bits the second.
%4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select the masked compare lanes; indices 2 and 3 (repeated) select
; lanes of the zeroinitializer operand, so result lanes 2-7 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unsigned less-than compare of a <2 x i64> register value against a single i64
; loaded from %__b and splatted to both lanes. The 2-lane result is widened to 8
; lanes with zero upper lanes and returned as i8.
; With AVX512BW/VL/DQ enabled the splat is expected to fold into a {1to2}
; embedded-broadcast operand of vpcmpltuq. With only AVX512F an explicit
; vpbroadcastq is emitted, followed by the sign-bit vpxor + vpcmpgtq emulation
; and a k-register sequence that rebuilds the mask from extracted bytes.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Identity cast (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2 and 3 (repeated) select lanes
; of the zeroinitializer operand, so result lanes 2-7 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked unsigned less-than compare of a <2 x i64> register value against a
; single i64 loaded from %__b and splatted to both lanes. Bits 0 and 1 of the i8
; argument %__u are ANDed with the 2-lane compare result, which is then widened
; to 8 lanes (upper lanes zero) and returned as i8.
; With AVX512BW/VL/DQ enabled the expected code is a masked vpcmpltuq with a
; {1to2} broadcast operand. With only AVX512F an explicit vpbroadcastq, the
; sign-bit vpxor + vpcmpgtq emulation, a vpand with the mask and a k-register
; sequence that rebuilds the final mask are emitted.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Identity cast (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Scalar load + insertelement + all-zero shuffle mask forms the splat of *%__b.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Only lanes 0 and 1 of the 8-bit mask are used.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask bits are the first `and` operand and the compare result the
; second; the expected vpand operand order in the AVX512F-only output depends
; on this.
%4 = and <2 x i1> %extract.i, %2
; Indices 0-1 select the masked compare lanes; indices 2 and 3 (repeated) select
; lanes of the zeroinitializer operand, so result lanes 2-7 are false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unsigned less-than compare of two <2 x i64> register values. The 2-lane result
; is widened to 16 lanes with zero upper lanes and returned as i16.
; With AVX512BW/VL/DQ enabled the expected code is vpcmpltuq into a k-register
; plus kmovd. With only AVX512F the compare is emulated with sign-bit vpxor +
; vpcmpgtq, then bytes 0 and 8 of the vector result are extracted and merged
; into a mask using k-register shift/xor/or operations.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; These bitcasts are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2 and 3 (repeated) select lanes
; of the zeroinitializer operand, so result lanes 2-15 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unsigned less-than compare of a <2 x i64> register value against a 128-bit
; vector loaded from %__b. The 2-lane result is widened to 16 lanes with zero
; upper lanes and returned as i16.
; With AVX512BW/VL/DQ enabled the load is expected to fold into vpcmpltuq. With
; only AVX512F the load folds into the sign-bit vpxor of the vpcmpgtq emulation,
; and the mask is rebuilt from extracted bytes with k-register operations.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; The bitcasts in this body are identity casts (<2 x i64> to <2 x i64>).
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Right-hand operand comes from memory rather than a register.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Indices 0-1 select the compare lanes; indices 2 and 3 (repeated) select lanes
; of the zeroinitializer operand, so result lanes 2-15 are false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than compare of two <2 x i64> register operands. The
; compare result is ANDed with lanes 0-1 of %__u viewed as <8 x i1>, widened
; with zero lanes to <16 x i1> and returned as i16.
; With AVX512VL enabled the expected code is one vpcmpltuq under write-mask %k1
; loaded from %edi. With AVX512F only, the compare is emulated (sign-bit XOR
; plus signed vpcmpgtq) and the mask bits are placed in bytes 0 and 8 of an xmm
; register (vmovd / vpinsrb $8) and applied with vpand.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts, i.e. no-ops on the two inputs.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Widen to 16 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with lanes 0-1 of %__u viewed as <8 x i1>,
; widened with zero lanes to <16 x i1> and returned as i16.
; With AVX512VL enabled the expected code is one vpcmpltuq with a memory operand
; under write-mask %k1. With AVX512F only, the load is folded into the sign-bit
; vpxor, the compare is done with signed vpcmpgtq, and the mask is applied with
; vpand on an xmm register.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; The second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Widen to 16 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes; the 2-lane result is widened with zero lanes to
; <16 x i1> and returned as i16.
; With AVX512VL enabled the expected code is one vpcmpltuq using the {1to2}
; embedded-broadcast memory operand. With AVX512F only, the splat is a separate
; vpbroadcastq followed by the sign-bit XOR and signed vpcmpgtq emulation.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Widen to 16 lanes. Indices 0-1 select the compare result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The compare result is ANDed with lanes 0-1 of
; %__u viewed as <8 x i1>, widened with zero lanes to <16 x i1> and returned as
; i16.
; With AVX512VL enabled the expected code is one vpcmpltuq using the {1to2}
; embedded-broadcast operand under write-mask %k1. With AVX512F only, the splat
; is a separate vpbroadcastq, the compare is emulated with sign-bit XOR plus
; signed vpcmpgtq, and the mask is applied with vpand on an xmm register.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; The extracted mask is the left operand of the and in this test.
%4 = and <2 x i1> %extract.i, %2
; Widen to 16 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than compare of two <2 x i64> register operands; the 2-lane
; result is widened with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL enabled the expected code is a single vpcmpltuq writing a
; k-register. With AVX512F only, the compare is emulated (sign-bit XOR plus
; signed vpcmpgtq), the result bytes are shuffled into a ymm register, and each
; 16-lane half is converted with vpmovsxbd / vptestmd and stored as a 16-bit
; mask on a 32-byte-aligned stack frame before being reloaded as one 32-bit
; value.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts, i.e. no-ops on the two inputs.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Widen to 32 lanes. Indices 0-1 select the compare result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b; the
; 2-lane result is widened with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL enabled the expected code is a single vpcmpltuq with a memory
; operand. With AVX512F only, the load is folded into the sign-bit vpxor, the
; compare uses signed vpcmpgtq, and the two 16-lane halves are converted with
; vpmovsxbd / vptestmd and combined through two 16-bit stack stores and one
; 32-bit reload.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; The second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Widen to 32 lanes. Indices 0-1 select the compare result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of two <2 x i64> register operands. The
; compare result is ANDed with lanes 0-1 of %__u viewed as <8 x i1>, widened
; with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL enabled the expected code is one vpcmpltuq under write-mask %k1.
; With AVX512F only, the compare is emulated (sign-bit XOR plus signed
; vpcmpgtq), the mask is applied with vpand on an xmm register, and the two
; 16-lane halves are converted to k-masks and combined through the stack. The
; low half gets an extra vpslld $31 before vptestmd.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts, i.e. no-ops on the two inputs.
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Widen to 32 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with lanes 0-1 of %__u viewed as <8 x i1>,
; widened with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL enabled the expected code is one vpcmpltuq with a memory operand
; under write-mask %k1. With AVX512F only, the load is folded into the sign-bit
; vpxor, the compare uses signed vpcmpgtq, the mask is applied with vpand, and
; the two 16-lane halves are converted to k-masks and combined through the
; stack.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; The second compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
; Widen to 32 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes; the 2-lane result is widened with zero lanes to
; <32 x i1> and returned as i32.
; With AVX512VL enabled the expected code is one vpcmpltuq using the {1to2}
; embedded-broadcast operand. With AVX512F only, the splat is a separate
; vpbroadcastq, the compare is emulated with sign-bit XOR plus signed vpcmpgtq,
; and the two 16-lane halves are converted to k-masks and combined through the
; stack.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Widen to 32 lanes. Indices 0-1 select the compare result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The compare result is ANDed with lanes 0-1 of
; %__u viewed as <8 x i1>, widened with zero lanes to <32 x i1> and returned as
; i32.
; With AVX512VL enabled the expected code is one vpcmpltuq using the {1to2}
; embedded-broadcast operand under write-mask %k1. With AVX512F only, the splat
; is a separate vpbroadcastq, the compare is emulated with sign-bit XOR plus
; signed vpcmpgtq, the mask is applied with vpand, and the two 16-lane halves
; are converted to k-masks and combined through the stack.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast, i.e. a no-op on %__a.
%0 = bitcast <2 x i64> %__a to <2 x i64>
; Load one scalar and splat it: insert into lane 0, then shuffle with mask <0, 0>.
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; The extracted mask is the left operand of the and in this test.
%4 = and <2 x i1> %extract.i, %2
; Widen to 32 lanes. Indices 0-1 select the masked result, indices 2-3 select
; lanes of the zeroinitializer operand, so lanes 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Register-register unsigned less-than compare on <4 x i64>: icmp ult of %__a
; against %__b, with the <4 x i1> result widened to 8 mask bits (upper 4 bits
; zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) a single ymm vpcmpltuq
; into %k0; with only +avx512f (second run line) the unsigned compare is
; emulated by XORing both operands with 2^63 and using the signed vpcmpgtq,
; after which the four mask bits are assembled one at a time with vpextrb and
; k-register shift/xor sequences.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are simply %__a and %__b.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-7 select from the all-zero
; second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Register-memory unsigned less-than compare on <4 x i64>: the second operand
; is a full <4 x i64> vector loaded from %__b. The <4 x i1> result is widened
; to 8 mask bits (upper 4 bits zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) the load is folded into
; vpcmpltuq as a memory operand; with only +avx512f (second run line) the load
; is folded into the vpxor that flips the sign bits, followed by the signed
; vpcmpgtq and a bit-by-bit assembly of the mask in k-registers.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is simply %__a and %1 is simply the loaded vector.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-7 select from the all-zero
; second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked register-register unsigned less-than compare on <4 x i64>: icmp ult
; of %__a against %__b, ANDed with lanes 0-3 of %__u viewed as <8 x i1>, then
; widened to 8 mask bits (upper 4 bits zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) %__u is moved into %k1 and
; used as the write-mask of a ymm vpcmpltuq; with only +avx512f (second run
; line) the compare is emulated with vpxor/vpcmpgtq, the four mask bits are
; inserted into a vector register with vpinsrb and applied with vpand, and the
; result mask is assembled bit by bit in k-registers.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are simply %__a and %__b.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Lanes 0-3 take the masked compare result; lanes 4-7 select from the all-zero
; second operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked register-memory unsigned less-than compare on <4 x i64>: %__a is
; compared (icmp ult) against a full vector loaded from %__b, the result is
; ANDed with lanes 0-3 of %__u viewed as <8 x i1>, then widened to 8 mask bits
; (upper 4 bits zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) a write-masked vpcmpltuq
; with the load folded as a memory operand; with only +avx512f (second run
; line) the load is folded into the sign-flipping vpxor, followed by vpcmpgtq,
; a vpand with the vectorised mask bits, and bit-by-bit mask assembly in
; k-registers.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is simply %__a and %1 is simply the loaded vector.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Lanes 0-3 take the masked compare result; lanes 4-7 select from the all-zero
; second operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Broadcast-memory unsigned less-than compare on <4 x i64>: a scalar i64 is
; loaded from %__b, splatted to all four lanes, and compared (icmp ult)
; against %__a. The <4 x i1> result is widened to 8 mask bits (upper 4 bits
; zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) a single vpcmpltuq using a
; {1to4} embedded broadcast; with only +avx512f (second run line) an explicit
; vpbroadcastq from memory, the vpxor/vpcmpgtq emulation of the unsigned
; compare, and bit-by-bit mask assembly in k-registers.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is simply %__a.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it into all four lanes (insert at lane 0, shuffle
; with an all-zero index mask).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-7 select from the all-zero
; second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked broadcast-memory unsigned less-than compare on <4 x i64>: a scalar
; i64 loaded from %__b is splatted and compared (icmp ult) against %__a, the
; result is ANDed with lanes 0-3 of %__u viewed as <8 x i1>, then widened to
; 8 mask bits (upper 4 bits zero) and returned as an i8.
; Expected codegen: with +avx512vl (first run line) a write-masked vpcmpltuq
; with a {1to4} embedded broadcast; with only +avx512f (second run line) an
; explicit vpbroadcastq, the vpxor/vpcmpgtq emulation, a vpand with the
; vectorised mask bits, and bit-by-bit mask assembly in k-registers.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is simply %__a.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it into all four lanes (insert at lane 0, shuffle
; with an all-zero index mask).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Lanes 0-3 take the masked compare result; lanes 4-7 select from the all-zero
; second operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Register-register unsigned less-than compare on <4 x i64>: icmp ult of %__a
; against %__b, with the <4 x i1> result widened to 16 mask bits (upper 12
; bits zero) and returned as an i16.
; Expected codegen: with +avx512vl (first run line) a single ymm vpcmpltuq
; into %k0; with only +avx512f (second run line) the unsigned compare is
; emulated by XORing both operands with 2^63 and using the signed vpcmpgtq,
; after which the four mask bits are assembled one at a time with vpextrb and
; k-register shift/xor sequences.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are simply %__a and %__b.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-15 all use indices 4-7, which
; select from the all-zero second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Register-memory unsigned less-than compare on <4 x i64>: the second operand
; is a full <4 x i64> vector loaded from %__b. The <4 x i1> result is widened
; to 16 mask bits (upper 12 bits zero) and returned as an i16.
; Expected codegen: with +avx512vl (first run line) the load is folded into
; vpcmpltuq as a memory operand; with only +avx512f (second run line) the load
; is folded into the vpxor that flips the sign bits, followed by the signed
; vpcmpgtq and a bit-by-bit assembly of the mask in k-registers.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is simply %__a and %1 is simply the loaded vector.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-15 all use indices 4-7, which
; select from the all-zero second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked register-register unsigned less-than compare on <4 x i64>: icmp ult
; of %__a against %__b, ANDed with lanes 0-3 of %__u viewed as <8 x i1>, then
; widened to 16 mask bits (upper 12 bits zero) and returned as an i16.
; Expected codegen: with +avx512vl (first run line) %__u is moved into %k1 and
; used as the write-mask of a ymm vpcmpltuq; with only +avx512f (second run
; line) the compare is emulated with vpxor/vpcmpgtq, the four mask bits are
; inserted into a vector register with vpinsrb and applied with vpand, and the
; result mask is assembled bit by bit in k-registers.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 and %1 are simply %__a and %__b.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Lanes 0-3 take the masked compare result; lanes 4-15 all use indices 4-7,
; which select from the all-zero second operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked register-memory unsigned less-than compare on <4 x i64>: %__a is
; compared (icmp ult) against a full vector loaded from %__b, the result is
; ANDed with lanes 0-3 of %__u viewed as <8 x i1>, then widened to 16 mask
; bits (upper 12 bits zero) and returned as an i16.
; Expected codegen: with +avx512vl (first run line) a write-masked vpcmpltuq
; with the load folded as a memory operand; with only +avx512f (second run
; line) the load is folded into the sign-flipping vpxor, followed by vpcmpgtq,
; a vpand with the vectorised mask bits, and bit-by-bit mask assembly in
; k-registers.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcasts: %0 is simply %__a and %1 is simply the loaded vector.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Lanes 0-3 take the masked compare result; lanes 4-15 all use indices 4-7,
; which select from the all-zero second operand.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Broadcast-memory unsigned less-than compare on <4 x i64>: a scalar i64 is
; loaded from %__b, splatted to all four lanes, and compared (icmp ult)
; against %__a. The <4 x i1> result is widened to 16 mask bits (upper 12 bits
; zero) and returned as an i16.
; Expected codegen: with +avx512vl (first run line) a single vpcmpltuq using a
; {1to4} embedded broadcast; with only +avx512f (second run line) an explicit
; vpbroadcastq from memory, the vpxor/vpcmpgtq emulation of the unsigned
; compare, and bit-by-bit mask assembly in k-registers.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Same-type bitcast: %0 is simply %__a.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it into all four lanes (insert at lane 0, shuffle
; with an all-zero index mask).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; Lanes 0-3 take the compare result; lanes 4-15 all use indices 4-7, which
; select from the all-zero second operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked form of the broadcast-from-memory unsigned less-than test: the four
; compare bits are ANDed with the low four bits of %__u, widened to
; <16 x i1> with zero upper lanes, and returned as an i16 bitmask.
; With AVX512VL enabled the assertions expect %__u to be moved into %k1 and
; used as the write-mask of a single vpcmpltuq with a 1to4 embedded
; broadcast. With AVX512F only they expect the mask bits to be moved through
; general-purpose registers into a vector, combined with vpand, and the
; result mask rebuilt with vpextrb/kshift/kxor.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it to all four lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned less-than compare of the four i64 lanes of %__a and %__b
; (register operands). The <4 x i1> result is widened to <32 x i1> with zero
; upper lanes and returned as an i32 bitmask.
; With AVX512VL enabled the assertions expect a single vpcmpltuq into %k0.
; With AVX512F only they expect a sign-bit xor plus signed vpcmpgtq, and the
; 32-bit mask assembled through a 32-byte-aligned stack slot: two 16-bit
; kmovw stores followed by one 32-bit reload.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned less-than compare of the four i64 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; With AVX512VL enabled the assertions expect the load folded into
; vpcmpltuq as a memory operand. With AVX512F only they expect the load
; folded into the sign-bit vpxor, a signed vpcmpgtq, and the 32-bit mask
; assembled through a 32-byte-aligned stack slot.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second operand is a whole vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of the four i64 lanes of %__a and %__b
; (register operands). The compare bits are ANDed with the low four bits of
; %__u, widened to <32 x i1> with zero upper lanes, and returned as an i32.
; With AVX512VL enabled the assertions expect %__u in %k1 as the write-mask
; of a single vpcmpltuq. With AVX512F only they expect the mask bits to be
; moved through general-purpose registers into a vector, combined with
; vpand, and the 32-bit result assembled through a 32-byte-aligned stack
; slot.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned less-than compare of the four i64 lanes of %__a against a
; full <4 x i64> vector loaded from %__b. The compare bits are ANDed with
; the low four bits of %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32.
; With AVX512VL enabled the assertions expect %__u in %k1 as the write-mask
; of a single vpcmpltuq with a folded memory operand. With AVX512F only they
; expect the load folded into the sign-bit vpxor, a vector vpand with the
; expanded mask bits, and the 32-bit result assembled through a
; 32-byte-aligned stack slot.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second operand is a whole vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of the four i64 lanes of %__a against a scalar
; i64 loaded from %__b and splatted to every lane. The <4 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; With AVX512VL enabled the assertions expect a single vpcmpltuq using a
; 1to4 embedded broadcast. With AVX512F only they expect an explicit
; vpbroadcastq, a sign-bit xor plus signed vpcmpgtq, and the 32-bit mask
; assembled through a 32-byte-aligned stack slot.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it to all four lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned less-than compare of the four i64 lanes of %__a against a
; scalar i64 loaded from %__b and splatted to every lane. The compare bits
; are ANDed with the low four bits of %__u, widened to <32 x i1> with zero
; upper lanes, and returned as an i32.
; With AVX512VL enabled the assertions expect %__u in %k1 as the write-mask
; of a single vpcmpltuq with a 1to4 embedded broadcast. With AVX512F only
; they expect an explicit vpbroadcastq, a sign-bit xor plus signed
; vpcmpgtq, a vector vpand with the expanded mask bits, and the 32-bit
; result assembled through a 32-byte-aligned stack slot.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it to all four lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned less-than compare of the four i64 lanes of %__a and %__b
; (register operands). The <4 x i1> result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask.
; With AVX512VL enabled the assertions expect a single vpcmpltuq and a
; kmovq of the result. With AVX512F only they expect a sign-bit xor plus
; signed vpcmpgtq, and the 64-bit mask assembled on a 32-byte-aligned stack
; frame: two zero kmovw stores, two computed kmovw stores, then two 32-bit
; reloads merged with shlq/orq.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned less-than compare of the four i64 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; With AVX512VL enabled the assertions expect the load folded into
; vpcmpltuq and a kmovq of the result. With AVX512F only they expect the
; load folded into the sign-bit vpxor, a signed vpcmpgtq, and the 64-bit
; mask assembled on a 32-byte-aligned stack frame from four 16-bit kmovw
; stores and two 32-bit reloads merged with shlq/orq.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second operand is a whole vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of the four i64 lanes of %__a and %__b
; (register operands). The compare bits are ANDed with the low four bits of
; %__u, widened to <64 x i1> with zero upper lanes, and returned as an i64.
; With AVX512VL enabled the assertions expect %__u in %k1 as the write-mask
; of a single vpcmpltuq and a kmovq of the result. With AVX512F only they
; expect a sign-bit xor plus signed vpcmpgtq, a vector vpand with the
; expanded mask bits, and the 64-bit result assembled on a 32-byte-aligned
; stack frame from four 16-bit kmovw stores and two 32-bit reloads merged
; with shlq/orq.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcasts: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of the four i64 lanes of %__a against a
; full <4 x i64> vector loaded from %__b. The compare bits are ANDed with
; the low four bits of %__u, widened to <64 x i1> with zero upper lanes, and
; returned as an i64.
; With AVX512VL enabled the assertions expect %__u in %k1 as the write-mask
; of a single vpcmpltuq with a folded memory operand. With AVX512F only they
; expect the load folded into the sign-bit vpxor, a vector vpand with the
; expanded mask bits, and the 64-bit result assembled on a 32-byte-aligned
; stack frame from four 16-bit kmovw stores and two 32-bit reloads merged
; with shlq/orq.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Second operand is a whole vector read from memory.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
; Indices 4-7 pick from the zeroinitializer operand, so every lane above
; the low four is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned less-than compare of the four i64 lanes of %__a against a scalar
; i64 loaded from %__b and splatted to every lane. The <4 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; With AVX512VL enabled the assertions expect a single vpcmpltuq using a
; 1to4 embedded broadcast and a kmovq of the result. With AVX512F only they
; expect an explicit vpbroadcastq, a sign-bit xor plus signed vpcmpgtq, and
; the 64-bit mask assembled on a 32-byte-aligned stack frame from four
; 16-bit kmovw stores and two 32-bit reloads merged with shlq/orq.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; No-op bitcast: source and destination types are identical.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it to all four lanes (all shuffle indices are 0).
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick from the
; zeroinitializer operand, so every lane above the low four is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned-less-than compare of <4 x i64> %__a against the i64 loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; the low four bits of %__u, widened to <64 x i1> with zero upper lanes, and
; returned as an i64 bitmask. The VLX run expects one masked vpcmpltuq with a
; {1to4} broadcast operand; the NoVLX run expects the expanded sequence below.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kmovw %k0, %esi
; NoVLX-NEXT: vmovd %esi, %xmm1
; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcast below leaves %__a unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
; Load one scalar and splat it to every lane of the second compare operand.
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
; Only the low four bits of the i8 mask take part in the AND.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unsigned-less-than compare of the two <8 x i64> arguments. The <8 x i1>
; result is widened to <16 x i1> with zero upper lanes and returned as an i16
; bitmask. Both RUN configurations expect a single zmm vpcmpltuq.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so the upper eight result lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unsigned-less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <16 x i1> with zero
; upper lanes and returned as an i16 bitmask. Both RUN configurations expect
; the load to appear as the memory operand of a single vpcmpltuq.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave their operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so the upper eight result lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned-less-than compare of the two <8 x i64> arguments. The
; compare result is ANDed with the eight bits of %__u, widened to <16 x i1>
; with zero upper lanes, and returned as an i16 bitmask. Both RUN
; configurations expect %__u to be used as the {%k1} write-mask of vpcmpltuq.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so the upper eight result lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked unsigned-less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with the
; eight bits of %__u, widened to <16 x i1> with zero upper lanes, and returned
; as an i16 bitmask. Both RUN configurations expect a single vpcmpltuq with a
; memory operand and a {%k1} write-mask.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave their operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so the upper eight result lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned-less-than compare of <8 x i64> %__a against the i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as an i16 bitmask. Both RUN
; configurations expect a single vpcmpltuq with a {1to8} broadcast operand.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcast below leaves %__a unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one scalar and splat it to every lane of the second compare operand.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so the upper eight result lanes are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked unsigned-less-than compare of <8 x i64> %__a against the i64 loaded
; from %__b and splatted to all eight lanes. The compare result is ANDed with
; the eight bits of %__u, widened to <16 x i1> with zero upper lanes, and
; returned as an i16 bitmask. Both RUN configurations expect a single
; vpcmpltuq with a {1to8} broadcast operand and a {%k1} write-mask.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcast below leaves %__a unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one scalar and splat it to every lane of the second compare operand.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so the upper eight result lanes are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unsigned-less-than compare of the two <8 x i64> arguments. The <8 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32
; bitmask. The VLX run expects vpcmpltuq followed by a kmovd; the NoVLX run
; expects the longer sequence below, which rebuilds the mask through a vector
; register and a stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unsigned-less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <32 x i1> with zero
; upper lanes and returned as an i32 bitmask. The VLX run expects vpcmpltuq
; with a memory operand followed by a kmovd; the NoVLX run expects the longer
; sequence below, which rebuilds the mask through a vector register and a
; stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave their operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned-less-than compare of the two <8 x i64> arguments. The
; compare result is ANDed with the eight bits of %__u, widened to <32 x i1>
; with zero upper lanes, and returned as an i32 bitmask. The VLX run expects a
; write-masked vpcmpltuq followed by a kmovd; the NoVLX run expects the longer
; sequence below, which rebuilds the mask through a vector register and a
; stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked unsigned-less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with the
; eight bits of %__u, widened to <32 x i1> with zero upper lanes, and returned
; as an i32 bitmask. The VLX run expects a write-masked vpcmpltuq with a
; memory operand followed by a kmovd; the NoVLX run expects the longer
; sequence below, which rebuilds the mask through a vector register and a
; stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave their operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned-less-than compare of <8 x i64> %__a against the i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask. The VLX run
; expects vpcmpltuq with a {1to8} broadcast operand followed by a kmovd; the
; NoVLX run expects the longer sequence below, which rebuilds the mask through
; a vector register and a stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcast below leaves %__a unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one scalar and splat it to every lane of the second compare operand.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked unsigned-less-than compare of <8 x i64> %__a against the i64 loaded
; from %__b and splatted to all eight lanes. The compare result is ANDed with
; the eight bits of %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32 bitmask. The VLX run expects a write-masked vpcmpltuq
; with a {1to8} broadcast operand followed by a kmovd; the NoVLX run expects
; the longer sequence below, which rebuilds the mask through a vector register
; and a stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcast below leaves %__a unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Load one scalar and splat it to every lane of the second compare operand.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unsigned-less-than compare of the two <8 x i64> arguments. The <8 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64
; bitmask. The VLX run expects vpcmpltuq followed by a kmovq; the NoVLX run
; expects the longer sequence below, which rebuilds the mask through a vector
; register and assembles the 64-bit result from two 32-bit stack loads.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unsigned-less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask. The VLX run expects vpcmpltuq
; with a memory operand followed by a kmovq; the NoVLX run expects the longer
; sequence below, which rebuilds the mask through a vector register and
; assembles the 64-bit result from two 32-bit stack loads.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave their operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned-less-than compare of the two <8 x i64> arguments. The
; compare result is ANDed with the eight bits of %__u, widened to <64 x i1>
; with zero upper lanes, and returned as an i64 bitmask. The VLX run expects a
; write-masked vpcmpltuq followed by a kmovq; the NoVLX run expects the longer
; sequence below, which rebuilds the mask through a vector register and
; assembles the 64-bit result from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; The same-type bitcasts below leave both operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the i8 mask as eight i1 lanes and apply it to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked unsigned less-than compare of an <8 x i64> register operand against
; an <8 x i64> vector loaded from %__b.
; The 8 compare bits are ANDed with the i8 mask %__u, widened to 64 lanes with
; zeros in lanes 8-63, and returned as an i64.
; Both check prefixes expect the load to be folded into vpcmpltuq as a (%rsi)
; memory operand; the NoVLX prefix additionally expects the long sequence that
; rebuilds the mask and assembles the i64 from stack slots.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Source and destination types are identical, so this bitcast is a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Second compare operand comes from memory; the bitcast after it is a no-op.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Indices 0-7 pick the masked compare bits; indices 8-15 pick elements of the
; zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked unsigned less-than compare of an <8 x i64> register operand against
; a single i64 loaded from %__b and splatted to all 8 lanes.
; The 8 compare bits are widened to 64 lanes with zeros in lanes 8-63 and
; returned as an i64.
; Both check prefixes expect the scalar load and splat to be folded into
; vpcmpltuq as a (%rdi){1to8} broadcast memory operand; the NoVLX prefix
; additionally expects the long sequence that rebuilds the mask and assembles
; the i64 from stack slots.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Source and destination types are identical, so this bitcast is a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane holds that scalar.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Indices 0-7 pick the compare bits; indices 8-15 pick elements of the
; zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked unsigned less-than compare of an <8 x i64> register operand against
; a single i64 loaded from %__b and splatted to all 8 lanes.
; The 8 compare bits are ANDed with the i8 mask %__u, widened to 64 lanes with
; zeros in lanes 8-63, and returned as an i64.
; Both check prefixes expect a masked vpcmpltuq with a (%rsi){1to8} broadcast
; memory operand; the NoVLX prefix additionally expects the long sequence that
; rebuilds the mask and assembles the i64 from stack slots.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Source and destination types are identical, so this bitcast is a no-op.
%0 = bitcast <8 x i64> %__a to <8 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane holds that scalar.
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
; The operand order of this 'and' is mask-first here, unlike the non-broadcast
; masked variants; 'and' is commutative, so the value is the same.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Indices 0-7 pick the masked compare bits; indices 8-15 pick elements of the
; zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; External declaration of an intrinsic taking two <16 x float> vectors, an i32,
; an i16 and another i32, and returning an i16.
; NOTE(review): not referenced by the functions immediately following it, which
; use plain fcmp; presumably used by 512-bit tests later in the file - confirm.
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
; Unmasked ordered-equal compare of two <4 x float> values (passed as
; <2 x i64> and bitcast). The 4 compare bits are widened to 8 lanes with zeros
; in lanes 4-7 and returned as an i8.
; Checks under the VLX prefix expect vcmpeqps writing %k0 directly, then kmovd.
; Checks under the NoVLX prefix expect an xmm-register vcmpeqps followed by
; per-element vpextrb and a chain of k-register shift/xor operations that
; builds the result mask one bit at a time.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked ordered-equal compare of a <4 x float> register value against a
; vector loaded from %__b (both carried as <2 x i64> and bitcast).
; The 4 compare bits are widened to 8 lanes with zeros in lanes 4-7 and
; returned as an i8.
; Both check prefixes expect the load folded into vcmpeqps as a (%rdi) memory
; operand; the NoVLX prefix expects the compare result in an xmm register
; followed by the vpextrb / k-register shift/xor chain.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked ordered-equal compare of a <4 x float> register value against a
; single float loaded from %__b and splatted to all 4 lanes.
; The 4 compare bits are widened to 8 lanes with zeros in lanes 4-7 and
; returned as an i8.
; Checks under the VLX prefix expect the splat folded into vcmpeqps as a
; (%rdi){1to4} broadcast operand. Checks under the NoVLX prefix expect a
; separate vbroadcastss, a register vcmpeqps, then the vpextrb / k-register
; shift/xor chain.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane holds that scalar.
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked ordered-equal compare of two <4 x float> values (passed as <2 x i64>
; and bitcast). The 4 compare bits are ANDed with the i4 mask %__u, widened to
; 8 lanes with zeros in lanes 4-7, and returned as an i8.
; Checks under the VLX prefix expect a single vcmpeqps predicated on %k1.
; Checks under the NoVLX prefix expect the mask to be expanded into a vector
; with a zero-masked vpternlogd, combined with the xmm compare result through
; vandps, and then converted to a k-register mask bit by bit.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 pick the masked compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked ordered-equal compare of a <4 x float> register value against a
; vector loaded from %__b (both carried as <2 x i64> and bitcast).
; The 4 compare bits are ANDed with the i4 mask %__u, widened to 8 lanes with
; zeros in lanes 4-7, and returned as an i8.
; Both check prefixes expect the load folded into vcmpeqps as a (%rsi) memory
; operand. The NoVLX prefix expects the mask applied via a zero-masked
; vpternlogd plus vandps, then the bit-by-bit k-register construction.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 pick the masked compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked ordered-equal compare of a <4 x float> register value against a
; single float loaded from %__b and splatted to all 4 lanes.
; The 4 compare bits are ANDed with the i4 mask %__u, widened to 8 lanes with
; zeros in lanes 4-7, and returned as an i8.
; Checks under the VLX prefix expect a predicated vcmpeqps with a
; (%rsi){1to4} broadcast operand. Checks under the NoVLX prefix expect a
; separate vbroadcastss, the mask applied via zero-masked vpternlogd plus
; vandps, then the bit-by-bit k-register construction.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane holds that scalar.
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 pick the masked compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked ordered-equal compare of two <4 x float> values (passed as
; <2 x i64> and bitcast). The 4 compare bits are widened to 16 lanes with
; zeros in lanes 4-15 and returned as an i16.
; Checks under the VLX prefix expect vcmpeqps writing %k0 directly, then kmovd.
; Checks under the NoVLX prefix expect an xmm-register vcmpeqps followed by
; per-element vpextrb and a chain of k-register shift/xor operations that
; builds the result mask one bit at a time.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of a <4 x float> register value against a
; vector loaded from %__b (both carried as <2 x i64> and bitcast).
; The 4 compare bits are widened to 16 lanes with zeros in lanes 4-15 and
; returned as an i16.
; Both check prefixes expect the load folded into vcmpeqps as a (%rdi) memory
; operand; the NoVLX prefix expects the compare result in an xmm register
; followed by the vpextrb / k-register shift/xor chain.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of a <4 x float> register value against a
; single float loaded from %__b and splatted to all 4 lanes.
; The 4 compare bits are widened to 16 lanes with zeros in lanes 4-15 and
; returned as an i16.
; Checks under the VLX prefix expect the splat folded into vcmpeqps as a
; (%rdi){1to4} broadcast operand. Checks under the NoVLX prefix expect a
; separate vbroadcastss, a register vcmpeqps, then the vpextrb / k-register
; shift/xor chain.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane holds that scalar.
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 pick the compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked ordered-equal compare of two <4 x float> values (passed as <2 x i64>
; and bitcast). The 4 compare bits are ANDed with the i4 mask %__u, widened to
; 16 lanes with zeros in lanes 4-15, and returned as an i16.
; Checks under the VLX prefix expect a single vcmpeqps predicated on %k1.
; Checks under the NoVLX prefix expect the mask to be expanded into a vector
; with a zero-masked vpternlogd, combined with the xmm compare result through
; vandps, and then converted to a k-register mask bit by bit.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 pick the masked compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-15 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of a <4 x float> register value against a
; vector loaded from %__b (both carried as <2 x i64> and bitcast).
; The 4 compare bits are ANDed with the i4 mask %__u, widened to 16 lanes with
; zeros in lanes 4-15, and returned as an i16.
; Both check prefixes expect the load folded into vcmpeqps as a (%rsi) memory
; operand. The NoVLX prefix expects the mask applied via a zero-masked
; vpternlogd plus vandps, then the bit-by-bit k-register construction.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret the register argument and the loaded vector as four floats each.
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Reinterpret the scalar mask as one bit per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 pick the masked compare bits; indices 4-7 pick elements of the
; zeroinitializer operand, so result lanes 4-15 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of four floats against a scalar float that is
; loaded from memory and splatted to all four elements; the 4-bit result is
; widened to an i16.
;   %__u - 4-bit mask ANDed element-wise with the compare result
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to4} broadcast operand; the AVX512F-only expectations use vbroadcastss and
; then assemble the mask element by element.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 12 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked ordered-equal compare of two <4 x float> register operands, with the
; 4-bit result widened to an i32.
;   %__a, %__b - operands, each reinterpreted as <4 x float>
; With AVX512VL enabled the expected code is vcmpeqps into a k-register plus a
; kmovd; the AVX512F-only expectations go through an aligned stack slot
; (vpshufb, vpmovsxbd, vptestmd, kmovw stores, then a movl reload).
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 28 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of four floats against a vector loaded from
; memory, with the 4-bit result widened to an i32.
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the second operand (loaded as <2 x i64>)
; With AVX512VL enabled the expected code is vcmpeqps with a memory operand
; into a k-register; the AVX512F-only expectations go through an aligned stack
; slot (vpshufb, vpmovsxbd, vptestmd, kmovw stores, then a movl reload).
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 28 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of four floats against a scalar float that is
; loaded from memory and splatted to all four elements; the 4-bit result is
; widened to an i32.
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to4} broadcast operand; the AVX512F-only expectations use vbroadcastss and
; then go through an aligned stack slot.
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 28 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked ordered-equal compare of two <4 x float> register operands, with the
; 4-bit result widened to an i32.
;   %__u       - 4-bit mask ANDed element-wise with the compare result
;   %__a, %__b - operands, each reinterpreted as <4 x float>
; With AVX512VL enabled the expected code is one masked vcmpeqps into a
; k-register; the AVX512F-only expectations materialize the mask as a vector
; with vpternlogd, apply it with vandps, and then go through an aligned stack
; slot.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 28 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of four floats against a vector loaded from
; memory, with the 4-bit result widened to an i32.
;   %__u - 4-bit mask ANDed element-wise with the compare result
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the second operand (loaded as <2 x i64>)
; With AVX512VL enabled the expected code is one masked vcmpeqps with a memory
; operand; the AVX512F-only expectations materialize the mask as a vector with
; vpternlogd, apply it with vandps, and then go through an aligned stack slot.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 28 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of four floats against a scalar float that is
; loaded from memory and splatted to all four elements; the 4-bit result is
; widened to an i32.
;   %__u - 4-bit mask ANDed element-wise with the compare result
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to4} broadcast operand of a masked vcmpeqps; the AVX512F-only expectations
; use vbroadcastss, apply the mask with vpternlogd/vandps, and then go through
; an aligned stack slot.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 28 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked ordered-equal compare of two <4 x float> register operands, with the
; 4-bit result widened to an i64.
;   %__a, %__b - operands, each reinterpreted as <4 x float>
; With AVX512VL enabled the expected code is vcmpeqps into a k-register plus a
; kmovq; the AVX512F-only expectations store 16-bit mask pieces to an aligned
; stack area and combine two 32-bit reloads with shlq/orq.
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 60 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of four floats against a vector loaded from
; memory, with the 4-bit result widened to an i64.
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the second operand (loaded as <2 x i64>)
; With AVX512VL enabled the expected code is vcmpeqps with a memory operand
; plus a kmovq; the AVX512F-only expectations store 16-bit mask pieces to an
; aligned stack area and combine two 32-bit reloads with shlq/orq.
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 60 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of four floats against a scalar float that is
; loaded from memory and splatted to all four elements; the 4-bit result is
; widened to an i64.
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to4} broadcast operand; the AVX512F-only expectations use vbroadcastss,
; store 16-bit mask pieces to an aligned stack area, and combine two 32-bit
; reloads with shlq/orq.
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare elements; every index >= 4 selects an element
; of the zeroinitializer operand, so the upper 60 elements are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked ordered-equal compare of two <4 x float> register operands, with the
; 4-bit result widened to an i64.
;   %__u       - 4-bit mask ANDed element-wise with the compare result
;   %__a, %__b - operands, each reinterpreted as <4 x float>
; With AVX512VL enabled the expected code is one masked vcmpeqps plus a kmovq;
; the AVX512F-only expectations apply the mask with vpternlogd/vandps, store
; 16-bit mask pieces to an aligned stack area, and combine two 32-bit reloads
; with shlq/orq.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 60 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of four floats against a vector loaded from
; memory, with the 4-bit result widened to an i64.
;   %__u - 4-bit mask ANDed element-wise with the compare result
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the second operand (loaded as <2 x i64>)
; With AVX512VL enabled the expected code is one masked vcmpeqps with a memory
; operand plus a kmovq; the AVX512F-only expectations apply the mask with
; vpternlogd/vandps, store 16-bit mask pieces to an aligned stack area, and
; combine two 32-bit reloads with shlq/orq.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 60 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of four floats against a scalar float that is
; loaded from memory and splatted to all four elements; the 4-bit result is
; widened to an i64.
;   %__u - 4-bit mask ANDed element-wise with the compare result
;   %__a - first operand, reinterpreted as <4 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to4} broadcast operand of a masked vcmpeqps; the AVX512F-only expectations
; use vbroadcastss, apply the mask with vpternlogd/vandps, store 16-bit mask
; pieces to an aligned stack area, and combine two 32-bit reloads with
; shlq/orq.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare elements; every index >= 4 selects an
; element of the zeroinitializer operand, so the upper 60 elements are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked ordered-equal compare of two <8 x float> register operands, with the
; 8-bit result widened to an i16.
;   %__a, %__b - operands, each reinterpreted as <8 x float>
; With AVX512VL enabled the expected code is a ymm vcmpeqps into a k-register;
; the AVX512F-only expectations compare the enclosing zmm registers and clear
; the upper 8 mask bits with a kshiftlw/kshiftrw pair.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare elements; indices 8-15 select elements of the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of eight floats against a vector loaded from
; memory, with the 8-bit result widened to an i16.
;   %__a - first operand, reinterpreted as <8 x float>
;   %__b - pointer to the second operand (loaded as <4 x i64>)
; With AVX512VL enabled the expected code is a ymm vcmpeqps with a memory
; operand; the AVX512F-only expectations load the operand with vmovaps,
; compare the enclosing zmm registers, and clear the upper 8 mask bits with a
; kshiftlw/kshiftrw pair.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare elements; indices 8-15 select elements of the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of eight floats against a scalar float that is
; loaded from memory and splatted to all eight elements; the 8-bit result is
; widened to an i16.
;   %__a - first operand, reinterpreted as <8 x float>
;   %__b - pointer to the scalar float that is broadcast
; With AVX512VL enabled the expected code folds the splat into an embedded
; {1to8} broadcast operand; the AVX512F-only expectations use vbroadcastss,
; compare the enclosing zmm registers, and clear the upper 8 mask bits with a
; kshiftlw/kshiftrw pair.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert it at element 0, then shuffle with an
; all-zero index mask so every element reads element 0.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare elements; indices 8-15 select elements of the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked ordered-equal compare of two register operands viewed as <8 x float>.
; The <8 x i1> compare result is ANDed with the bits of %__u, widened to 16
; lanes with zero upper lanes, and returned as an i16 bitmask.
; The VLX checks expect the AND to fold into a {%k1} write-mask on a ymm
; vcmpeqps. The NoVLX checks expect the same write-mask on a zmm compare,
; followed by a kshiftlw/kshiftrw pair that clears the upper 8 mask bits.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-15 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of %__a against a full 256-bit vector loaded
; from %__b, both viewed as <8 x float>. The result is ANDed with the bits of
; %__u, widened to 16 lanes with zero upper lanes, and returned as i16.
; The VLX checks expect the load and the mask to fold into one ymm vcmpeqps.
; The NoVLX checks expect a separate vmovaps load, a write-masked zmm compare,
; and a kshiftlw/kshiftrw pair that clears the upper 8 mask bits.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-15 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of the 8 floats in %__a against a scalar float
; loaded from %__b and splatted to all 8 lanes. The result is ANDed with the
; bits of %__u, widened to 16 lanes with zero upper lanes, and returned as i16.
; The VLX checks expect one write-masked ymm vcmpeqps with a {1to8} broadcast
; memory operand. The NoVLX checks expect vbroadcastss, a write-masked zmm
; compare, and a kshiftlw/kshiftrw pair that clears the upper 8 mask bits.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; indices 8-15 select lanes of the
; zeroinitializer operand, so bits 8-15 of the returned mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked ordered-equal compare of two register operands viewed as
; <8 x float>; the <8 x i1> result is widened to 32 lanes with zero upper
; lanes and returned as an i32 bitmask.
; The VLX checks expect one ymm vcmpeqps and a kmovd of the mask.
; The NoVLX checks expect a longer sequence: a zmm compare, each of the low 8
; mask bits moved to a GPR (kshiftrw/kmovw), a byte vector rebuilt with
; vpinsrb and re-tested into a mask, and the i32 result reloaded from an
; aligned stack slot after a zero mask was stored to another slot.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of %__a against a full 256-bit vector loaded
; from %__b, both viewed as <8 x float>; the <8 x i1> result is widened to 32
; lanes with zero upper lanes and returned as an i32 bitmask.
; The VLX checks expect the load to fold into one ymm vcmpeqps.
; The NoVLX checks expect a separate vmovaps load and zmm compare, then each of
; the low 8 mask bits moved to a GPR, a byte vector rebuilt with vpinsrb and
; re-tested into a mask, and the i32 result reloaded from an aligned stack
; slot after a zero mask was stored to another slot.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of the 8 floats in %__a against a scalar float
; loaded from %__b and splatted to all 8 lanes; the <8 x i1> result is widened
; to 32 lanes with zero upper lanes and returned as an i32 bitmask.
; The VLX checks expect one ymm vcmpeqps with a {1to8} broadcast memory operand.
; The NoVLX checks expect vbroadcastss and a zmm compare, then each of the low
; 8 mask bits moved to a GPR, a byte vector rebuilt with vpinsrb and re-tested
; into a mask, and the i32 result reloaded from an aligned stack slot after a
; zero mask was stored to another slot.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked ordered-equal compare of two register operands viewed as <8 x float>.
; The <8 x i1> result is ANDed with the bits of %__u, widened to 32 lanes with
; zero upper lanes, and returned as an i32 bitmask.
; The VLX checks expect the AND to fold into a {%k1} write-mask on a ymm
; vcmpeqps. The NoVLX checks expect a write-masked zmm compare, then each of
; the low 8 mask bits moved to a GPR, a byte vector rebuilt with vpinsrb and
; re-tested into a mask, and the i32 result reloaded from an aligned stack
; slot after a zero mask was stored to another slot.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of %__a against a full 256-bit vector loaded
; from %__b, both viewed as <8 x float>. The result is ANDed with the bits of
; %__u, widened to 32 lanes with zero upper lanes, and returned as i32.
; The VLX checks expect the load and the mask to fold into one ymm vcmpeqps.
; The NoVLX checks expect a vmovaps load and a write-masked zmm compare, then
; each of the low 8 mask bits moved to a GPR, a byte vector rebuilt with
; vpinsrb and re-tested into a mask, and the i32 result reloaded from an
; aligned stack slot after a zero mask was stored to another slot.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of the 8 floats in %__a against a scalar float
; loaded from %__b and splatted to all 8 lanes. The result is ANDed with the
; bits of %__u, widened to 32 lanes with zero upper lanes, and returned as i32.
; The VLX checks expect one write-masked ymm vcmpeqps with a {1to8} broadcast
; memory operand. The NoVLX checks expect vbroadcastss and a write-masked zmm
; compare, then each of the low 8 mask bits moved to a GPR, a byte vector
; rebuilt with vpinsrb and re-tested into a mask, and the i32 result reloaded
; from an aligned stack slot after a zero mask was stored to another slot.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked ordered-equal compare of two register operands viewed as
; <8 x float>; the <8 x i1> result is widened to 64 lanes with zero upper
; lanes and returned as an i64 bitmask.
; The VLX checks expect one ymm vcmpeqps and a kmovq of the mask.
; The NoVLX checks expect a zmm compare, each of the low 8 mask bits moved to a
; GPR, a byte vector rebuilt with vpinsrb and re-tested into a mask, three zero
; masks plus the computed mask stored to an aligned 64-byte stack area, and
; the i64 result assembled from two 32-bit reloads with shlq $32 and orq.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of %__a against a full 256-bit vector loaded
; from %__b, both viewed as <8 x float>; the <8 x i1> result is widened to 64
; lanes with zero upper lanes and returned as an i64 bitmask.
; The VLX checks expect the load to fold into one ymm vcmpeqps plus a kmovq.
; The NoVLX checks expect a vmovaps load and zmm compare, each of the low 8
; mask bits moved to a GPR, a byte vector rebuilt with vpinsrb and re-tested
; into a mask, three zero masks plus the computed mask stored to an aligned
; 64-byte stack area, and the i64 result assembled from two 32-bit reloads
; with shlq $32 and orq.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of the 8 floats in %__a against a scalar float
; loaded from %__b and splatted to all 8 lanes; the <8 x i1> result is widened
; to 64 lanes with zero upper lanes and returned as an i64 bitmask.
; The VLX checks expect one ymm vcmpeqps with a {1to8} broadcast memory operand
; plus a kmovq. The NoVLX checks expect vbroadcastss and a zmm compare, each of
; the low 8 mask bits moved to a GPR, a byte vector rebuilt with vpinsrb and
; re-tested into a mask, three zero masks plus the computed mask stored to an
; aligned 64-byte stack area, and the i64 result assembled from two 32-bit
; reloads with shlq $32 and orq.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 select the compare result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked ordered-equal compare of two register operands viewed as <8 x float>.
; The <8 x i1> result is ANDed with the bits of %__u, widened to 64 lanes with
; zero upper lanes, and returned as an i64 bitmask.
; The VLX checks expect the AND to fold into a {%k1} write-mask on a ymm
; vcmpeqps, followed by a kmovq. The NoVLX checks expect a write-masked zmm
; compare, each of the low 8 mask bits moved to a GPR, a byte vector rebuilt
; with vpinsrb and re-tested into a mask, three zero masks plus the computed
; mask stored to an aligned 64-byte stack area, and the i64 result assembled
; from two 32-bit reloads with shlq $32 and orq.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
; Reinterpret the i8 mask argument as 8 per-lane predicate bits and apply it.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 0-7 select the masked result; every later index is in 8-15
; and selects a lane of the zeroinitializer operand, so bits 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of 8 floats where the second operand is loaded
; from memory (%__b). The <8 x i1> result is ANDed with the mask %__u, widened
; to <64 x i1> with zero upper lanes, and returned as an i64.
; With AVX512VL the expected code is a single masked vcmpeqps with the load
; folded in. With AVX512F only, the compare is done on zmm registers and the
; mask is rebuilt bit by bit and assembled through stack slots.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63
; of the widened vector are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of 8 floats against a single float loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the mask %__u,
; widened to <64 x i1> with zero upper lanes, and returned as an i64.
; With AVX512VL the expected code folds the splat into the compare as a {1to8}
; broadcast operand. With AVX512F only, the expected code uses a separate
; vbroadcastss followed by a zmm compare and a bit-by-bit mask rebuild.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
; Insert the scalar into lane 0, then splat lane 0 to all 8 lanes.
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Mask indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63
; of the widened vector are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked ordered-equal compare of two 16 x float vectors. The <16 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32.
; The run with AVX512VL/BW/DQ expects the k-register to be moved straight to
; eax. The AVX512F-only run expects the mask to be expanded to a vector,
; re-tested, and read back through a stack slot.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of 16 floats where the second operand is
; loaded from memory (%__b). The <16 x i1> result is widened to <32 x i1> with
; zero upper lanes and returned as an i32.
; Both run configurations expect the load to be folded into vcmpeqps. They
; differ only in how the mask reaches eax (direct k-register move versus a
; vector round trip through the stack).
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of 16 floats against a single float loaded
; from %__b and splatted to all 16 lanes. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32.
; Both run configurations expect the splat to be folded into vcmpeqps as a
; {1to16} broadcast memory operand.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
; Insert the scalar into lane 0, then splat lane 0 to all 16 lanes.
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked ordered-equal compare of two 16 x float vectors. The <16 x i1> result
; is ANDed with the mask %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32.
; Both run configurations expect the AND to be folded into the compare as a
; {%k1} write mask loaded from edi.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of 16 floats where the second operand is loaded
; from memory (%__b). The <16 x i1> result is ANDed with the mask %__u, widened
; to <32 x i1> with zero upper lanes, and returned as an i32.
; Both run configurations expect a single vcmpeqps with the load folded in and
; the mask applied as {%k1}.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of 16 floats against a single float loaded from
; %__b and splatted to all 16 lanes. The <16 x i1> result is ANDed with the
; mask %__u, widened to <32 x i1> with zero upper lanes, and returned as an i32.
; Both run configurations expect one vcmpeqps with a {1to16} broadcast memory
; operand and a {%k1} write mask.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
; Insert the scalar into lane 0, then splat lane 0 to all 16 lanes.
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so the upper
; 16 lanes of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked 16 x float compare through the llvm.x86.avx512.mask.cmp.ps.512
; intrinsic (all-ones mask, last operand i32 8). The i16 result is
; zero-extended to i32. Both run configurations share one expected sequence,
; which prints the compare with the {sae} modifier.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected output is vcmpleps - confirm whether
; the predicate or the name is the intended one.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
%3 = zext i16 %2 to i32
ret i32 %3
}
; Masked 16 x float compare through the llvm.x86.avx512.mask.cmp.ps.512
; intrinsic, with %__u passed as the mask operand and i32 8 as the last
; operand. The i16 result is zero-extended to i32. The two run configurations
; differ only in the instruction used to load the mask from edi (kmovd versus
; kmovw); both print the compare with the {sae} modifier.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected output is vcmpleps - confirm whether
; the predicate or the name is the intended one.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovw %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
%3 = zext i16 %2 to i32
ret i32 %3
}
; Unmasked ordered-equal compare of two 16 x float vectors. The <16 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64.
; The run with AVX512VL/BW/DQ expects a direct kmovq of the mask. The
; AVX512F-only run expects the 64-bit value to be assembled from 32-bit loads
; of stack slots, most of which were stored from a zeroed k-register.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of 16 floats where the second operand is
; loaded from memory (%__b). The <16 x i1> result is widened to <64 x i1> with
; zero upper lanes and returned as an i64.
; Both run configurations expect the load to be folded into vcmpeqps. They
; differ in how the 64-bit result is produced (direct kmovq versus assembly
; from stack slots).
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked ordered-equal compare of 16 floats against a single float loaded
; from %__b and splatted to all 16 lanes. The <16 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64.
; Both run configurations expect the splat to be folded into vcmpeqps as a
; {1to16} broadcast memory operand.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
; Insert the scalar into lane 0, then splat lane 0 to all 16 lanes.
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked ordered-equal compare of two 16 x float vectors. The <16 x i1> result
; is ANDed with the mask %__u, widened to <64 x i1> with zero upper lanes, and
; returned as an i64.
; Both run configurations expect the AND to be folded into the compare as a
; {%k1} write mask loaded from edi.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of 16 floats where the second operand is loaded
; from memory (%__b). The <16 x i1> result is ANDed with the mask %__u, widened
; to <64 x i1> with zero upper lanes, and returned as an i64.
; Both run configurations expect a single vcmpeqps with the load folded in and
; the mask applied as {%k1}.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of 16 floats against a single float loaded from
; %__b and splatted to all 16 lanes. The <16 x i1> result is ANDed with the
; mask %__u, widened to <64 x i1> with zero upper lanes, and returned as an i64.
; Both run configurations expect one vcmpeqps with a {1to16} broadcast memory
; operand and a {%k1} write mask.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
; Insert the scalar into lane 0, then splat lane 0 to all 16 lanes.
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
; Mask indices 16-31 select lanes of the zeroinitializer operand, so lanes
; 16-63 of the widened vector are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 16 x float compare through the llvm.x86.avx512.mask.cmp.ps.512
; intrinsic (all-ones mask, last operand i32 8). The i16 result is
; zero-extended to i64. Both run configurations expect the compare to be
; printed with the {sae} modifier, followed by a k-register move and an
; explicit movzwl.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected output is vcmpleps - confirm whether
; the predicate or the name is the intended one.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzwl %ax, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
%3 = zext i16 %2 to i64
ret i64 %3
}
; Masked 16 x float compare through the llvm.x86.avx512.mask.cmp.ps.512
; intrinsic, with %__u passed as the mask operand and i32 8 as the last
; operand. The i16 result is zero-extended to i64. Both run configurations
; expect a {%k1}-masked compare printed with the {sae} modifier, followed by a
; k-register move and an explicit movzwl.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected output is vcmpleps - confirm whether
; the predicate or the name is the intended one.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzwl %ax, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
%3 = zext i16 %2 to i64
ret i64 %3
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; Compares two <2 x double> vectors with fcmp oeq and widens the 2-lane result
; to <4 x i1> by appending lanes of a zero vector, returning it as an i4.
; The AVX512VL run expects a single vcmpeqpd into %k0 followed by kmovb. The
; AVX512F-only run expects the compare to land in %xmm0 and be converted to a
; mask with vptestmd, with the result byte stored to and reloaded from the
; stack.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as two doubles each.
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Same as the register form of the v2i1 -> v4i1 fcmp oeq test, except that the
; second operand is loaded from the pointer %__b. Both run configurations
; expect the load to be folded into vcmpeqpd as a (%rdi) memory operand; the
; AVX512F-only run then converts the vector result to a mask via vptestmd.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Broadcast-from-memory form of the v2i1 -> v4i1 fcmp oeq test: a single
; double is loaded from %__b and splatted to both lanes before the compare.
; The AVX512VL run expects the splat to fold into vcmpeqpd as a {1to2}
; embedded broadcast; the AVX512F-only run expects a separate vmovddup
; followed by a register-register compare.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
; Masked v2i1 -> v4i1 fcmp oeq test: the 2-lane compare result is ANDed with
; the i2 mask %__u before being widened with zero lanes and returned as an i4.
; The AVX512VL run expects %edi to be moved into %k1 and used as the {%k1}
; write-mask on vcmpeqpd. The AVX512F-only run expects the mask to be expanded
; into a vector with a zero-masked vpternlogq and combined via vandpd.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked v2i1 -> v4i1 fcmp oeq test with the second operand loaded from %__b.
; The compare result is ANDed with the i2 mask %__u, widened with zero lanes
; and returned as an i4. Both run configurations expect the load folded into
; vcmpeqpd as a (%rsi) memory operand; the AVX512VL run applies the mask as a
; {%k1} write-mask, the AVX512F-only run via vpternlogq + vandpd.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Masked v2i1 -> v4i1 fcmp oeq test with a broadcast second operand: one
; double is loaded from %__b and splatted to both lanes, the compare result is
; ANDed with the i2 mask %__u, widened with zero lanes and returned as an i4.
; The AVX512VL run expects a {1to2} embedded broadcast with a {%k1}
; write-mask; the AVX512F-only run expects vmovddup, a register compare, and
; the mask applied through vpternlogq + vandpd.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper result bits are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
; Compares two <2 x double> vectors with fcmp oeq and widens the 2-lane result
; to <8 x i1> by appending lanes of a zero vector, returning it as an i8.
; The AVX512VL run expects a single vcmpeqpd into %k0 followed by kmovd. The
; AVX512F-only run expects the two lanes to be pulled out with vpextrb and the
; mask assembled from them with k-register shift/xor/or operations.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as two doubles each.
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Same as the register form of the v2i1 -> v8i1 fcmp oeq test, except that the
; second operand is loaded from the pointer %__b. Both run configurations
; expect the load to be folded into vcmpeqpd as a (%rdi) memory operand; the
; AVX512F-only run then builds the mask with vpextrb and k-register
; operations.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Broadcast-from-memory form of the v2i1 -> v8i1 fcmp oeq test: a single
; double is loaded from %__b and splatted to both lanes before the compare.
; The AVX512VL run expects the splat to fold into vcmpeqpd as a {1to2}
; embedded broadcast; the AVX512F-only run expects a separate vmovddup, a
; register compare, and the vpextrb / k-register mask construction.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Masked v2i1 -> v8i1 fcmp oeq test: the 2-lane compare result is ANDed with
; the i2 mask %__u before being widened with zero lanes and returned as an i8.
; The AVX512VL run expects %edi to be moved into %k1 and used as the {%k1}
; write-mask on vcmpeqpd. The AVX512F-only run expects the mask to be expanded
; with a zero-masked vpternlogq, combined via vandpd, and the final mask
; assembled with vpextrb and k-register operations.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked v2i1 -> v8i1 fcmp oeq test with the second operand loaded from %__b.
; The compare result is ANDed with the i2 mask %__u, widened with zero lanes
; and returned as an i8. Both run configurations expect the load folded into
; vcmpeqpd as a (%rsi) memory operand; the AVX512VL run applies the mask as a
; {%k1} write-mask, the AVX512F-only run via vpternlogq + vandpd.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Masked v2i1 -> v8i1 fcmp oeq test with a broadcast second operand: one
; double is loaded from %__b and splatted to both lanes, the compare result is
; ANDed with the i2 mask %__u, widened with zero lanes and returned as an i8.
; The AVX512VL run expects a {1to2} embedded broadcast with a {%k1}
; write-mask; the AVX512F-only run expects vmovddup, a register compare, and
; the mask applied through vpternlogq + vandpd.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Compares two <2 x double> vectors with fcmp oeq and widens the 2-lane result
; to <16 x i1> by appending lanes of a zero vector, returning it as an i16.
; The AVX512VL run expects a single vcmpeqpd into %k0 followed by kmovd. The
; AVX512F-only run expects the two lanes to be pulled out with vpextrb and the
; mask assembled from them with k-register shift/xor/or operations.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as two doubles each.
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Same as the register form of the v2i1 -> v16i1 fcmp oeq test, except that
; the second operand is loaded from the pointer %__b. Both run configurations
; expect the load to be folded into vcmpeqpd as a (%rdi) memory operand; the
; AVX512F-only run then builds the mask with vpextrb and k-register
; operations.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Broadcast-from-memory form of the v2i1 -> v16i1 fcmp oeq test: a single
; double is loaded from %__b and splatted to both lanes before the compare.
; The AVX512VL run expects the splat to fold into vcmpeqpd as a {1to2}
; embedded broadcast; the AVX512F-only run expects a separate vmovddup, a
; register compare, and the vpextrb / k-register mask construction.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked v2i1 -> v16i1 fcmp oeq test: the 2-lane compare result is ANDed with
; the i2 mask %__u before being widened with zero lanes and returned as an
; i16. The AVX512VL run expects %edi to be moved into %k1 and used as the
; {%k1} write-mask on vcmpeqpd. The AVX512F-only run expects the mask to be
; expanded with a zero-masked vpternlogq, combined via vandpd, and the final
; mask assembled with vpextrb and k-register operations.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked v2i1 -> v16i1 fcmp oeq test with the second operand loaded from
; %__b. The compare result is ANDed with the i2 mask %__u, widened with zero
; lanes and returned as an i16. Both run configurations expect the load folded
; into vcmpeqpd as a (%rsi) memory operand; the AVX512VL run applies the mask
; as a {%k1} write-mask, the AVX512F-only run via vpternlogq + vandpd.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked v2i1 -> v16i1 fcmp oeq test with a broadcast second operand: one
; double is loaded from %__b and splatted to both lanes, the compare result is
; ANDed with the i2 mask %__u, widened with zero lanes and returned as an i16.
; The AVX512VL run expects a {1to2} embedded broadcast with a {%k1}
; write-mask; the AVX512F-only run expects vmovddup, a register compare, and
; the mask applied through vpternlogq + vandpd.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-15 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Compares two <2 x double> vectors with fcmp oeq and widens the 2-lane result
; to <32 x i1> by appending lanes of a zero vector, returning it as an i32.
; The AVX512VL run expects a single vcmpeqpd into %k0 followed by kmovd. The
; AVX512F-only run expects a 32-byte-aligned stack frame: the compare result
; is packed to bytes with vpshufb, each 16-byte half is converted to a 16-bit
; mask with vpmovsxbd + vptestmd and stored to the stack, and the two halves
; are reloaded together as one 32-bit value.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both 128-bit integer arguments as two doubles each.
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Same as the register form of the v2i1 -> v32i1 fcmp oeq test, except that
; the second operand is loaded from the pointer %__b. Both run configurations
; expect the load to be folded into vcmpeqpd as a (%rdi) memory operand. The
; AVX512F-only run then builds the 32-bit mask in two 16-bit halves that are
; stored to an aligned stack slot and reloaded with a single movl.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; The right-hand operand comes from memory as a full 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Broadcast-from-memory form of the v2i1 -> v32i1 fcmp oeq test: a single
; double is loaded from %__b and splatted to both lanes before the compare.
; The AVX512VL run expects the splat to fold into vcmpeqpd as a {1to2}
; embedded broadcast. The AVX512F-only run expects a separate vmovddup, a
; register compare, and the 32-bit mask built in two 16-bit halves through an
; aligned stack slot.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Scalar load, inserted into lane 0 and then splatted with a <0, 0> shuffle.
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; Shuffle indices 0-1 pick the compare results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-31 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked v2i1 -> v32i1 fcmp oeq test: the 2-lane compare result is ANDed with
; the i2 mask %__u before being widened with zero lanes and returned as an
; i32. The AVX512VL run expects %edi to be moved into %k1 and used as the
; {%k1} write-mask on vcmpeqpd. The AVX512F-only run expects the mask to be
; expanded with a zero-masked vpternlogq and combined via vandpd, after which
; the 32-bit mask is built in two 16-bit halves through an aligned stack slot.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked results; every later index is 2 or 3,
; i.e. a lane of the zeroinitializer operand, so bits 2-31 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Write-masked ordered-equal compare of a <2 x double> register operand against
; a full vector loaded from memory; the 2-lane result is padded with false
; lanes to 32 bits and returned as i32. In both configurations the expected
; vcmpeqpd takes the load as a (%rsi) memory operand.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Second compare operand is a whole-vector load from %__b.
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
; The i2 argument is reinterpreted as one mask bit per lane and ANDed in.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked compare lanes; indices 2-3 refer to the
; zeroinitializer operand, so result lanes 2..31 are all false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Write-masked ordered-equal compare of a <2 x double> register operand against
; a scalar double loaded from memory and splatted to both lanes; the 2-lane
; result is padded with false lanes to 32 bits and returned as i32.
; With avx512vl enabled the splat is expected as a {1to2} broadcast memory
; operand; with only avx512f it is expected as a separate vmovddup.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
; Load one double and replicate it into both lanes (shuffle mask <0, 0>).
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
; The i2 argument is reinterpreted as one mask bit per lane and ANDed in.
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
; Shuffle indices 0-1 pick the masked compare lanes; indices 2-3 refer to the
; zeroinitializer operand, so result lanes 2..31 are all false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is padded with false lanes to 8 bits and returned as i8.
; With avx512vl enabled the expected code is vcmpeqpd into a k-register; with
; only avx512f the expected code narrows the compare result with vpmovqd and
; inserts each lane into a k-register via vpextrb, kshift and kxor.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 0-3 pick the compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; full vector loaded from memory; the 4-lane result is padded with false lanes
; to 8 bits and returned as i8. In both configurations the expected vcmpeqpd
; takes the load as a (%rdi) memory operand.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Second compare operand is a whole-vector load from %__b.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 0-3 pick the compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from memory and splatted to all four lanes; the 4-lane
; result is padded with false lanes to 8 bits and returned as i8.
; With avx512vl enabled the splat is expected as a {1to4} broadcast memory
; operand; with only avx512f it is expected as a separate vbroadcastsd.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one double and replicate it into all four lanes (shuffle mask of zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 0-3 pick the compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
; Write-masked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is padded with false lanes to 8 bits and returned as i8.
; With avx512vl enabled the expected code is one masked vcmpeqpd into a
; k-register; with only avx512f the expected code materialises the mask as a
; vector (vpternlogd), ANDs it with the narrowed compare result, and then
; inserts each lane into a k-register via vpextrb, kshift and kxor.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; The i4 argument is reinterpreted as one mask bit per lane and ANDed in.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 pick the masked compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Write-masked ordered-equal compare of a <4 x double> register operand against
; a full vector loaded from memory; the 4-lane result is padded with false
; lanes to 8 bits and returned as i8. In both configurations the expected
; vcmpeqpd takes the load as a (%rsi) memory operand.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Second compare operand is a whole-vector load from %__b.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; The i4 argument is reinterpreted as one mask bit per lane and ANDed in.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 pick the masked compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Write-masked ordered-equal compare of a <4 x double> register operand against
; a scalar double loaded from memory and splatted to all four lanes; the 4-lane
; result is padded with false lanes to 8 bits and returned as i8.
; With avx512vl enabled the splat is expected as a {1to4} broadcast memory
; operand; with only avx512f it is expected as a separate vbroadcastsd.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one double and replicate it into all four lanes (shuffle mask of zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; The i4 argument is reinterpreted as one mask bit per lane and ANDed in.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 pick the masked compare lanes; indices 4-7 refer to the
; zeroinitializer operand, so result lanes 4..7 are all false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
; Unmasked register-register form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__a and %__b carry the two operands as <4 x i64> and are bitcast to double.
; The VLX run expects a single vcmpeqpd into a k-register, while the NoVLX run
; expects a ymm compare and the mask rebuilt one element at a time with
; vpextrb, kshift and kxor.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked full-vector-load form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__a carries the left operand as <4 x i64> and %__b points at the
; <4 x i64> right operand in memory.
; Both runs expect the load to be folded into vcmpeqpd as a (%rdi) operand.
; The NoVLX run then rebuilds the mask one element at a time with vpextrb,
; kshift and kxor.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The right operand is loaded as integers and reinterpreted as doubles.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked broadcast-from-memory form of the <4 x double> ordered-equal
; compare whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__a carries the left operand as <4 x i64> and %__b points at the single
; double that is splatted into the right operand.
; The VLX run expects the splat folded into vcmpeqpd as a {1to4} operand,
; while the NoVLX run expects a separate vbroadcastsd, a ymm compare, and the
; mask rebuilt one element at a time with vpextrb, kshift and kxor.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one scalar and splat it into all four lanes (shuffle mask is all zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked register-register form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__u is the 4-bit mask, and %__a and %__b carry the operands as <4 x i64>.
; The VLX run expects the mask to be applied by vcmpeqpd itself via {%k1},
; while the NoVLX run expects the mask materialised with vpternlogd, combined
; with vpand, and the result rebuilt element by element.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked full-vector-load form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__u is the 4-bit mask, %__a carries the left operand as <4 x i64>, and
; %__b points at the <4 x i64> right operand in memory.
; Both runs expect the load folded into vcmpeqpd as a (%rsi) operand. The VLX
; run applies the mask via {%k1}, while the NoVLX run uses vpternlogd plus
; vpand and rebuilds the result element by element.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The right operand is loaded as integers and reinterpreted as doubles.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked broadcast-from-memory form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 16 lanes and returned as an i16.
; %__u is the 4-bit mask, %__a carries the left operand as <4 x i64>, and
; %__b points at the single double that is splatted into the right operand.
; The VLX run expects one masked {1to4} vcmpeqpd, while the NoVLX run expects
; vbroadcastsd, a ymm compare, vpternlogd plus vpand for the mask, and the
; result rebuilt element by element.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $1, %k2, %k2
; NoVLX-NEXT: kshiftlw $1, %k2, %k2
; NoVLX-NEXT: korw %k1, %k2, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $13, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $12, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one scalar and splat it into all four lanes (shuffle mask is all zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Unmasked register-register form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__a and %__b carry the two operands as <4 x i64> and are bitcast to double.
; The VLX run expects a single vcmpeqpd into a k-register read back with kmovd.
; The NoVLX run expects a 32-byte aligned frame in which two 16-bit mask
; halves are stored with kmovw and then reloaded together with one movl.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked full-vector-load form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__a carries the left operand as <4 x i64> and %__b points at the
; <4 x i64> right operand in memory.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand.
; The NoVLX run expects a 32-byte aligned frame in which two 16-bit mask
; halves are stored with kmovw and then reloaded together with one movl.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The right operand is loaded as integers and reinterpreted as doubles.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked broadcast-from-memory form of the <4 x double> ordered-equal
; compare whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__a carries the left operand as <4 x i64> and %__b points at the single
; double that is splatted into the right operand.
; The VLX run expects the splat folded into vcmpeqpd as a {1to4} operand.
; The NoVLX run expects a separate vbroadcastsd and a 32-byte aligned frame in
; which two 16-bit mask halves are stored with kmovw and reloaded with one movl.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one scalar and splat it into all four lanes (shuffle mask is all zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked register-register form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__u is the 4-bit mask, and %__a and %__b carry the operands as <4 x i64>.
; The VLX run expects the mask to be applied by vcmpeqpd itself via {%k1}.
; The NoVLX run expects the mask materialised with vpternlogd and combined
; with vpand, then two 16-bit mask halves stored to an aligned frame with
; kmovw and reloaded with one movl.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked full-vector-load form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__u is the 4-bit mask, %__a carries the left operand as <4 x i64>, and
; %__b points at the <4 x i64> right operand in memory.
; Both runs expect the load folded into vcmpeqpd as a (%rsi) operand. The VLX
; run applies the mask via {%k1}, while the NoVLX run uses vpternlogd plus
; vpand and then stores two 16-bit mask halves to an aligned frame with kmovw
; before reloading them with one movl.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The right operand is loaded as integers and reinterpreted as doubles.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked broadcast-from-memory form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 32 lanes and returned as an i32.
; %__u is the 4-bit mask, %__a carries the left operand as <4 x i64>, and
; %__b points at the single double that is splatted into the right operand.
; The VLX run expects one masked {1to4} vcmpeqpd. The NoVLX run expects
; vbroadcastsd, a ymm compare, vpternlogd plus vpand for the mask, and two
; 16-bit mask halves stored to an aligned frame and reloaded with one movl.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one scalar and splat it into all four lanes (shuffle mask is all zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Apply the caller-supplied 4-bit mask to the compare result.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-31 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Unmasked register-register form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 64 lanes and returned as an i64.
; %__a and %__b carry the two operands as <4 x i64> and are bitcast to double.
; The VLX run expects a single vcmpeqpd into a k-register read back with kmovq.
; The NoVLX run expects a 64-byte frame holding four 16-bit mask pieces (two
; of them stored from a zeroed k-register), which are reloaded as two 32-bit
; halves and merged with shlq and orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked full-vector-load form of the <4 x double> ordered-equal compare
; whose 4-lane result is widened to 64 lanes and returned as an i64.
; %__a carries the left operand as <4 x i64> and %__b points at the
; <4 x i64> right operand in memory.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand.
; The NoVLX run expects a 64-byte frame holding four 16-bit mask pieces (two
; of them stored from a zeroed k-register), which are reloaded as two 32-bit
; halves and merged with shlq and orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The right operand is loaded as integers and reinterpreted as doubles.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Unmasked broadcast-from-memory form of the <4 x double> ordered-equal
; compare whose 4-lane result is widened to 64 lanes and returned as an i64.
; %__a carries the left operand as <4 x i64> and %__b points at the single
; double that is splatted into the right operand.
; The VLX run expects the splat folded into vcmpeqpd as a {1to4} operand.
; The NoVLX run expects a separate vbroadcastsd and a 64-byte frame holding
; four 16-bit mask pieces (two of them stored from a zeroed k-register), which
; are reloaded as two 32-bit halves and merged with shlq and orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Load one scalar and splat it into all four lanes (shuffle mask is all zeros).
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
; Every index above 3 selects from the zeroinitializer operand, so lanes 4-63 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Masked ordered-equal compare of two <4 x double> vectors. The 4-bit compare
; result is ANDed with the i4 mask %__u, widened to 64 mask bits with zeros,
; and returned as an i64.
; The run with avx512vl enabled expects one write-masked vcmpeqpd into a
; k-register; the avx512f-only run expects the mask to be rebuilt through
; vector registers and stack slots.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 take the masked compare bits; indices 4-7 take lanes of
; the zeroinitializer operand, so result bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of a <4 x double> register operand against a
; full vector loaded from %__b. The 4-bit result is ANDed with %__u, widened
; to 64 mask bits with zeros, and returned as an i64.
; Both run configurations expect the load to be folded into vcmpeqpd as a
; (%rsi) memory operand; only the avx512vl run writes a k-register directly.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; The second compare operand comes from memory as a whole vector.
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 take the masked compare bits; indices 4-7 take lanes of
; the zeroinitializer operand, so result bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Masked ordered-equal compare of a <4 x double> register operand against a
; single double loaded from %__b and splatted to all four lanes. The 4-bit
; result is ANDed with %__u, widened to 64 mask bits with zeros, and returned
; as an i64.
; The avx512vl run expects the splat to become an embedded {1to4} broadcast
; operand; the avx512f-only run expects a separate vbroadcastsd.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
; Shuffle indices 0-3 take the masked compare bits; indices 4-7 take lanes of
; the zeroinitializer operand, so result bits 4-63 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked ordered-equal compare of two <8 x double> vectors. The 8-bit result
; is widened to 16 mask bits with zeros and returned as an i16.
; Both run configurations expect the same 512-bit vcmpeqpd into %k0; they
; differ only in the k-register-to-GPR move (kmovd vs. kmovw).
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of an <8 x double> register operand against
; a full vector loaded from %__b. The 8-bit result is widened to 16 mask bits
; with zeros and returned as an i16.
; Both run configurations expect the load folded into vcmpeqpd as (%rdi).
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; The second compare operand comes from memory as a whole vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Unmasked ordered-equal compare of an <8 x double> register operand against
; a single double loaded from %__b and splatted to all eight lanes. The 8-bit
; result is widened to 16 mask bits with zeros and returned as an i16.
; Both run configurations expect the splat to become an embedded {1to8}
; broadcast operand of vcmpeqpd.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
; Masked ordered-equal compare of two <8 x double> vectors. The 8-bit result
; is ANDed with the i8 mask %__u, widened to 16 mask bits with zeros, and
; returned as an i16.
; Both run configurations expect the AND to be folded into a write-masked
; vcmpeqpd ({%k1}).
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of an <8 x double> register operand against a
; full vector loaded from %__b. The 8-bit result is ANDed with %__u, widened
; to 16 mask bits with zeros, and returned as an i16.
; Both run configurations expect a write-masked vcmpeqpd with the load folded
; in as (%rsi).
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; The second compare operand comes from memory as a whole vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Masked ordered-equal compare of an <8 x double> register operand against a
; single double loaded from %__b and splatted to all eight lanes. The 8-bit
; result is ANDed with %__u, widened to 16 mask bits with zeros, and returned
; as an i16.
; Both run configurations expect a write-masked vcmpeqpd with an embedded
; {1to8} broadcast operand.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
; Compare of two <8 x double> vectors through the
; @llvm.x86.avx512.mask.cmp.pd.512 intrinsic with an all-ones mask operand
; (i8 -1). The i8 result is zero-extended to i16.
; Both run configurations expect a {sae} compare followed by a movzbl of the
; low byte.
; NOTE(review): the function name says "oeq", but the predicate operand below
; is i32 2 and the expected instruction is vcmplepd, not vcmpeqpd. Confirm
; which one was intended.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; Presumably the trailing i32 8 operand selects the {sae} form seen in the
; expected output -- confirm against the intrinsic's definition.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
%3 = zext i8 %2 to i16
ret i16 %3
}
; Compare of two <8 x double> vectors through the
; @llvm.x86.avx512.mask.cmp.pd.512 intrinsic, with %__u passed as the
; intrinsic's mask operand. The i8 result is zero-extended to i16.
; Both run configurations expect a write-masked {sae} compare ({%k1})
; followed by a movzbl of the low byte.
; NOTE(review): the function name says "oeq", but the predicate operand below
; is i32 2 and the expected instruction is vcmplepd, not vcmpeqpd. Confirm
; which one was intended.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; Presumably the trailing i32 8 operand selects the {sae} form seen in the
; expected output -- confirm against the intrinsic's definition.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
%3 = zext i8 %2 to i16
ret i16 %3
}
; Unmasked ordered-equal compare of two <8 x double> vectors. The 8-bit result
; is widened to 32 mask bits with zeros and returned as an i32.
; The run with avx512bw enabled expects vcmpeqpd plus a single kmovd. The
; avx512f-only run expects each mask bit to be moved out with kshiftrw/kmovw,
; reassembled with vpinsrb, and converted back through vptestmd and a stack
; slot.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of an <8 x double> register operand against
; a full vector loaded from %__b. The 8-bit result is widened to 32 mask bits
; with zeros and returned as an i32.
; Both run configurations expect the load folded into vcmpeqpd as (%rdi). The
; avx512f-only run then rebuilds the mask bit by bit (kshiftrw/kmovw/vpinsrb)
; and goes through vptestmd and a stack slot.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; The second compare operand comes from memory as a whole vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Unmasked ordered-equal compare of an <8 x double> register operand against
; a single double loaded from %__b and splatted to all eight lanes. The 8-bit
; result is widened to 32 mask bits with zeros and returned as an i32.
; Both run configurations expect an embedded {1to8} broadcast operand. The
; avx512f-only run then rebuilds the mask bit by bit (kshiftrw/kmovw/vpinsrb)
; and goes through vptestmd and a stack slot.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 0-7 take the compare bits; indices 8-15 take lanes of the
; zeroinitializer operand, so result bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
; Masked ordered-equal compare of two <8 x double> vectors. The 8-bit result
; is ANDed with the i8 mask %__u, widened to 32 mask bits with zeros, and
; returned as an i32.
; Both run configurations expect the AND folded into a write-masked vcmpeqpd
; ({%k1}). The avx512f-only run then rebuilds the mask bit by bit
; (kshiftrw/kmovw/vpinsrb) and goes through vptestmd and a stack slot.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Reinterpret the scalar mask as one i1 per lane and apply it to the compare.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of an <8 x double> register operand against a
; full vector loaded from %__b. The 8-bit result is ANDed with %__u, widened
; to 32 mask bits with zeros, and returned as an i32.
; Both run configurations expect a write-masked vcmpeqpd with the load folded
; in as (%rsi). The avx512f-only run then rebuilds the mask bit by bit
; (kshiftrw/kmovw/vpinsrb) and goes through vptestmd and a stack slot.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; The second compare operand comes from memory as a whole vector.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Masked ordered-equal compare of an <8 x double> register operand against a
; single double loaded from %__b and splatted to all eight lanes. The 8-bit
; result is ANDed with %__u, widened to 32 mask bits with zeros, and returned
; as an i32.
; Both run configurations expect a write-masked vcmpeqpd with an embedded
; {1to8} broadcast operand. The avx512f-only run then rebuilds the mask bit by
; bit (kshiftrw/kmovw/vpinsrb) and goes through vptestmd and a stack slot.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 take the masked compare bits; indices 8-15 take lanes of
; the zeroinitializer operand, so result bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
; Compare of two <8 x double> vectors through the
; @llvm.x86.avx512.mask.cmp.pd.512 intrinsic with an all-ones mask operand
; (i8 -1). The i8 result is zero-extended to i32.
; The run with avx512dq enabled expects the zero-extension to come from a
; single kmovb; the avx512f-only run expects kmovw followed by movzbl.
; NOTE(review): the function name says "oeq", but the predicate operand below
; is i32 2 and the expected instruction is vcmplepd, not vcmpeqpd. Confirm
; which one was intended.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; Presumably the trailing i32 8 operand selects the {sae} form seen in the
; expected output -- confirm against the intrinsic's definition.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
%3 = zext i8 %2 to i32
ret i32 %3
}
; Masked form of the 512-bit double compare intrinsic: %__u is passed as the
; mask operand of llvm.x86.avx512.mask.cmp.pd.512 (predicate immediate 2,
; final operand 8) and the i8 result is zero-extended to i32.
; Both check sets expect %edi to be moved into %k1 and used as the {%k1}
; write-mask on a single compare carrying {sae}.
; NOTE(review): the function name says "oeq", but the expected instruction for
; immediate 2 is vcmplepd -- confirm the name/predicate mismatch is intended.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both integer vectors as <8 x double> for the compare.
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; The caller-supplied %__u is the intrinsic's mask operand.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
; Widen the 8-bit mask to the i32 return type with zeros in the upper bits.
%3 = zext i8 %2 to i32
ret i32 %3
}
; Register-register ordered-equal compare of two <8 x double> vectors whose
; 8-bit result is widened to a 64-bit mask and returned as i64.
; The VLX checks expect one vcmpeqpd followed by kmovq. The NoVLX checks expect
; a frame with a realigned stack in which each of the eight mask bits is shifted
; down with kshiftrw, moved to a GPR, inserted into a byte vector with vpinsrb,
; re-tested with vptestmd, and the result assembled from two 32-bit stack loads
; combined with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both integer vectors as <8 x double> for the compare.
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Widen to 64 lanes: indices 0-7 pick the compare results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Ordered-equal compare of an <8 x double> register operand against a full
; vector loaded through %__b; the 8-bit result is widened to a 64-bit mask and
; returned as i64.
; Both check sets expect the load to appear as the (%rdi) memory operand of
; vcmpeqpd. The VLX checks then expect only kmovq; the NoVLX checks expect the
; per-bit kshiftrw/kmovw extraction, vpinsrb rebuild, vptestmd and a
; shlq/orq combine of two 32-bit stack loads.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Second operand comes from memory; the checks expect it folded into the compare.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Widen to 64 lanes: indices 0-7 pick the compare results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Ordered-equal compare of an <8 x double> register operand against a single
; double loaded through %__b and splatted to all eight lanes; the 8-bit result
; is widened to a 64-bit mask and returned as i64.
; Both check sets expect the load+splat to appear as the (%rdi){1to8} broadcast
; memory operand of vcmpeqpd. The VLX checks then expect only kmovq; the NoVLX
; checks expect the per-bit kshiftrw/kmovw extraction, vpinsrb rebuild,
; vptestmd and a shlq/orq combine of two 32-bit stack loads.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Scalar load placed in lane 0, then replicated to all lanes by the
; all-zero shuffle mask (a splat).
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
; Widen to 64 lanes: indices 0-7 pick the compare results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
; Register-register ordered-equal compare of two <8 x double> vectors, ANDed
; with the 8-bit mask %__u, widened to a 64-bit mask and returned as i64.
; Both check sets expect %edi to be moved into %k1 and the AND to appear as the
; {%k1} write-mask on vcmpeqpd. The VLX checks then expect only kmovq; the
; NoVLX checks expect the per-bit kshiftrw/kmovw extraction, vpinsrb rebuild,
; vptestmd and a shlq/orq combine of two 32-bit stack loads.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both integer vectors as <8 x double> for the compare.
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 64 lanes: indices 0-7 pick the masked results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Ordered-equal compare of an <8 x double> register operand against a full
; vector loaded through %__b, ANDed with the 8-bit mask %__u, widened to a
; 64-bit mask and returned as i64.
; Both check sets expect %edi to be moved into %k1, the load to appear as the
; (%rsi) memory operand, and the AND to appear as the {%k1} write-mask on
; vcmpeqpd. The VLX checks then expect only kmovq; the NoVLX checks expect the
; per-bit kshiftrw/kmovw extraction, vpinsrb rebuild, vptestmd and a
; shlq/orq combine of two 32-bit stack loads.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Second operand comes from memory; the checks expect it folded into the compare.
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 64 lanes: indices 0-7 pick the masked results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Ordered-equal compare of an <8 x double> register operand against a single
; double loaded through %__b and splatted to all eight lanes, ANDed with the
; 8-bit mask %__u, widened to a 64-bit mask and returned as i64.
; Both check sets expect %edi to be moved into %k1, the load+splat to appear as
; the (%rsi){1to8} broadcast memory operand, and the AND to appear as the {%k1}
; write-mask on vcmpeqpd. The VLX checks then expect only kmovq; the NoVLX
; checks expect the per-bit kshiftrw/kmovw extraction, vpinsrb rebuild,
; vptestmd and a shlq/orq combine of two 32-bit stack loads.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: kmovw %k0, %edx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
; Scalar load placed in lane 0, then replicated to all lanes by the
; all-zero shuffle mask (a splat).
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
; Apply the caller's mask lane-by-lane to the compare result.
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Widen to 64 lanes: indices 0-7 pick the masked results, every other index
; (8-15) picks an element of the zeroinitializer operand, so lanes 8-63 are 0.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Return the 64 mask lanes as a scalar i64.
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
; Unmasked 512-bit double compare issued through the
; llvm.x86.avx512.mask.cmp.pd.512 intrinsic (predicate immediate 2, all-ones
; mask, final operand 8), with the i8 result zero-extended to i64.
; Both check sets expect a single compare carrying {sae}, then a mask-to-GPR
; move (kmovd in the VLX checks, kmovw in the NoVLX checks) followed by movzbl.
; NOTE(review): the function name says "oeq", but the expected instruction for
; immediate 2 is vcmplepd -- confirm the name/predicate mismatch is intended.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both integer vectors as <8 x double> for the compare.
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; i8 -1 is an all-ones mask, so no {%k1} write-mask appears in the expected output.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
; Widen the 8-bit mask to the i64 return type with zeros in the upper bits.
%3 = zext i8 %2 to i64
ret i64 %3
}
; Masked form of the 512-bit double compare intrinsic: %__u is passed as the
; mask operand of llvm.x86.avx512.mask.cmp.pd.512 (predicate immediate 2,
; final operand 8) and the i8 result is zero-extended to i64.
; Both check sets expect %edi to be moved into %k1 and used as the {%k1}
; write-mask on a single compare carrying {sae}, followed by a mask-to-GPR
; move and movzbl.
; NOTE(review): the function name says "oeq", but the expected instruction for
; immediate 2 is vcmplepd -- confirm the name/predicate mismatch is intended.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
; Reinterpret both integer vectors as <8 x double> for the compare.
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
; The caller-supplied %__u is the intrinsic's mask operand.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
; Widen the 8-bit mask to the i64 return type with zeros in the upper bits.
%3 = zext i8 %2 to i64
ret i64 %3
}
; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; The 16-bit result of llvm.x86.avx512.mask.cmp.ps.512 (predicate immediate 2,
; all-ones mask, final operand 8) is concatenated with 16 zero lanes and
; returned as i32. The VLX checks expect only vcmpleps {sae} followed by kmovd,
; with no separate instruction clearing the upper mask bits. The NoVLX checks
; expect a longer sequence that goes through a realigned stack slot.
define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
; VLX-LABEL: test_cmpm_rnd_zero:
; VLX: # %bb.0:
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_cmpm_rnd_zero:
; NoVLX: # %bb.0:
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
; i16 -1 is an all-ones mask, so no write-mask appears on the expected compare.
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
%cast = bitcast i16 %res to <16 x i1>
; Indices 0-15 pick the compare results; indices 16-31 pick the
; zeroinitializer operand, so the upper 16 lanes of the result are 0.
%shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%cast2 = bitcast <32 x i1> %shuffle to i32
ret i32 %cast2
}
; Builds an 8-lane mask whose lower four lanes are zero and whose upper four
; lanes hold a 4-lane unsigned compare result, then returns it as i8.
; The VLX checks expect the compare followed by a single kshiftlb $4 to place
; the result in the upper half. The NoVLX checks expect the four lanes to be
; extracted with vpextrb and merged into the mask one bit at a time through
; kshift/kxor sequences.
; NOTE(review): "icmp ult %a, zeroinitializer" can never be true for unsigned
; operands; presumably it only serves to produce a compare the backend does not
; fold away (the checks still expect compare instructions) -- confirm.
define i8 @mask_zero_lower(<4 x i32> %a) {
; VLX-LABEL: mask_zero_lower:
; VLX: # %bb.0:
; VLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kshiftlb $4, %k0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: mask_zero_lower:
; NoVLX: # %bb.0:
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: kxorw %k0, %k0, %k2
; NoVLX-NEXT: kshiftrw $4, %k2, %k3
; NoVLX-NEXT: kxorw %k1, %k3, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $11, %k1, %k1
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftrw $5, %k1, %k2
; NoVLX-NEXT: kxorw %k0, %k2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k0
; NoVLX-NEXT: kshiftrw $10, %k0, %k0
; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $9, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k1
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k2
; NoVLX-NEXT: kxorw %k2, %k1, %k1
; NoVLX-NEXT: kshiftlw $15, %k1, %k1
; NoVLX-NEXT: kshiftrw $8, %k1, %k1
; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: retq
%cmp = icmp ult <4 x i32> %a, zeroinitializer
; Indices 4-7 come first and pick the zeroinitializer operand (result lanes
; 0-3); indices 0-3 pick the compare results (result lanes 4-7).
%concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
%cast = bitcast <8 x i1> %concat to i8
ret i8 %cast
}