blob: 26f4bd578c24dbb0257f891af579bc1d5a5bfb5f [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
; This test is an assembly of avx512 shuffling instructions to check their scheduling
define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) {
; GENERIC-LABEL: test_16xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) {
; GENERIC-LABEL: test_16xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) {
; GENERIC-LABEL: test_16xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) {
; GENERIC-LABEL: test_16xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
ret <16 x i16> %res
}
define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
ret <16 x i16> %res
}
define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
%cmp = icmp eq <16 x i16> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) {
; GENERIC-LABEL: test_32xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) {
; GENERIC-LABEL: test_32xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) {
; GENERIC-LABEL: test_32xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) {
; GENERIC-LABEL: test_32xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
ret <32 x i16> %res
}
define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
ret <32 x i16> %res
}
define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <32 x i16>, <32 x i16>* %vp
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
%cmp = icmp eq <32 x i16> %mask, zeroinitializer
%res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
; GENERIC-LABEL: test_8xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
; GENERIC-LABEL: test_8xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) {
; GENERIC-LABEL: test_8xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) {
; GENERIC-LABEL: test_8xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
ret <8 x i32> %res
}
define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
ret <8 x i32> %res
}
define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <8 x i32>, <8 x i32>* %vp
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
%cmp = icmp eq <8 x i32> %mask, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_16xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
; GENERIC-LABEL: test_16xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; GENERIC-LABEL: test_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; GENERIC-LABEL: test_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
ret <16 x i32> %res
}
define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
ret <16 x i32> %res
}
define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i32>, <16 x i32>* %vp
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
%cmp = icmp eq <16 x i32> %mask, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) {
; GENERIC-LABEL: test_4xi64_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mask2:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) {
; GENERIC-LABEL: test_4xi64_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00]
; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mask3:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) {
; GENERIC-LABEL: test_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <4 x i64>, <4 x i64>* %vp
%res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <4 x i64>, <4 x i64>* %vp
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <4 x i64>, <4 x i64>* %vp
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask1:
; SKX: # %bb.0:
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <4 x i64>, <4 x i64>* %vp
%shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
%cmp = icmp eq <4 x i64> %mask, zeroinitializer
%res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
ret <4 x i64> %res
}
define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) {
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;