| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL |
| ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX |
| ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW |
| ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ |
| |
| |
; Invert a 16-bit mask value: i16 -> <16 x i1> bitcast, xor with all-ones,
; bitcast back. Should lower to a kmov into a mask register, knotw, kmov out.
define i16 @mask16(i16 %x) {
; KNL-LABEL: mask16:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: mask16:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask16:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask16:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  ; xor with splat of true == bitwise NOT of the mask
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}
| |
; Same as mask16 but the inverted mask is zero-extended to i32; the kmov of a
; 16-bit mask into a 32-bit GPR already zero-extends, so no extra movzx.
define i32 @mask16_zext(i16 %x) {
; KNL-LABEL: mask16_zext:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: mask16_zext:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask16_zext:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovw %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask16_zext:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}
| |
; Invert an 8-bit mask. Targets with AVX512DQ (SKX, AVX512DQ) can use the
; 8-bit knotb; the others fall back to knotw on the wider mask register.
define i8 @mask8(i8 %x) {
; KNL-LABEL: mask8:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: mask8:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}
| |
; 8-bit mask inversion zero-extended to i32. DQ-capable targets use kmovb
; (which zero-extends into the GPR); the others need an explicit movzbl.
define i32 @mask8_zext(i8 %x) {
; KNL-LABEL: mask8_zext:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_zext:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8_zext:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movzbl %al, %eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_zext:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, %eax
; AVX512DQ-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}
| |
; Memory-to-memory 16-bit mask inversion. All targets agree here (one shared
; set of assertions): load mask, knotw, store mask, with no GPR round-trip.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw (%rdi), %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, (%rdi)
; CHECK-NEXT: retq
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}
| |
; Memory-to-memory 8-bit mask inversion. DQ-capable targets load/store the
; mask register directly with kmovb; the others bounce through a GPR.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
; SKX: ## BB#0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_mem:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovb (%rdi), %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: retq
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}
| |
; (x & y) | (x ^ y) computed through <16 x i1>. Since the inputs arrive as
; i16 scalars, this stays in GPRs (no mask registers) on every target.
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## BB#0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}
| |
; Same (x & y) | (x ^ y) logic, but with <16 x i1> operands loaded from
; memory: the masks go straight into k-registers and use kand/kxor/kor.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; KNL-LABEL: mand16_mem:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k0
; KNL-NEXT: kmovw (%rsi), %k1
; KNL-NEXT: kandw %k1, %k0, %k2
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: mand16_mem:
; SKX: ## BB#0:
; SKX-NEXT: kmovw (%rdi), %k0
; SKX-NEXT: kmovw (%rsi), %k1
; SKX-NEXT: kandw %k1, %k0, %k2
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: korw %k0, %k2, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mand16_mem:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: kmovw (%rsi), %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k2
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: korw %k0, %k2, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mand16_mem:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: kmovw (%rsi), %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k2
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: korw %k0, %k2, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}
| |
; Extracting the high 8 lanes of a 16-lane mask via shufflevector should
; lower to a single kshiftrw $8 on the mask register.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: shuf_test1:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kshiftrw $8, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
;
; AVX512BW-LABEL: shuf_test1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: ## -- End function
;
; AVX512DQ-LABEL: shuf_test1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: ## -- End function
  %v1 = bitcast i16 %v to <16 x i1>
  ; select lanes 8..15 -> an <8 x i1> holding the top half of the mask
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}
| |
; Extract bit 5 of a 16-lane compare result and zext it to i32. Lowered as
; kshiftl $10 / kshiftr $15 to isolate the bit, then kmov + andl $1.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test1:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test1:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}
| |
; Same as zext_test1 but extending the extracted mask bit to i16 instead
; of i32 (hence the extra sub-register kill annotations on %ax).
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test2:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test2:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test2:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test2:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}
| |
; Same as zext_test1/2 but extending to i8: the masking becomes the byte-wide
; andb $1 on %al.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test3:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test3:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test3:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andb $1, %al
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test3:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andb $1, %al
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}
| |
; Store a constant all-ones <8 x i1> to memory, then load back a constant
; mask through an alloca. Both constants should fold: the all-ones store
; uses kxnorw, and the returned value folds to the immediate -2 (0xFE).
define i8 @conv1(<8 x i1>* %R) {
; KNL-LABEL: conv1:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: kxnorw %k0, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; KNL-NEXT: movb $-2, %al
; KNL-NEXT: retq
;
; SKX-LABEL: conv1:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: kxnorw %k0, %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb $-2, %al
; SKX-NEXT: retq
;
; AVX512BW-LABEL: conv1:
; AVX512BW: ## BB#0: ## %entry
; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb $-2, %al
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: conv1:
; AVX512DQ: ## BB#0: ## %entry
; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: movb $-2, %al
; AVX512DQ-NEXT: retq
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  ; mask constant is 0b11111110 == -2 as a signed i8
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}
| |
; Compare of <4 x i1> compare results, sign-extended to <4 x i32>. With
; AVX512VL (SKX) this stays in mask registers (kandnw + vpmovm2d); other
; targets widen through zmm and use vector compares.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT: vpmovqd %zmm1, %ymm1
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test4:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
; SKX-NEXT: kandnw %k0, %k1, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test4:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test4:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  ; i1 sgt: true > false, so this is (x_gt_y) & ~(x1_gt_y1) per lane
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}
| |
; 2-lane variant of test4, with an slt on the <2 x i1> results (i1 slt:
; true < false as signed, giving ~a & b per lane via kandnw on SKX).
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k1
; SKX-NEXT: kandnw %k1, %k0, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test5:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test5:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: retq
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
; NOTE(review): the closing brace of test5 and the start of test6 share one
; line below — likely a lost newline when this file was edited; harmless to
; the IR parser but worth reformatting. test6 also has no autogenerated
; assertions (branch-only result) — presumably intentional; confirm with
; update_llc_test_checks.py output.
}define void @test6(<16 x i1> %mask) {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Branch on whether (mask | 0b01010101) is zero. DQ-capable targets keep the
; test in mask registers via korb + ktestb; others spill to a GPR and testb.
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL: ## BB#0: ## %allocas
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: movb $85, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## BB#0: ## %allocas
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: movb $85, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: ktestb %k0, %k0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
; AVX512BW: ## BB#0: ## %allocas
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: movb $85, %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test7:
; AVX512DQ: ## BB#0: ## %allocas
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: movb $85, %al
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: ktestb %k0, %k0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
allocas:
  ; 0b01010101 == 85, the constant materialized into %k1 above
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Scalar-condition select between two vector compares, sign-extended to
; <16 x i8>. Exercises lowering of a mask produced on either side of a
; branch; SKX duplicates the vpmovm2b+ret tail in both branch arms.
; The ult-against-zero compare is trivially false but kept as written.
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB17_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
; KNL-NEXT: jmp LBB17_3
; KNL-NEXT: LBB17_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB17_3:
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB17_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB17_1:
; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; AVX512BW-NEXT: jmp LBB17_3
; AVX512BW-NEXT: LBB17_1:
; AVX512BW-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; AVX512BW-NEXT: LBB17_3:
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; AVX512DQ-NEXT: jmp LBB17_3
; AVX512DQ-NEXT: LBB17_1:
; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; AVX512DQ-NEXT: LBB17_3:
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}
; Scalar-condition select between two <16 x i1> arguments passed in vector
; registers; the selected vector is renormalized to a mask and back.
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB18_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
; KNL-NEXT: jmp LBB18_3
; KNL-NEXT: LBB18_1:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB18_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
; SKX-NEXT: jmp LBB18_3
; SKX-NEXT: LBB18_1:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: LBB18_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test9:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB18_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX512BW-NEXT: jmp LBB18_3
; AVX512BW-NEXT: LBB18_1:
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: LBB18_3:
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test9:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB18_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
; AVX512DQ-NEXT: jmp LBB18_3
; AVX512DQ-NEXT: LBB18_1:
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: LBB18_3:
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
; NOTE(review): the closing brace of test9 and the start of test10 share the
; line below (lost newline); test10 is the 8-lane variant and carries no
; autogenerated assertions — presumably left unchecked; confirm against the
; update script's output.
}define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}
| |
; 4-lane variant of test9. Without AVX512VL mask support for this width,
; most targets just select between the xmm arguments with a vmovaps; SKX
; renormalizes through a mask register (vpslld/vptestmd/vpmovm2d).
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB20_2
; KNL-NEXT: ## BB#1:
; KNL-NEXT: vmovaps %xmm1, %xmm0
; KNL-NEXT: LBB20_2:
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB20_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
; SKX-NEXT: jmp LBB20_3
; SKX-NEXT: LBB20_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: LBB20_3:
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test11:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB20_2
; AVX512BW-NEXT: ## BB#1:
; AVX512BW-NEXT: vmovaps %xmm1, %xmm0
; AVX512BW-NEXT: LBB20_2:
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test11:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB20_2
; AVX512DQ-NEXT: ## BB#1:
; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0
; AVX512DQ-NEXT: LBB20_2:
; AVX512DQ-NEXT: retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}
| |
; Extracting bit 0 of the constant mask 21845 (0b0101010101010101) is true,
; so the select folds to just returning %x.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}
| |
; Bit 3 of 21845 is 0, so the select folds to returning %y.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK: ## BB#0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
; NOTE(review): lost newline below — test13's closing brace and test14's
; define share a line. test14 (constant extract+insert, fully foldable) has
; no autogenerated assertions; confirm with the update script.
}define <4 x i1> @test14() {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}
| |
; Select between two constant <16 x i1> masks (0x5555 vs 0x0001) on a scalar
; compare: lowered as a cmov between the two immediates, then moved into a
; mask register and expanded back to a vector.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: movw $21845, %ax ## imm = 0x5555
; KNL-NEXT: movw $1, %cx
; KNL-NEXT: cmovgw %ax, %cx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
; SKX-NEXT: movw $1, %cx
; SKX-NEXT: cmovgw %ax, %cx
; SKX-NEXT: kmovd %ecx, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test15:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: movw $21845, %ax ## imm = 0x5555
; AVX512BW-NEXT: movw $1, %cx
; AVX512BW-NEXT: cmovgw %ax, %cx
; AVX512BW-NEXT: kmovd %ecx, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test15:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: movw $21845, %ax ## imm = 0x5555
; AVX512DQ-NEXT: movw $1, %cx
; AVX512DQ-NEXT: cmovgw %ax, %cx
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}
| |
define <64 x i8> @test16(i64 %x) {
; Bitcast an i64 into a <64 x i1> mask, force element 5 to a constant true,
; then sign-extend the mask to <64 x i8>. Assertions below are autogenerated
; by update_llc_test_checks.py -- regenerate rather than editing by hand.
; KNL-LABEL: test16:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Lcfi0:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: Lcfi1:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: Lcfi2:
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: movl $1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: movb $1, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; SKX-NEXT: vpmovm2b %k0, %zmm1
; SKX-NEXT: movl $32, %eax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test16:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: movb $1, %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test16:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: pushq %rbp
; AVX512DQ-NEXT: Lcfi0:
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: Lcfi1:
; AVX512DQ-NEXT: .cfi_offset %rbp, -16
; AVX512DQ-NEXT: movq %rsp, %rbp
; AVX512DQ-NEXT: Lcfi2:
; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp
; AVX512DQ-NEXT: andq $-32, %rsp
; AVX512DQ-NEXT: subq $64, %rsp
; AVX512DQ-NEXT: movl %edi, (%rsp)
; AVX512DQ-NEXT: shrq $32, %rdi
; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: kmovw (%rsp), %k0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: movl $1, %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: retq
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1>%a, i1 true, i32 5
  %c = sext <64 x i1>%b to <64 x i8>
  ret <64 x i8>%c
}
| |
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; Same shape as @test16, but the bit inserted at element 5 is the runtime
; result of (%y > %z) rather than a constant, so codegen must materialize
; the setg result into the mask. Assertions autogenerated.
; KNL-LABEL: test17:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Lcfi3:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: Lcfi4:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: Lcfi5:
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %edx, %esi
; KNL-NEXT: setg %al
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; SKX-NEXT: vpmovm2b %k0, %zmm1
; SKX-NEXT: movl $32, %eax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test17:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: cmpl %edx, %esi
; AVX512BW-NEXT: setg %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test17:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: pushq %rbp
; AVX512DQ-NEXT: Lcfi3:
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: Lcfi4:
; AVX512DQ-NEXT: .cfi_offset %rbp, -16
; AVX512DQ-NEXT: movq %rsp, %rbp
; AVX512DQ-NEXT: Lcfi5:
; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp
; AVX512DQ-NEXT: andq $-32, %rsp
; AVX512DQ-NEXT: subq $64, %rsp
; AVX512DQ-NEXT: movl %edi, (%rsp)
; AVX512DQ-NEXT: shrq $32, %rdi
; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: kmovw (%rsp), %k0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: xorl %eax, %eax
; AVX512DQ-NEXT: cmpl %edx, %esi
; AVX512DQ-NEXT: setg %al
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0
; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: retq
  %a = bitcast i64 %x to <64 x i1>
  %b = icmp sgt i32 %y, %z
  %c = insertelement <64 x i1>%a, i1 %b, i32 5
  %d = sext <64 x i1>%c to <64 x i8>
  ret <64 x i8>%d
}
| |
; Extract elements 8 and 9 of a <16 x i1> mask (from i16 %y) and insert them
; at positions 7 and 6 of a <8 x i1> mask (from i8 %a) -- exercises
; cross-mask-register element extraction and insertion. Assertions autogenerated.
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kshiftlw $7, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: kshiftlw $6, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $7, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test18:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kshiftlw $7, %k1, %k2
; SKX-NEXT: kshiftrw $15, %k2, %k2
; SKX-NEXT: kmovd %k2, %eax
; SKX-NEXT: kshiftlw $6, %k1, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: kmovd %ecx, %k0
; SKX-NEXT: vpmovm2q %k0, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; SKX-NEXT: vpmovq2m %zmm2, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: kmovd %esi, %k0
; AVX512BW-NEXT: kshiftlw $7, %k0, %k2
; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
; AVX512BW-NEXT: kmovd %k2, %eax
; AVX512BW-NEXT: kshiftlw $6, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: kmovd %ecx, %k1
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $1, %k0, %k0
; AVX512BW-NEXT: kshiftrw $1, %k0, %k0
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftlw $7, %k1, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
; AVX512DQ-NEXT: kmovw %k2, %eax
; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm1
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vpmovq2m %zmm2, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %b1 = bitcast i16 %y to <16 x i1>
  %el1 = extractelement <16 x i1>%b1, i32 8
  %el2 = extractelement <16 x i1>%b1, i32 9
  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
  ret <8 x i1>%d
}
| define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { |
| ; KNL-LABEL: test21: |
| ; KNL: ## BB#0: |
| ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero |
| ; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 |
| ; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 |
| ; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 |
| ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 |
| ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero |
| ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 |
| ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 |
| ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 |
| ; KNL-NEXT: retq |
| ; KNL-NEXT: ## -- End function |
| ; |
| ; SKX-LABEL: test21: |
| ; SKX: ## BB#0: |
| ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 |
| ; SKX-NEXT: vpmovb2m %ymm1, %k1 |
| ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} |
| ; SKX-NEXT: retq |
| ; SKX-NEXT: ## -- End function |
| ; |
| ; AVX512BW-LABEL: test21: |
| ; AVX512BW: ## BB#0: |
| ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1 |
| ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 |
| ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} |
| ; AVX512BW-NEXT: retq |
| ; AVX512BW-NEXT: ## -- End function |
| ; |
| ; AVX512DQ-LABEL: test21: |
| ; AVX512DQ: ## BB#0: |
| ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero |
| ; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm3 |
| ; AVX512DQ-NEXT: vpsraw $15, %ymm3, %ymm3 |
| ; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0 |
| ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 |
| ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero |
| ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2 |
| ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 |
| ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 |
| ; AVX512DQ-NEXT: retq |
| ; AVX512DQ-NEXT: ## -- End function |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
; Store a <4 x i1> to memory: DQ targets can use kmovb directly; others go
; through a GPR and a byte store. Assertions autogenerated.
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: test22:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test22:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test22:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}
| |
; Store a <2 x i1> to memory; same lowering pattern as @test22 but via the
; qword-element path (vpsllq/vptestmq). Assertions autogenerated.
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: test23:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test23:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test23:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}
| |
; Invert a <1 x i1> (xor with true, lowered as kxnor/kxor) and store it.
; Assertions autogenerated.
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kxnorw %k0, %k0, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rsi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v1i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, (%rsi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v1i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v1i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rsi)
; AVX512DQ-NEXT: retq
  %x = xor <1 x i1> %c, <i1 1>
  store <1 x i1> %x, <1 x i1>* %ptr, align 4
  ret void
}
| |
; Invert a <2 x i1> and store it; VLX targets (SKX) fold the NOT into knotw
; on the mask register instead of a vector vpxor. Assertions autogenerated.
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v2i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %x = xor <2 x i1> %c, <i1 1, i1 1>
  store <2 x i1> %x, <2 x i1>* %ptr, align 4
  ret void
}
| |
; Invert a <4 x i1> and store it; same pattern as @store_v2i1 with
; dword-element lowering. Assertions autogenerated.
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v4i1:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
  store <4 x i1> %x, <4 x i1>* %ptr, align 4
  ret void
}
| |
; Invert a <8 x i1> and store it; DQ/BW targets use knotb/knotw on the mask
; register, KNL goes through a GPR byte store. Assertions autogenerated.
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v8i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v8i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v8i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %x, <8 x i1>* %ptr, align 4
  ret void
}
| |
; Invert a <16 x i1> and store it as a full 16-bit mask (kmovw to memory on
; every target). Assertions autogenerated.
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v16i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovw %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v16i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovw %k0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v16i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <16 x i1> %x, <16 x i1>* %ptr, align 4
  ret void
}
| |
| ;void f2(int); |
| ;void f1(int c) |
| ;{ |
| ; static int v = 0; |
| ; if (v == 0) |
| ; v = 1; |
| ; else |
| ; v = 0; |
| ; f2(v); |
| ;} |
| |
| @f1.v = internal unnamed_addr global i1 false, align 4 |
| |
; Toggle the i1 global @f1.v in place and tail-call @f2 with the zero-extended
; new value (see the commented C source above). Checks the common prefix
; (CHECK) since codegen is identical on all four targets.
define void @f1(i32 %c) {
; CHECK-LABEL: f1:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
; CHECK-NEXT: movb {{.*}}(%rip), %al
; CHECK-NEXT: notb %al
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movb %al, {{.*}}(%rip)
; CHECK-NEXT: xorl $1, %edi
; CHECK-NEXT: jmp _f2 ## TAILCALL
entry:
  %.b1 = load i1, i1* @f1.v, align 4
  %not..b1 = xor i1 %.b1, true
  store i1 %not..b1, i1* @f1.v, align 4
  %0 = zext i1 %not..b1 to i32
  tail call void @f2(i32 %0) #2
  ret void
}
| |
| declare void @f2(i32) #1 |
| |
; Truncate i16 to i1 and store: only the low bit must survive (andl $1).
define void @store_i16_i1(i16 %x, i1 *%y) {
; CHECK-LABEL: store_i16_i1:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: retq
  %c = trunc i16 %x to i1
  store i1 %c, i1* %y
  ret void
}
| |
; Truncate i8 to i1 and store; same masking requirement as @store_i16_i1.
define void @store_i8_i1(i8 %x, i1 *%y) {
; CHECK-LABEL: store_i8_i1:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: retq
  %c = trunc i8 %x to i1
  store i1 %c, i1* %y
  ret void
}
| |
; Select with a constant <32 x i1> build_vector mask: BW targets fold it to a
; single masked move with an immediate kmask (0x59455495); others lower to
; constant-pool ANDs. Assertions autogenerated.
define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; KNL-LABEL: test_build_vec_v32i1:
; KNL: ## BB#0:
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: test_build_vec_v32i1:
; SKX: ## BB#0:
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_build_vec_v32i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_build_vec_v32i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: retq
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
| |
; 64-lane variant of @test_build_vec_v32i1: constant mask becomes a 64-bit
; immediate loaded with kmovq on BW targets. Assertions autogenerated.
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; KNL-LABEL: test_build_vec_v64i1:
; KNL: ## BB#0:
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: test_build_vec_v64i1:
; SKX: ## BB#0:
; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
; SKX-NEXT: kmovq %rax, %k1
; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_build_vec_v64i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
; AVX512BW-NEXT: kmovq %rax, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_build_vec_v64i1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: retq
  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
  ret <64 x i8> %ret
}
| |
; Branch on whether an i8-bitcast <8 x i1> mask is all-zero: DQ-capable
; targets should emit a single ktestb instead of kmov + testb.
; Assertions autogenerated.
define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-LABEL: ktest_1:
; KNL: ## BB#0:
; KNL-NEXT: vmovupd (%rdi), %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB41_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
; KNL-NEXT: retq
; KNL-NEXT: LBB41_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_1:
; SKX: ## BB#0:
; SKX-NEXT: vmovupd (%rdi), %zmm1
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; SKX-NEXT: ktestb %k0, %k0
; SKX-NEXT: je LBB41_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB41_2: ## %L2
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovupd (%rdi), %zmm1
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB41_2
; AVX512BW-NEXT: ## BB#1: ## %L1
; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB41_2: ## %L2
; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_1:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; AVX512DQ-NEXT: ktestb %k0, %k0
; AVX512DQ-NEXT: je LBB41_2
; AVX512DQ-NEXT: ## BB#1: ## %L1
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB41_2: ## %L2
; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %addr1 = getelementptr double, double * %base, i64 0
  %addr2 = getelementptr double, double * %base, i64 1

  %vaddr1 = bitcast double* %addr1 to <8 x double>*
  %vaddr2 = bitcast double* %addr2 to <8 x double>*

  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1

  %sel1 = fcmp ogt <8 x double>%in, %val1
  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
  %sel2 = fcmp olt <8 x double> %in, %val3
  %sel3 = and <8 x i1> %sel1, %sel2

  %int_sel3 = bitcast <8 x i1> %sel3 to i8
  %res = icmp eq i8 %int_sel3, zeroinitializer
  br i1 %res, label %L2, label %L1
L1:
  store <8 x double> %in, <8 x double>* %vaddr1
  br label %End
L2:
  store <8 x double> %in, <8 x double>* %vaddr2
  br label %End
End:
  ret void
}
| |
define void @ktest_2(<32 x float> %in, float * %base) {
; Branch on an i32 bitmask built from two <32 x float> fcmp results OR'd
; together. SKX and AVX512BW keep the <32 x i1> mask in k-registers
; (kunpckwd + kord + ktestd); KNL and AVX512DQ lack AVX512BW's 32-bit mask
; ops, so they expand each 16-bit mask to a vector lane-by-lane
; (kshift/kmov/vpinsrb), OR in vector registers, and spill the result to an
; aligned stack slot before testing it with cmpl.
;
; KNL-LABEL: ktest_2:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Lcfi6:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Lcfi7:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:  Lcfi8:
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $32, %rsp
; KNL-NEXT:    vmovups (%rdi), %zmm2
; KNL-NEXT:    vmovups 64(%rdi), %zmm3
; KNL-NEXT:    vcmpltps %zmm1, %zmm3, %k1
; KNL-NEXT:    kshiftlw $14, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    kshiftlw $15, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm3
; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $13, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $12, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $11, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $10, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $9, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $8, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $7, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $6, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $5, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $4, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $3, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $2, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $1, %k1, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftrw $15, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT:    vcmpltps %zmm0, %zmm2, %k2
; KNL-NEXT:    kshiftlw $14, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    kshiftlw $15, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm2
; KNL-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $13, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $12, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $11, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $10, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $9, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $8, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $7, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $6, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $5, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $4, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $3, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $2, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftlw $1, %k2, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; KNL-NEXT:    kshiftrw $15, %k2, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT:    vmovups 4(%rdi), %zmm3 {%k2} {z}
; KNL-NEXT:    vmovups 68(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT:    vcmpltps %zmm4, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm4
; KNL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm4, %xmm4
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm4
; KNL-NEXT:    vcmpltps %zmm3, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    kshiftlw $15, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    vmovd %ecx, %xmm3
; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $13, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $12, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $11, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $10, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $9, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $8, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $7, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $6, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $5, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $4, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $3, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $2, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftlw $1, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT:    vpor %ymm3, %ymm2, %ymm2
; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
; KNL-NEXT:    vpslld $31, %zmm3, %zmm3
; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT:    kmovw %k0, (%rsp)
; KNL-NEXT:    cmpl $0, (%rsp)
; KNL-NEXT:    je LBB42_2
; KNL-NEXT:  ## BB#1: ## %L1
; KNL-NEXT:    vmovaps %zmm0, (%rdi)
; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
; KNL-NEXT:    jmp LBB42_3
; KNL-NEXT:  LBB42_2: ## %L2
; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
; KNL-NEXT:  LBB42_3: ## %End
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovups (%rdi), %zmm2
; SKX-NEXT:    vmovups 64(%rdi), %zmm3
; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1
; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2
; SKX-NEXT:    kunpckwd %k1, %k2, %k0
; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT:    kunpckwd %k1, %k2, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    ktestd %k0, %k0
; SKX-NEXT:    je LBB42_2
; SKX-NEXT:  ## BB#1: ## %L1
; SKX-NEXT:    vmovaps %zmm0, (%rdi)
; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB42_2: ## %L2
; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_2:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovups (%rdi), %zmm2
; AVX512BW-NEXT:    vmovups 64(%rdi), %zmm3
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm2, %k1
; AVX512BW-NEXT:    vcmpltps %zmm1, %zmm3, %k2
; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k0
; AVX512BW-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; AVX512BW-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; AVX512BW-NEXT:    vcmpltps %zmm3, %zmm0, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
; AVX512BW-NEXT:    kord %k1, %k0, %k0
; AVX512BW-NEXT:    ktestd %k0, %k0
; AVX512BW-NEXT:    je LBB42_2
; AVX512BW-NEXT:  ## BB#1: ## %L1
; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512BW-NEXT:    vmovaps %zmm1, 64(%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB42_2: ## %L2
; AVX512BW-NEXT:    vmovaps %zmm0, 4(%rdi)
; AVX512BW-NEXT:    vmovaps %zmm1, 68(%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_2:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    pushq %rbp
; AVX512DQ-NEXT:  Lcfi6:
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:  Lcfi7:
; AVX512DQ-NEXT:    .cfi_offset %rbp, -16
; AVX512DQ-NEXT:    movq %rsp, %rbp
; AVX512DQ-NEXT:  Lcfi8:
; AVX512DQ-NEXT:    .cfi_def_cfa_register %rbp
; AVX512DQ-NEXT:    andq $-32, %rsp
; AVX512DQ-NEXT:    subq $32, %rsp
; AVX512DQ-NEXT:    vmovups (%rdi), %zmm2
; AVX512DQ-NEXT:    vmovups 64(%rdi), %zmm3
; AVX512DQ-NEXT:    vcmpltps %zmm1, %zmm3, %k1
; AVX512DQ-NEXT:    kshiftlw $14, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %ecx
; AVX512DQ-NEXT:    vmovd %ecx, %xmm3
; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $13, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $12, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $11, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $10, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $9, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $8, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $7, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $6, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $5, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $4, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $3, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $2, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    vcmpltps %zmm0, %zmm2, %k2
; AVX512DQ-NEXT:    kshiftlw $14, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    kshiftlw $15, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %ecx
; AVX512DQ-NEXT:    vmovd %ecx, %xmm2
; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $13, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $12, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $11, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $10, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $9, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $8, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $7, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $6, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $5, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $4, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $3, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $2, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftlw $1, %k2, %k0
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    kshiftrw $15, %k2, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovups 4(%rdi), %zmm3 {%k2} {z}
; AVX512DQ-NEXT:    vmovups 68(%rdi), %zmm4 {%k1} {z}
; AVX512DQ-NEXT:    vcmpltps %zmm4, %zmm1, %k0
; AVX512DQ-NEXT:    kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %ecx
; AVX512DQ-NEXT:    vmovd %ecx, %xmm4
; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $6, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm4
; AVX512DQ-NEXT:    vcmpltps %zmm3, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %ecx
; AVX512DQ-NEXT:    vmovd %ecx, %xmm3
; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT:    kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, %eax
; AVX512DQ-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX512DQ-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT:    vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k0
; AVX512DQ-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT:    vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT:    vptestmd %zmm2, %zmm2, %k0
; AVX512DQ-NEXT:    kmovw %k0, (%rsp)
; AVX512DQ-NEXT:    cmpl $0, (%rsp)
; AVX512DQ-NEXT:    je LBB42_2
; AVX512DQ-NEXT:  ## BB#1: ## %L1
; AVX512DQ-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512DQ-NEXT:    vmovaps %zmm1, 64(%rdi)
; AVX512DQ-NEXT:    jmp LBB42_3
; AVX512DQ-NEXT:  LBB42_2: ## %L2
; AVX512DQ-NEXT:    vmovaps %zmm0, 4(%rdi)
; AVX512DQ-NEXT:    vmovaps %zmm1, 68(%rdi)
; AVX512DQ-NEXT:  LBB42_3: ## %End
; AVX512DQ-NEXT:    movq %rbp, %rsp
; AVX512DQ-NEXT:    popq %rbp
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; IR: two overlapping unaligned <32 x float> loads (offsets 0 and 1 float),
; two dependent fcmp/select rounds, then branch on whether the OR of the two
; <32 x i1> masks (bitcast to i32) is zero.
%addr1 = getelementptr float, float * %base, i64 0
%addr2 = getelementptr float, float * %base, i64 1

%vaddr1 = bitcast float* %addr1 to <32 x float>*
%vaddr2 = bitcast float* %addr2 to <32 x float>*

%val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
%val2 = load <32 x float>, <32 x float> *%vaddr2, align 1

%sel1 = fcmp ogt <32 x float>%in, %val1
%val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
%sel2 = fcmp olt <32 x float> %in, %val3
%sel3 = or <32 x i1> %sel1, %sel2

%int_sel3 = bitcast <32 x i1> %sel3 to i32
%res = icmp eq i32 %int_sel3, zeroinitializer
br i1 %res, label %L2, label %L1
L1:
store <32 x float> %in, <32 x float>* %vaddr1
br label %End
L2:
store <32 x float> %in, <32 x float>* %vaddr2
br label %End
End:
ret void
}
| |
define <8 x i64> @load_8i1(<8 x i1>* %a) {
; Load a <8 x i1> mask from memory and sign-extend to <8 x i64>.
; SKX/AVX512DQ load straight into a k-register (kmovb) and expand with
; vpmovm2q; KNL/AVX512BW (no DQ byte-mask moves) go movzbl -> kmov -> masked
; vpternlogq (all-ones under mask, zero elsewhere).
; KNL-LABEL: load_8i1:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: load_8i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2q %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_8i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_8i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    retq
%b = load <8 x i1>, <8 x i1>* %a
%c = sext <8 x i1> %b to <8 x i64>
ret <8 x i64> %c
}
| |
define <16 x i32> @load_16i1(<16 x i1>* %a) {
; Load a <16 x i1> mask and sign-extend to <16 x i32>. All targets have
; 16-bit mask moves (kmovw); SKX/AVX512DQ expand via vpmovm2d, the others
; via zero-masked vpternlogd (all-ones under the mask).
; KNL-LABEL: load_16i1:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: load_16i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovw (%rdi), %k0
; SKX-NEXT:    vpmovm2d %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_16i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovw (%rdi), %k1
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_16i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    retq
%b = load <16 x i1>, <16 x i1>* %a
%c = sext <16 x i1> %b to <16 x i32>
ret <16 x i32> %c
}
| |
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; Load a <2 x i1> mask and sign-extend to <2 x i16> (i16 elements are
; widened to i64 lanes internally, hence the q-sized expansions). SKX can
; produce the result directly in xmm0; targets without VLX compute in zmm0
; and implicitly truncate to xmm0 (the "kill" annotations).
; KNL-LABEL: load_2i1:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: load_2i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_2i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_2i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
%b = load <2 x i1>, <2 x i1>* %a
%c = sext <2 x i1> %b to <2 x i16>
ret <2 x i16> %c
}
| |
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; Load a <4 x i1> mask and sign-extend to <4 x i16>. SKX expands directly
; into xmm0 (vpmovm2d with VLX); KNL/AVX512BW expand to <8 x i64> in zmm0,
; narrow with vpmovqd, and return the low xmm; AVX512DQ expands with
; vpmovm2d in zmm0 and truncates implicitly.
; KNL-LABEL: load_4i1:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: load_4i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_4i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_4i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
%b = load <4 x i1>, <4 x i1>* %a
%c = sext <4 x i1> %b to <4 x i16>
ret <4 x i16> %c
}
| |
define <32 x i16> @load_32i1(<32 x i1>* %a) {
; Load a <32 x i1> mask and sign-extend to <32 x i16>. BW-capable targets
; (SKX/AVX512BW) use a single kmovd + vpmovm2w. Without AVX512BW the i16
; vector is split into two <16 x i16> halves: each 16-bit mask chunk is
; loaded separately, expanded to <16 x i32>, then narrowed with vpmovdw.
; KNL-LABEL: load_32i1:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    kmovw 2(%rdi), %k2
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdw %zmm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: load_32i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovd (%rdi), %k0
; SKX-NEXT:    vpmovm2w %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_32i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd (%rdi), %k0
; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_32i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
%b = load <32 x i1>, <32 x i1>* %a
%c = sext <32 x i1> %b to <32 x i16>
ret <32 x i16> %c
}
| |
define <64 x i8> @load_64i1(<64 x i1>* %a) {
; Load a <64 x i1> mask and sign-extend to <64 x i8>. BW-capable targets
; use a single kmovq + vpmovm2b. Without AVX512BW the result is split into
; four 16-lane chunks: each 16-bit mask piece is expanded to <16 x i32>,
; narrowed to bytes with vpmovdb, and the xmm halves are stitched into two
; ymm registers with vinserti128.
; KNL-LABEL: load_64i1:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    kmovw 2(%rdi), %k2
; KNL-NEXT:    kmovw 4(%rdi), %k3
; KNL-NEXT:    kmovw 6(%rdi), %k4
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: load_64i1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovq (%rdi), %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_64i1:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq (%rdi), %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_64i1:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
; AVX512DQ-NEXT:    kmovw 4(%rdi), %k2
; AVX512DQ-NEXT:    kmovw 6(%rdi), %k3
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm2
; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
%b = load <64 x i1>, <64 x i1>* %a
%c = sext <64 x i1> %b to <64 x i8>
ret <64 x i8> %c
}
| |
| ; Store an <8 x i1> vector argument (passed in xmm0 as bytes) to memory as a
| ; single mask byte. BW/DQ targets can use kmovb/kmovd from the mask register;
| ; KNL has no byte mask move, so it goes through a GPR (kmovw + movb).
| define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
| ; KNL-LABEL: store_8i1:
| ; KNL: ## BB#0:
| ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
| ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
| ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
| ; KNL-NEXT: kmovw %k0, %eax
| ; KNL-NEXT: movb %al, (%rdi)
| ; KNL-NEXT: retq
| ;
| ; SKX-LABEL: store_8i1:
| ; SKX: ## BB#0:
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
| ; SKX-NEXT: vpmovw2m %xmm0, %k0
| ; SKX-NEXT: kmovb %k0, (%rdi)
| ; SKX-NEXT: retq
| ;
| ; AVX512BW-LABEL: store_8i1:
| ; AVX512BW: ## BB#0:
| ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
| ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
| ; AVX512BW-NEXT: kmovd %k0, %eax
| ; AVX512BW-NEXT: movb %al, (%rdi)
| ; AVX512BW-NEXT: vzeroupper
| ; AVX512BW-NEXT: retq
| ;
| ; AVX512DQ-LABEL: store_8i1:
| ; AVX512DQ: ## BB#0:
| ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
| ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
| ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
| ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
| ; AVX512DQ-NEXT: vzeroupper
| ; AVX512DQ-NEXT: retq
| store <8 x i1> %v, <8 x i1>* %a
| ret void
| }
| |
| ; Same as store_8i1, but the mask comes from truncating an <8 x i16> argument
| ; to <8 x i1> (bit 0 of each word); checks the trunc folds into the same
| ; mask-store sequence.
| define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
| ; KNL-LABEL: store_8i1_1:
| ; KNL: ## BB#0:
| ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
| ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
| ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
| ; KNL-NEXT: kmovw %k0, %eax
| ; KNL-NEXT: movb %al, (%rdi)
| ; KNL-NEXT: retq
| ;
| ; SKX-LABEL: store_8i1_1:
| ; SKX: ## BB#0:
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
| ; SKX-NEXT: vpmovw2m %xmm0, %k0
| ; SKX-NEXT: kmovb %k0, (%rdi)
| ; SKX-NEXT: retq
| ;
| ; AVX512BW-LABEL: store_8i1_1:
| ; AVX512BW: ## BB#0:
| ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
| ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
| ; AVX512BW-NEXT: kmovd %k0, %eax
| ; AVX512BW-NEXT: movb %al, (%rdi)
| ; AVX512BW-NEXT: vzeroupper
| ; AVX512BW-NEXT: retq
| ;
| ; AVX512DQ-LABEL: store_8i1_1:
| ; AVX512DQ: ## BB#0:
| ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
| ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
| ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
| ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
| ; AVX512DQ-NEXT: vzeroupper
| ; AVX512DQ-NEXT: retq
| %v1 = trunc <8 x i16> %v to <8 x i1>
| store <8 x i1> %v1, <8 x i1>* %a
| ret void
| }
| |
| ; Store a <16 x i1> vector argument (passed in xmm0 as bytes) to memory as a
| ; 16-bit mask via kmovw. BW targets compress with vpmovb2m; KNL/DQ widen the
| ; bytes to dwords and use vptestmd to form the mask.
| define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
| ; KNL-LABEL: store_16i1:
| ; KNL: ## BB#0:
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; KNL-NEXT: kmovw %k0, (%rdi)
| ; KNL-NEXT: retq
| ;
| ; SKX-LABEL: store_16i1:
| ; SKX: ## BB#0:
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
| ; SKX-NEXT: vpmovb2m %xmm0, %k0
| ; SKX-NEXT: kmovw %k0, (%rdi)
| ; SKX-NEXT: retq
| ;
| ; AVX512BW-LABEL: store_16i1:
| ; AVX512BW: ## BB#0:
| ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
| ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
| ; AVX512BW-NEXT: kmovw %k0, (%rdi)
| ; AVX512BW-NEXT: vzeroupper
| ; AVX512BW-NEXT: retq
| ;
| ; AVX512DQ-LABEL: store_16i1:
| ; AVX512DQ: ## BB#0:
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
| ; AVX512DQ-NEXT: vzeroupper
| ; AVX512DQ-NEXT: retq
| store <16 x i1> %v, <16 x i1>* %a
| ret void
| }
| |
| ; Store a <32 x i1> vector argument (passed in ymm0 as bytes) to memory as a
| ; 32-bit mask. BW targets use vpmovb2m + kmovd; KNL/DQ split the ymm into two
| ; xmm halves and store two 16-bit masks at offsets 0 and 2.
| define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
| ; KNL-LABEL: store_32i1:
| ; KNL: ## BB#0:
| ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
| ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
| ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
| ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
| ; KNL-NEXT: kmovw %k0, 2(%rdi)
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; KNL-NEXT: kmovw %k0, (%rdi)
| ; KNL-NEXT: retq
| ;
| ; SKX-LABEL: store_32i1:
| ; SKX: ## BB#0:
| ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
| ; SKX-NEXT: vpmovb2m %ymm0, %k0
| ; SKX-NEXT: kmovd %k0, (%rdi)
| ; SKX-NEXT: vzeroupper
| ; SKX-NEXT: retq
| ;
| ; AVX512BW-LABEL: store_32i1:
| ; AVX512BW: ## BB#0:
| ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
| ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
| ; AVX512BW-NEXT: kmovd %k0, (%rdi)
| ; AVX512BW-NEXT: vzeroupper
| ; AVX512BW-NEXT: retq
| ;
| ; AVX512DQ-LABEL: store_32i1:
| ; AVX512DQ: ## BB#0:
| ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
| ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
| ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
| ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
| ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
| ; AVX512DQ-NEXT: vzeroupper
| ; AVX512DQ-NEXT: retq
| store <32 x i1> %v, <32 x i1>* %a
| ret void
| }
| |
| ; Truncate a <32 x i16> argument to <32 x i1> (bit 0 of each word) and store
| ; it as a 32-bit mask. BW targets fold this into vpsllw $15 + vpmovw2m +
| ; kmovd; KNL/DQ first narrow the words to bytes, then emit two 16-bit mask
| ; stores as in store_32i1.
| define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
| ; KNL-LABEL: store_32i1_1:
| ; KNL: ## BB#0:
| ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
| ; KNL-NEXT: vpmovdb %zmm0, %xmm0
| ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
| ; KNL-NEXT: vpmovdb %zmm1, %xmm1
| ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
| ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
| ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
| ; KNL-NEXT: kmovw %k0, 2(%rdi)
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; KNL-NEXT: kmovw %k0, (%rdi)
| ; KNL-NEXT: retq
| ;
| ; SKX-LABEL: store_32i1_1:
| ; SKX: ## BB#0:
| ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
| ; SKX-NEXT: vpmovw2m %zmm0, %k0
| ; SKX-NEXT: kmovd %k0, (%rdi)
| ; SKX-NEXT: vzeroupper
| ; SKX-NEXT: retq
| ;
| ; AVX512BW-LABEL: store_32i1_1:
| ; AVX512BW: ## BB#0:
| ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
| ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
| ; AVX512BW-NEXT: kmovd %k0, (%rdi)
| ; AVX512BW-NEXT: vzeroupper
| ; AVX512BW-NEXT: retq
| ;
| ; AVX512DQ-LABEL: store_32i1_1:
| ; AVX512DQ: ## BB#0:
| ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
| ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
| ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
| ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
| ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
| ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
| ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
| ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
| ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
| ; AVX512DQ-NEXT: vzeroupper
| ; AVX512DQ-NEXT: retq
| %v1 = trunc <32 x i16> %v to <32 x i1>
| store <32 x i1> %v1, <32 x i1>* %a
| ret void
| }
| |
| |
| define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { |
| ; |
| ; KNL-LABEL: store_64i1: |
| ; KNL: ## BB#0: |
| ; KNL-NEXT: pushq %rbp |
| ; KNL-NEXT: Lcfi9: |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: pushq %r15 |
| ; KNL-NEXT: Lcfi10: |
| ; KNL-NEXT: .cfi_def_cfa_offset 24 |
| ; KNL-NEXT: pushq %r14 |
| ; KNL-NEXT: Lcfi11: |
| ; KNL-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL-NEXT: pushq %r13 |
| ; KNL-NEXT: Lcfi12: |
| ; KNL-NEXT: .cfi_def_cfa_offset 40 |
| ; KNL-NEXT: pushq %r12 |
| ; KNL-NEXT: Lcfi13: |
| ; KNL-NEXT: .cfi_def_cfa_offset 48 |
| ; KNL-NEXT: pushq %rbx |
| ; KNL-NEXT: Lcfi14: |
| ; KNL-NEXT: .cfi_def_cfa_offset 56 |
| ; KNL-NEXT: Lcfi15: |
| ; KNL-NEXT: .cfi_offset %rbx, -56 |
| ; KNL-NEXT: Lcfi16: |
| ; KNL-NEXT: .cfi_offset %r12, -48 |
| ; KNL-NEXT: Lcfi17: |
| ; KNL-NEXT: .cfi_offset %r13, -40 |
| ; KNL-NEXT: Lcfi18: |
| ; KNL-NEXT: .cfi_offset %r14, -32 |
| ; KNL-NEXT: Lcfi19: |
| ; KNL-NEXT: .cfi_offset %r15, -24 |
| ; KNL-NEXT: Lcfi20: |
| ; KNL-NEXT: .cfi_offset %rbp, -16 |
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 |
| ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 |
| ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 |
| ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 |
| ; KNL-NEXT: vpslld $31, %zmm3, %zmm3 |
| ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r8d |
| ; KNL-NEXT: kshiftlw $15, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: kshiftlw $13, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r10d |
| ; KNL-NEXT: kshiftlw $12, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r11d |
| ; KNL-NEXT: kshiftlw $11, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r14d |
| ; KNL-NEXT: kshiftlw $10, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r15d |
| ; KNL-NEXT: kshiftlw $9, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r12d |
| ; KNL-NEXT: kshiftlw $8, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r13d |
| ; KNL-NEXT: kshiftlw $7, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebx |
| ; KNL-NEXT: kshiftlw $6, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebp |
| ; KNL-NEXT: kshiftlw $5, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %eax |
| ; KNL-NEXT: kshiftlw $4, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ecx |
| ; KNL-NEXT: kshiftlw $3, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %edx |
| ; KNL-NEXT: kshiftlw $2, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %esi |
| ; KNL-NEXT: kshiftlw $1, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: vmovd %r9d, %xmm3 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 |
| ; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 |
| ; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 |
| ; KNL-NEXT: kmovw %k0, %eax |
| ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 |
| ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 |
| ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 |
| ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 |
| ; KNL-NEXT: kmovw %k0, 6(%rdi) |
| ; KNL-NEXT: kshiftlw $14, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r8d |
| ; KNL-NEXT: kshiftlw $15, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r10d |
| ; KNL-NEXT: kshiftlw $13, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r9d |
| ; KNL-NEXT: kshiftlw $12, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r11d |
| ; KNL-NEXT: kshiftlw $11, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r14d |
| ; KNL-NEXT: kshiftlw $10, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r15d |
| ; KNL-NEXT: kshiftlw $9, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r12d |
| ; KNL-NEXT: kshiftlw $8, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r13d |
| ; KNL-NEXT: kshiftlw $7, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ecx |
| ; KNL-NEXT: kshiftlw $6, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %esi |
| ; KNL-NEXT: kshiftlw $5, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ebp |
| ; KNL-NEXT: kshiftlw $4, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ebx |
| ; KNL-NEXT: kshiftlw $3, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %eax |
| ; KNL-NEXT: kshiftlw $2, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %edx |
| ; KNL-NEXT: kshiftlw $1, %k2, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: vmovd %r10d, %xmm2 |
| ; KNL-NEXT: kmovw %k0, %r10d |
| ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k2, %k0 |
| ; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 |
| ; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 |
| ; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 |
| ; KNL-NEXT: kmovw %k0, %eax |
| ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 |
| ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 |
| ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 |
| ; KNL-NEXT: kmovw %k0, 4(%rdi) |
| ; KNL-NEXT: kshiftlw $14, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r8d |
| ; KNL-NEXT: kshiftlw $15, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r10d |
| ; KNL-NEXT: kshiftlw $13, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r9d |
| ; KNL-NEXT: kshiftlw $12, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r11d |
| ; KNL-NEXT: kshiftlw $11, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r14d |
| ; KNL-NEXT: kshiftlw $10, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r15d |
| ; KNL-NEXT: kshiftlw $9, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r12d |
| ; KNL-NEXT: kshiftlw $8, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %r13d |
| ; KNL-NEXT: kshiftlw $7, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ecx |
| ; KNL-NEXT: kshiftlw $6, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %esi |
| ; KNL-NEXT: kshiftlw $5, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ebp |
| ; KNL-NEXT: kshiftlw $4, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %ebx |
| ; KNL-NEXT: kshiftlw $3, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %eax |
| ; KNL-NEXT: kshiftlw $2, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, %edx |
| ; KNL-NEXT: kshiftlw $1, %k1, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: vmovd %r10d, %xmm1 |
| ; KNL-NEXT: kmovw %k0, %r10d |
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 |
| ; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 |
| ; KNL-NEXT: kmovw %k1, %eax |
| ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 |
| ; KNL-NEXT: kmovw %k1, 2(%rdi) |
| ; KNL-NEXT: kshiftlw $14, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r8d |
| ; KNL-NEXT: kshiftlw $15, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: kshiftlw $13, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r10d |
| ; KNL-NEXT: kshiftlw $12, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r11d |
| ; KNL-NEXT: kshiftlw $11, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r14d |
| ; KNL-NEXT: kshiftlw $10, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r15d |
| ; KNL-NEXT: kshiftlw $9, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r12d |
| ; KNL-NEXT: kshiftlw $8, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %r13d |
| ; KNL-NEXT: kshiftlw $7, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %edx |
| ; KNL-NEXT: kshiftlw $6, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %esi |
| ; KNL-NEXT: kshiftlw $5, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebp |
| ; KNL-NEXT: kshiftlw $4, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebx |
| ; KNL-NEXT: kshiftlw $3, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %eax |
| ; KNL-NEXT: kshiftlw $2, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: kmovw %k1, %ecx |
| ; KNL-NEXT: kshiftlw $1, %k0, %k1 |
| ; KNL-NEXT: kshiftrw $15, %k1, %k1 |
| ; KNL-NEXT: vmovd %r9d, %xmm0 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 |
| ; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 |
| ; KNL-NEXT: kmovw %k0, %eax |
| ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 |
| ; KNL-NEXT: kmovw %k0, (%rdi) |
| ; KNL-NEXT: popq %rbx |
| ; KNL-NEXT: popq %r12 |
| ; KNL-NEXT: popq %r13 |
| ; KNL-NEXT: popq %r14 |
| ; KNL-NEXT: popq %r15 |
| ; KNL-NEXT: popq %rbp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: store_64i1: |
| ; SKX: ## BB#0: |
| ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 |
| ; SKX-NEXT: vpmovb2m %zmm0, %k0 |
| ; SKX-NEXT: kmovq %k0, (%rdi) |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512BW-LABEL: store_64i1: |
| ; AVX512BW: ## BB#0: |
| ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 |
| ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 |
| ; AVX512BW-NEXT: kmovq %k0, (%rdi) |
| ; AVX512BW-NEXT: vzeroupper |
| ; AVX512BW-NEXT: retq |
| ; |
| ; AVX512DQ-LABEL: store_64i1: |
| ; AVX512DQ: ## BB#0: |
| ; AVX512DQ-NEXT: pushq %rbp |
| ; AVX512DQ-NEXT: Lcfi9: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 |
| ; AVX512DQ-NEXT: pushq %r15 |
| ; AVX512DQ-NEXT: Lcfi10: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 24 |
| ; AVX512DQ-NEXT: pushq %r14 |
| ; AVX512DQ-NEXT: Lcfi11: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 32 |
| ; AVX512DQ-NEXT: pushq %r13 |
| ; AVX512DQ-NEXT: Lcfi12: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 40 |
| ; AVX512DQ-NEXT: pushq %r12 |
| ; AVX512DQ-NEXT: Lcfi13: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 48 |
| ; AVX512DQ-NEXT: pushq %rbx |
| ; AVX512DQ-NEXT: Lcfi14: |
| ; AVX512DQ-NEXT: .cfi_def_cfa_offset 56 |
| ; AVX512DQ-NEXT: Lcfi15: |
| ; AVX512DQ-NEXT: .cfi_offset %rbx, -56 |
| ; AVX512DQ-NEXT: Lcfi16: |
| ; AVX512DQ-NEXT: .cfi_offset %r12, -48 |
| ; AVX512DQ-NEXT: Lcfi17: |
| ; AVX512DQ-NEXT: .cfi_offset %r13, -40 |
| ; AVX512DQ-NEXT: Lcfi18: |
| ; AVX512DQ-NEXT: .cfi_offset %r14, -32 |
| ; AVX512DQ-NEXT: Lcfi19: |
| ; AVX512DQ-NEXT: .cfi_offset %r15, -24 |
| ; AVX512DQ-NEXT: Lcfi20: |
| ; AVX512DQ-NEXT: .cfi_offset %rbp, -16 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 |
| ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 |
| ; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r8d |
| ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r9d |
| ; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r10d |
| ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r11d |
| ; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r14d |
| ; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r15d |
| ; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r12d |
| ; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r13d |
| ; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ebx |
| ; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ebp |
| ; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %eax |
| ; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ecx |
| ; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %edx |
| ; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %esi |
| ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: vmovd %r9d, %xmm3 |
| ; AVX512DQ-NEXT: kmovw %k1, %r9d |
| ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k2 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: kmovw %k0, %eax |
| ; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 |
| ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) |
| ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r8d |
| ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r10d |
| ; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r9d |
| ; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r11d |
| ; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r14d |
| ; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r15d |
| ; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r12d |
| ; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r13d |
| ; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ecx |
| ; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %esi |
| ; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ebp |
| ; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ebx |
| ; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %eax |
| ; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %edx |
| ; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: vmovd %r10d, %xmm2 |
| ; AVX512DQ-NEXT: kmovw %k0, %r10d |
| ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0 |
| ; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: kmovw %k0, %eax |
| ; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 |
| ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) |
| ; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r8d |
| ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r10d |
| ; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r9d |
| ; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r11d |
| ; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r14d |
| ; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r15d |
| ; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r12d |
| ; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %r13d |
| ; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ecx |
| ; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %esi |
| ; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ebp |
| ; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %ebx |
| ; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %eax |
| ; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, %edx |
| ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: vmovd %r10d, %xmm1 |
| ; AVX512DQ-NEXT: kmovw %k0, %r10d |
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: kmovw %k1, %eax |
| ; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) |
| ; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r8d |
| ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r9d |
| ; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r10d |
| ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r11d |
| ; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r14d |
| ; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r15d |
| ; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r12d |
| ; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %r13d |
| ; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %edx |
| ; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %esi |
| ; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ebp |
| ; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ebx |
| ; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %eax |
| ; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: kmovw %k1, %ecx |
| ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 |
| ; AVX512DQ-NEXT: vmovd %r9d, %xmm0 |
| ; AVX512DQ-NEXT: kmovw %k1, %r9d |
| ; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQ-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: kmovw %k0, %eax |
| ; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 |
| ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 |
| ; AVX512DQ-NEXT: kmovw %k0, (%rdi) |
| ; AVX512DQ-NEXT: popq %rbx |
| ; AVX512DQ-NEXT: popq %r12 |
| ; AVX512DQ-NEXT: popq %r13 |
| ; AVX512DQ-NEXT: popq %r14 |
| ; AVX512DQ-NEXT: popq %r15 |
| ; AVX512DQ-NEXT: popq %rbp |
| ; AVX512DQ-NEXT: vzeroupper |
| ; AVX512DQ-NEXT: retq |
| store <64 x i1> %v, <64 x i1>* %a |
| ret void |
| } |
| |
define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
; Compare 16 x i32 against zero, keep the low 8 bits of the mask, bitcast to
; i8 and zero-extend to i32 (the add keeps the zext from folding away).
; Targets with DQ (SKX, AVX512DQ) have kmovb, which already writes the 8-bit
; mask zero-extended into a 32-bit GPR, so no separate movzbl is emitted;
; KNL and AVX512BW must movzbl after moving the full mask register out.
; KNL-LABEL: test_bitcast_v8i1_zext:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: addl %eax, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_bitcast_v8i1_zext:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: addl %eax, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_bitcast_v8i1_zext:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movzbl %al, %eax
; AVX512BW-NEXT: addl %eax, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, %eax
; AVX512DQ-NEXT: addl %eax, %eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %v1 = icmp eq <16 x i32> %a, zeroinitializer
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %mask1 = bitcast <8 x i1> %mask to i8
  %val = zext i8 %mask1 to i32
  %val1 = add i32 %val, %val
  ret i32 %val1
}
| |
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
; Same as test_bitcast_v8i1_zext but with the full 16-bit mask: bitcast the
; <16 x i1> compare result to i16 and zero-extend to i32.  kmovw into a
; 32-bit GPR already zero-extends, so no movzwl is needed on any target.
; KNL-LABEL: test_bitcast_v16i1_zext:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: addl %eax, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_bitcast_v16i1_zext:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: addl %eax, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_bitcast_v16i1_zext:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovw %k0, %eax
; AVX512BW-NEXT: addl %eax, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_bitcast_v16i1_zext:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: addl %eax, %eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
  %v1 = icmp eq <16 x i32> %a, zeroinitializer
  %mask1 = bitcast <16 x i1> %v1 to i16
  %val = zext i16 %mask1 to i32
  %val1 = add i32 %val, %val
  ret i32 %val1
}
| |
define i16 @test_v16i1_add(i16 %x, i16 %y) {
; Elementwise add on <16 x i1> is addition mod 2, i.e. XOR, so this must
; lower to a single kxorw on the mask registers — no vector widening.
; KNL-LABEL: test_v16i1_add:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v16i1_add:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v16i1_add:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v16i1_add:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = add <16 x i1> %m0, %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}
| |
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
; Subtraction mod 2 is identical to addition mod 2, so sub on <16 x i1>
; also lowers to a single kxorw, exactly like test_v16i1_add.
; KNL-LABEL: test_v16i1_sub:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v16i1_sub:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v16i1_sub:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v16i1_sub:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = sub <16 x i1> %m0, %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}
| |
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
; Multiplication of i1 values is logical AND, so mul on <16 x i1> must
; lower to a single kandw on the mask registers.
; KNL-LABEL: test_v16i1_mul:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v16i1_mul:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kandw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v16i1_mul:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v16i1_mul:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = mul <16 x i1> %m0, %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}
| |
define i8 @test_v8i1_add(i8 %x, i8 %y) {
; 8-lane variant of test_v16i1_add: add on <8 x i1> lowers to a mask XOR.
; Targets with DQ (SKX, AVX512DQ) use the byte-width kxorb; the others
; fall back to the 16-bit kxorw, which is equivalent for the low 8 bits.
; KNL-LABEL: test_v8i1_add:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_add:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_add:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_add:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = add <8 x i1> %m0, %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}
| |
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
; 8-lane variant of test_v16i1_sub: sub mod 2 is XOR, so this lowers to a
; mask XOR — kxorb on DQ-capable targets (SKX, AVX512DQ), kxorw elsewhere.
; KNL-LABEL: test_v8i1_sub:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_sub:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_sub:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_sub:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = sub <8 x i1> %m0, %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}
| |
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
; 8-lane variant of test_v16i1_mul: mul of i1 values is logical AND, so
; this lowers to kandb on DQ-capable targets (SKX, AVX512DQ), kandw elsewhere.
; KNL-LABEL: test_v8i1_mul:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_mul:
; SKX: ## BB#0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_mul:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_mul:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512DQ-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = mul <8 x i1> %m0, %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}