| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW |
| |
define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x8mem_to_8x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: retq
; Masked zero-extending load: <8 x i8> from memory widened to <8 x i16>, with
; lanes cleared where %mask is false. With AVX512BW+VL (SKX) the select folds
; into a zero-masked vpmovzxbw via a k-register; without BW (KNL,
; AVX512DQNOBW) the i1 mask is materialized by vpsllw/vpsraw sign-splat and
; applied with vpand.
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}
| |
define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x8mem_to_8x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: retq
; Masked sign-extending load: <8 x i8> from memory widened to <8 x i16>.
; SKX folds the select into a zero-masked vpmovsxbw {%k1} {z}; non-BW targets
; emulate the i1 mask with a vpsllw/vpsraw sign-splat followed by vpand.
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}
| |
| |
define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x8mem_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: retq
; Masked zero-extending load: <16 x i8> -> <16 x i16> (256-bit result).
; SKX turns the 16 x i1 mask into %k1 with vpsllw $7 + vpmovb2m and uses a
; zero-masked vpmovzxbw; non-BW targets first widen the byte mask to words,
; sign-splat it, then vpand.
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
| |
define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x8mem_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: retq
; Masked sign-extending load: <16 x i8> -> <16 x i16>. SKX uses
; vpmovb2m + zero-masked vpmovsxbw; non-BW targets widen the mask to words and
; apply it with sign-splat + vpand.
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
| |
define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: zext_16x8_to_16x16:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; ALL-NEXT: retq
; Unmasked register zext <16 x i8> -> <16 x i16>: a single vpmovzxbw on every
; configuration (hence the shared ALL prefix).
  %x = zext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}
| |
define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x16_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x8_to_16x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT: retq
; Register (non-memory) variant of the masked zext <16 x i8> -> <16 x i16>:
; the source vector arrives in xmm0 and the i1 mask in xmm1. SKX folds the
; select into a zero-masked vpmovzxbw; non-BW targets widen both operands to
; ymm and combine with sign-splat + vpand.
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
| |
define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: sext_16x8_to_16x16:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
; ALL-NEXT: retq
; Unmasked register sext <16 x i8> -> <16 x i16>: a single vpmovsxbw on every
; configuration (shared ALL prefix).
  %x = sext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}
| |
define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x16_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x8_to_16x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT: retq
; Register variant of the masked sext <16 x i8> -> <16 x i16>: mask in xmm1,
; data in xmm0. SKX uses vpmovb2m + zero-masked vpmovsxbw; non-BW targets
; widen the mask, sign-splat it, and vpand against the sign-extended data.
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
| |
define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_32x8mem_to_32x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT: retq
; Masked zero-extending load: <32 x i8> -> <32 x i16> (512-bit result).
; SKX handles the whole operation with one zero-masked zmm vpmovzxbw; non-BW
; targets split into two 256-bit halves (sign-splat mask + vpand per half) and
; reassemble with vinserti64x4.
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
| |
define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_32x8mem_to_32x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovsxbw (%rdi), %ymm2
; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm3
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm2
; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm3
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT: retq
; Masked sign-extending load: <32 x i8> -> <32 x i16>. SKX folds into one
; zero-masked zmm vpmovsxbw; non-BW targets do two 128-bit-source vpmovsxbw
; loads (offsets 0 and 16), mask each ymm half via sign-splat + vpand, and
; join with vinserti64x4.
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
| |
define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16:
; KNL: # %bb.0:
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: retq
; Unmasked zext <32 x i8> -> <32 x i16>: one ymm->zmm vpmovzxbw on BW targets
; (SKX); non-BW targets extend each 128-bit half to ymm and rejoin with
; vinserti64x4.
  %x = zext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}
| |
define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8_to_32x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
; SKX-NEXT: vpmovb2m %ymm1, %k1
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT: retq
; Register variant of the masked zext <32 x i8> -> <32 x i16>: data in ymm0,
; 32 x i1 mask in ymm1. SKX performs one zero-masked zmm vpmovzxbw; non-BW
; targets widen both data and mask per 128-bit half, combine each half with
; sign-splat + vpand, and join with vinserti64x4.
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
| |
define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16:
; KNL: # %bb.0:
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovsxbw %xmm1, %ymm1
; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %ymm1
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: retq
; Unmasked sext <32 x i8> -> <32 x i16>: one ymm->zmm vpmovsxbw on BW targets;
; non-BW targets sign-extend each 128-bit half and rejoin with vinserti64x4.
  %x = sext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}
| |
define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovsxbw %xmm0, %ymm3
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8_to_32x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
; SKX-NEXT: vpmovb2m %ymm1, %k1
; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm3
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT: retq
; Register variant of the masked sext <32 x i8> -> <32 x i16>: data in ymm0,
; mask in ymm1. SKX folds into one zero-masked zmm vpmovsxbw; non-BW targets
; split into halves, sign-extend each, mask via sign-splat + vpand, and
; reassemble with vinserti64x4.
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
| |
define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x8mem_to_4x32:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512DQ-NEXT: retq
; Masked zext load: <4 x i8> -> <4 x i32>. Both DQ+VL runs produce identical
; code here (shared AVX512DQ prefix): vpmovd2m + zero-masked xmm vpmovzxbd.
; KNL lacks VL, so it builds the mask with vptestmd at zmm width, masks with
; vmovdqa32 {%k1} {z}, and needs vzeroupper before returning.
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = zext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
| |
define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x8mem_to_4x32:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
; Masked sext load: <4 x i8> -> <4 x i32>. Both DQ+VL runs share the AVX512DQ
; prefix: vpmovd2m + zero-masked xmm vpmovsxbd. KNL (no VL) uses zmm-wide
; vptestmd and vmovdqa32 {%k1} {z}, then vzeroupper.
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
| |
define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x8mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512DQNOBW-NEXT: retq
; Masked zext load: <8 x i8> -> <8 x i32>. Each target builds %k1 differently:
; SKX via vpmovw2m on the i16 mask vector; AVX512DQNOBW via vpmovd2m after
; widening the mask to dwords; KNL via zmm vptestmq after widening to qwords,
; then masks with vmovdqa32 since it lacks VL masking at ymm width.
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
| |
define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x8mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
; Masked sext load: <8 x i8> -> <8 x i32>. Same mask-materialization strategies
; as zext_8x8mem_to_8x32 above, with the extend itself done by vpmovsxbd
; (zero-masked on VL-capable targets).
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
| |
; Masked zext load: <16 x i8> from memory widened to <16 x i32>, lanes zeroed
; where %mask is false. SKX uses vpmovb2m directly; the non-BW targets build
; the k-mask via sext-to-dword + shift + vptestmd/vpmovd2m.
define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x8mem_to_16x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512DQNOBW-NEXT: retq
%a = load <16 x i8>,<16 x i8> *%i,align 1
%x = zext <16 x i8> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Masked sext load: <16 x i8> from memory widened to <16 x i32>; all targets
; fold the select into a {z}-masked vpmovsxbd load.
define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x8mem_to_16x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
%a = load <16 x i8>,<16 x i8> *%i,align 1
%x = sext <16 x i8> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Masked zext of a register operand: <16 x i8> in xmm0 widened to <16 x i32>,
; lanes zeroed where %mask (xmm1) is false.
define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x32_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x8_to_16x32_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: retq
%x = zext <16 x i8> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Masked sext of a register operand: <16 x i8> in xmm0 widened to <16 x i32>,
; lanes zeroed where %mask (xmm1) is false.
define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x32_mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x8_to_16x32_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
%x = sext <16 x i8> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Unmasked zext <16 x i8> -> <16 x i32>: a single vpmovzxbd on every target.
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
; ALL-LABEL: zext_16x8_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT: retq
%x = zext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
| |
; Unmasked sext <16 x i8> -> <16 x i32>: a single vpmovsxbd on every target.
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
; ALL-LABEL: sext_16x8_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
; ALL-NEXT: retq
%x = sext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
| |
; Masked zext load: <2 x i8> from memory widened to <2 x i64>. KNL has no VL,
; so it widens to zmm, masks with vmovdqa64, and needs a kill + vzeroupper.
define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_2x8mem_to_2x64:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: retq
%a = load <2 x i8>,<2 x i8> *%i,align 1
%x = zext <2 x i8> %a to <2 x i64>
%ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
ret <2 x i64> %ret
}
; Masked sext load: <2 x i8> from memory widened to <2 x i64>, lanes zeroed
; where %mask is false.
define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_2x8mem_to_2x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
%a = load <2 x i8>,<2 x i8> *%i,align 1
%x = sext <2 x i8> %a to <2 x i64>
%ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
ret <2 x i64> %ret
}
; Unmasked sext load <2 x i8> -> <2 x i64>: folds into one vpmovsxbq load.
define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_2x8mem_to_2x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
; ALL-NEXT: retq
%a = load <2 x i8>,<2 x i8> *%i,align 1
%x = sext <2 x i8> %a to <2 x i64>
ret <2 x i64> %x
}
| |
; Masked zext load: <4 x i8> from memory widened to <4 x i64>, lanes zeroed
; where %mask is false.
define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x8mem_to_4x64:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
%x = zext <4 x i8> %a to <4 x i64>
%ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
ret <4 x i64> %ret
}
| |
; Masked sext load: <4 x i8> from memory widened to <4 x i64>, lanes zeroed
; where %mask is false.
define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x8mem_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
; AVX512DQ-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
%x = sext <4 x i8> %a to <4 x i64>
%ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
ret <4 x i64> %ret
}
| |
; Unmasked sext load <4 x i8> -> <4 x i64>: folds into one vpmovsxbq load.
define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_4x8mem_to_4x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
; ALL-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
%x = sext <4 x i8> %a to <4 x i64>
ret <4 x i64> %x
}
| |
; Masked zext load: <8 x i8> from memory widened to <8 x i64>, lanes zeroed
; where %mask is false.
define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x64:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x8mem_to_8x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; AVX512DQNOBW-NEXT: retq
%a = load <8 x i8>,<8 x i8> *%i,align 1
%x = zext <8 x i8> %a to <8 x i64>
%ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
ret <8 x i64> %ret
}
| |
; Masked sext load: <8 x i8> from memory widened to <8 x i64>, lanes zeroed
; where %mask is false.
define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
%a = load <8 x i8>,<8 x i8> *%i,align 1
%x = sext <8 x i8> %a to <8 x i64>
%ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
ret <8 x i64> %ret
}
| |
; Unmasked sext load <8 x i8> -> <8 x i64>: folds into one vpmovsxbq load.
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_8x8mem_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
; ALL-NEXT: retq
%a = load <8 x i8>,<8 x i8> *%i,align 1
%x = sext <8 x i8> %a to <8 x i64>
ret <8 x i64> %x
}
| |
; Masked zext load: <4 x i16> from memory widened to <4 x i32>, lanes zeroed
; where %mask is false.
define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x16mem_to_4x32:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512DQ-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
%x = zext <4 x i16> %a to <4 x i32>
%ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
ret <4 x i32> %ret
}
| |
; Masked sext load: <4 x i16> from memory widened to <4 x i32>, lanes zeroed
; where %mask is false.
define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x16mem_to_4x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
%x = sext <4 x i16> %a to <4 x i32>
%ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
ret <4 x i32> %ret
}
| |
; Unmasked sext load <4 x i16> -> <4 x i32>: folds into one vpmovsxwd load.
define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_4x16mem_to_4x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
; ALL-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
%x = sext <4 x i16> %a to <4 x i32>
ret <4 x i32> %x
}
| |
| |
; Masked zext load: <8 x i16> from memory widened to <8 x i32>, lanes zeroed
; where %mask is false.
define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512DQNOBW-NEXT: retq
%a = load <8 x i16>,<8 x i16> *%i,align 1
%x = zext <8 x i16> %a to <8 x i32>
%ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
ret <8 x i32> %ret
}
| |
; Masked sext load: <8 x i16> from memory widened to <8 x i32>, lanes zeroed
; where %mask is false.
define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
%a = load <8 x i16>,<8 x i16> *%i,align 1
%x = sext <8 x i16> %a to <8 x i32>
%ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
ret <8 x i32> %ret
}
| |
; Unmasked sext load <8 x i16> -> <8 x i32>: folds into one vpmovsxwd load.
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_8x16mem_to_8x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
; ALL-NEXT: retq
%a = load <8 x i16>,<8 x i16> *%i,align 1
%x = sext <8 x i16> %a to <8 x i32>
ret <8 x i32> %x
}
| |
; Masked zext of a register operand: <8 x i16> in xmm0 widened to <8 x i32>,
; lanes zeroed where %mask (xmm1) is false.
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16_to_8x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: retq
%x = zext <8 x i16> %a to <8 x i32>
%ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
ret <8 x i32> %ret
}
| |
; Unmasked zext <8 x i16> -> <8 x i32>: a single vpmovzxwd on every target.
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
; ALL-LABEL: zext_8x16_to_8x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT: retq
%x = zext <8 x i16> %a to <8 x i32>
ret <8 x i32> %x
}
| |
; Masked zext load: <16 x i16> from memory widened to <16 x i32>, lanes
; zeroed where %mask is false.
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16mem_to_16x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x16mem_to_16x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: retq
%a = load <16 x i16>,<16 x i16> *%i,align 1
%x = zext <16 x i16> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Masked sext load: <16 x i16> from memory widened to <16 x i32>, lanes
; zeroed where %mask is false.
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x16mem_to_16x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT: retq
%a = load <16 x i16>,<16 x i16> *%i,align 1
%x = sext <16 x i16> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Unmasked sext load <16 x i16> -> <16 x i32>: folds into one vpmovsxwd load.
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_16x16mem_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
; ALL-NEXT: retq
%a = load <16 x i16>,<16 x i16> *%i,align 1
%x = sext <16 x i16> %a to <16 x i32>
ret <16 x i32> %x
}
; Masked zext of a register operand: <16 x i16> in ymm0 widened to <16 x i32>,
; lanes zeroed where %mask (xmm1) is false.
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16_to_16x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x16_to_16x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQNOBW-NEXT: retq
%x = zext <16 x i16> %a to <16 x i32>
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %ret
}
| |
; Unmasked zext <16 x i16> -> <16 x i32>: a single vpmovzxwd on every target.
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
; ALL-LABEL: zext_16x16_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT: retq
%x = zext <16 x i16> %a to <16 x i32>
ret <16 x i32> %x
}
| |
; Masked zext load: <2 x i16> from memory widened to <2 x i64>, lanes zeroed
; where %mask is false.
define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_2x16mem_to_2x64:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX512DQ-NEXT: retq
%a = load <2 x i16>,<2 x i16> *%i,align 1
%x = zext <2 x i16> %a to <2 x i64>
%ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
ret <2 x i64> %ret
}
| |
; Masked sext load: <2 x i16> from memory widened to <2 x i64>, lanes zeroed
; where %mask is false.
define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_2x16mem_to_2x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
%a = load <2 x i16>,<2 x i16> *%i,align 1
%x = sext <2 x i16> %a to <2 x i64>
%ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
ret <2 x i64> %ret
}
| |
; Unmasked sext load <2 x i16> -> <2 x i64>: folds into one vpmovsxwq load.
define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_2x16mem_to_2x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
; ALL-NEXT: retq
%a = load <2 x i16>,<2 x i16> *%i,align 1
%x = sext <2 x i16> %a to <2 x i64>
ret <2 x i64> %x
}
| |
; Masked zext load: <4 x i16> from memory widened to <4 x i64>, lanes zeroed
; where %mask is false.
define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x16mem_to_4x64:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512DQ-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
%x = zext <4 x i16> %a to <4 x i64>
%ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
ret <4 x i64> %ret
}
| |
; Masked sext load: <4 x i16> from memory widened to <4 x i64>, lanes zeroed
; where %mask is false.
define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x16mem_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
; AVX512DQ-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
%x = sext <4 x i16> %a to <4 x i64>
%ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
ret <4 x i64> %ret
}
| |
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
; Unmasked sign-extending load: <4 x i16> from memory widened to <4 x i64> (expects a single vpmovsxwq).
; ALL-LABEL: sext_4x16mem_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i64>
  ret <4 x i64> %x
}
| |
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; Masked zero-extending load: <8 x i16> from memory widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_8x16mem_to_8x64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
| |
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; Masked sign-extending load: <8 x i16> from memory widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: sext_8x16mem_to_8x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
| |
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
; Unmasked sign-extending load: <8 x i16> from memory widened to <8 x i64> (expects a single vpmovsxwq).
; ALL-LABEL: sext_8x16mem_to_8x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %x
}
| |
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; Masked register-to-register zext: <8 x i16> widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_8x16_to_8x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQNOBW-NEXT:    retq
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
| |
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
; Unmasked register-to-register zext: <8 x i16> widened to <8 x i64> (expects a single vpmovzxwq).
; ALL-LABEL: zext_8x16_to_8x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; ALL-NEXT:    retq
  %ret = zext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %ret
}
| |
define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
; Masked zero-extending load: <2 x i32> from memory widened to <2 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_2x32mem_to_2x64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
; AVX512DQ-NEXT:    retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = zext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
| |
define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
; Masked sign-extending load: <2 x i32> from memory widened to <2 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: sext_2x32mem_to_2x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxdq (%rdi), %xmm0
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT:    retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = sext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
| |
define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
; Unmasked sign-extending load: <2 x i32> from memory widened to <2 x i64> (expects a single vpmovsxdq).
; ALL-LABEL: sext_2x32mem_to_2x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
; ALL-NEXT:    retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %x
}
| |
define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
; Masked zero-extending load: <4 x i32> from memory widened to <4 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_4x32mem_to_4x64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512DQ-NEXT:    retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
| |
define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
; Masked sign-extending load: <4 x i32> from memory widened to <4 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: sext_4x32mem_to_4x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxdq (%rdi), %ymm0
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
; AVX512DQ-NEXT:    retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = sext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
| |
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
; Unmasked sign-extending load: <4 x i32> from memory widened to <4 x i64> (expects a single vpmovsxdq).
; ALL-LABEL: sext_4x32mem_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}
| |
define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
; Register-to-register sext: <4 x i32> widened to <4 x i64> (expects a single vpmovsxdq).
; ALL-LABEL: sext_4x32_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
; ALL-NEXT:    retq
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}
| |
define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
; Masked register-to-register zext: <4 x i32> widened to <4 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_4x32_to_4x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpmovd2m %xmm1, %k1
; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512DQ-NEXT:    retq
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
| |
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; Masked zero-extending load: <8 x i32> from memory widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_8x32mem_to_8x64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x32mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
| |
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; Masked sign-extending load: <8 x i32> from memory widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: sext_8x32mem_to_8x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = sext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
| |
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
; Unmasked sign-extending load: <8 x i32> from memory widened to <8 x i64> (expects a single vpmovsxdq).
; ALL-LABEL: sext_8x32mem_to_8x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}
| |
define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
; Register-to-register sext: <8 x i32> widened to <8 x i64> (expects a single vpmovsxdq).
; ALL-LABEL: sext_8x32_to_8x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
; ALL-NEXT:    retq
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}
| |
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
; Masked register-to-register zext: <8 x i32> widened to <8 x i64>, with lanes zeroed where %mask is false.
; KNL-LABEL: zext_8x32_to_8x64mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x32_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512DQNOBW-NEXT:    retq
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; Vector fptrunc: <8 x double> narrowed to <8 x float> (expects a single vcvtpd2ps).
; ALL-LABEL: fptrunc_test:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT:    retq
  %b = fptrunc <8 x double> %a to <8 x float>
  ret <8 x float> %b
}
| |
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; Vector fpext: <8 x float> widened to <8 x double> (expects a single vcvtps2pd).
; ALL-LABEL: fpext_test:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %b
}
| |
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; i16 bitcast to a <16 x i1> mask, then zext to <16 x i32> (each lane becomes 0 or 1).
; KNL-LABEL: zext_16i1_to_16xi32:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpsrld $31, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    vpmovm2d %k0, %zmm0
; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQNOBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %a = bitcast i16 %b to <16 x i1>
  %c = zext <16 x i1> %a to <16 x i32>
  ret <16 x i32> %c
}
| |
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; i8 bitcast to an <8 x i1> mask, then zext to <8 x i64> (each lane becomes 0 or 1).
; KNL-LABEL: zext_8i1_to_8xi64:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpsrlq $63, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    vpmovm2q %k0, %zmm0
; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
; AVX512DQNOBW-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQNOBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %a = bitcast i8 %b to <8 x i1>
  %c = zext <8 x i1> %a to <8 x i64>
  ret <8 x i64> %c
}
| |
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; Truncate <16 x i8> to <16 x i1> (keep bit 0 of each byte) and pack into an i16 mask.
; ALL-LABEL: trunc_16i8_to_16i1:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllw $7, %xmm0, %xmm0
; ALL-NEXT:    vpmovmskb %xmm0, %eax
; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
; ALL-NEXT:    retq
  %mask_b = trunc <16 x i8>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}
| |
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; Truncate <16 x i32> to <16 x i1> (keep bit 0 of each element) and pack into an i16 mask.
; KNL-LABEL: trunc_16i32_to_16i1:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_16i32_to_16i1:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX-NEXT:    vpmovd2m %zmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512DQNOBW-NEXT:    vzeroupper
; AVX512DQNOBW-NEXT:    retq
  %mask_b = trunc <16 x i32>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}
| |
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; Truncate both inputs to <4 x i1>, AND them, then sext back to <4 x i32> (all-ones/all-zeros per lane).
; ALL-LABEL: trunc_4i32_to_4i1:
; ALL:       # %bb.0:
; ALL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; ALL-NEXT:    vpslld $31, %xmm0, %xmm0
; ALL-NEXT:    vpsrad $31, %xmm0, %xmm0
; ALL-NEXT:    retq
  %mask_a = trunc <4 x i32>%a to <4 x i1>
  %mask_b = trunc <4 x i32>%b to <4 x i1>
  %a_and_b = and <4 x i1>%mask_a, %mask_b
  %res = sext <4 x i1>%a_and_b to <4 x i32>
  ret <4 x i32>%res
}
| |
| |
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; Truncate <8 x i16> to <8 x i1> (keep bit 0 of each element) and pack into an i8 mask.
; KNL-LABEL: trunc_8i16_to_8i1:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    # kill: def $al killed $al killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_8i16_to_8i1:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    # kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k0
; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
; AVX512DQNOBW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512DQNOBW-NEXT:    vzeroupper
; AVX512DQNOBW-NEXT:    retq
  %mask_b = trunc <8 x i16>%a to <8 x i1>
  %mask = bitcast <8 x i1> %mask_b to i8
  ret i8 %mask
}
| |
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; icmp slt produces <8 x i1>, which is negated (xor with all-true) and sign-extended to <8 x i32>.
; KNL-LABEL: sext_8i1_8i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_8i1_8i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
  %x = icmp slt <8 x i32> %a1, %a2
  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  %y = sext <8 x i1> %x1 to <8 x i32>
  ret <8 x i32> %y
}
| |
| |
define i16 @trunc_i32_to_i1(i32 %a) {
; Truncate a scalar i32 to i1 and insert it at lane 0 of a constant <16 x i1>, returned as an i16 mask.
; KNL-LABEL: trunc_i32_to_i1:
; KNL:       # %bb.0:
; KNL-NEXT:    movw $-4, %ax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_i32_to_i1:
; SKX:       # %bb.0:
; SKX-NEXT:    movw $-4, %ax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kshiftrw $1, %k0, %k0
; SKX-NEXT:    kshiftlw $1, %k0, %k0
; SKX-NEXT:    andl $1, %edi
; SKX-NEXT:    kmovw %edi, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    movw $-4, %ax
; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT:    andl $1, %edi
; AVX512DQNOBW-NEXT:    kmovw %edi, %k1
; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512DQNOBW-NEXT:    retq
  %a_i = trunc i32 %a to i1
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}
| |
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; icmp slt produces <8 x i1>, sign-extended to <8 x i16> (all-ones where a1 < a2).
; KNL-LABEL: sext_8i1_8i16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8i1_8i16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_8i1_8i16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512DQNOBW-NEXT:    vzeroupper
; AVX512DQNOBW-NEXT:    retq
  %x = icmp slt <8 x i32> %a1, %a2
  %y = sext <8 x i1> %x to <8 x i16>
  ret <8 x i16> %y
}
| |
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; icmp slt produces <16 x i1>, sign-extended to <16 x i32> (all-ones where a1 < a2).
; KNL-LABEL: sext_16i1_16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_16i1_16i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    retq
  %x = icmp slt <16 x i32> %a1, %a2
  %y = sext <16 x i1> %x to <16 x i32>
  ret <16 x i32> %y
}
| |
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; icmp slt produces <8 x i1>, sign-extended to <8 x i64> (all-ones where a1 < a2).
; KNL-LABEL: sext_8i1_8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_8i1_8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    retq
  %x = icmp slt <8 x i32> %a1, %a2
  %y = sext <8 x i1> %x to <8 x i64>
  ret <8 x i64> %y
}
| |
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
; Sign-extending load-then-store: load <8 x i8>, sext to <8 x i64>, store the result.
; ALL-LABEL: extload_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %sign_load = load <8 x i8>, <8 x i8>* %a
  %c = sext <8 x i8> %sign_load to <8 x i64>
  store <8 x i64> %c, <8 x i64>* %res
  ret void
}
| |
| define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { |
| ; KNL-LABEL: test21: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kshiftlw $15, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $2, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: kmovw %esi, %k1 |
| ; KNL-NEXT: kshiftlw $1, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $3, %k0, %k3 |
| ; KNL-NEXT: kmovw %edx, %k1 |
| ; KNL-NEXT: kshiftlw $2, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k3, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $13, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $13, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $4, %k0, %k4 |
| ; KNL-NEXT: kmovw %ecx, %k1 |
| ; KNL-NEXT: kshiftlw $3, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k4, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $12, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $12, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $5, %k0, %k5 |
| ; KNL-NEXT: kmovw %r8d, %k1 |
| ; KNL-NEXT: kshiftlw $4, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k5, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $11, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $11, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $6, %k0, %k6 |
| ; KNL-NEXT: kmovw %r9d, %k1 |
| ; KNL-NEXT: kshiftlw $5, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k6, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $10, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $10, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $7, %k0, %k7 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $6, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $9, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $9, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $7, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $8, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $8, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $8, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $8, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $9, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $7, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $9, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $10, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $6, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $10, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $11, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $5, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $11, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $12, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $4, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $12, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $13, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $3, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $13, %k1, %k1 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $2, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $2, %k0, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k0 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $15, %k0, %k2 |
| ; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k0, %k2, %k0 |
| ; KNL-NEXT: korw %k0, %k1, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k0 |
| ; KNL-NEXT: kshiftlw $15, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $1, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $14, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $2, %k1, %k1 |
| ; KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k3, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $13, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $13, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $3, %k1, %k1 |
| ; KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k4, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $12, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $12, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $4, %k1, %k1 |
| ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k5, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $11, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $11, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $5, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k6, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $10, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $10, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $6, %k1, %k1 |
| ; KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $9, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $9, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $7, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $8, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $8, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $8, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $7, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $9, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $6, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $10, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $5, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $11, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $4, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $12, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $3, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $13, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $2, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $14, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k0 |
| ; KNL-NEXT: kshiftlw $15, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $1, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $14, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $2, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k3, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $13, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $13, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $3, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k4, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $12, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $12, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $4, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k5, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $11, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $11, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $5, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k6, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $10, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $10, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $6, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $9, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $9, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $7, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k2, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $8, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $8, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $8, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k3, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $7, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $9, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k4, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $6, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $10, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k5, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $5, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $11, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $4, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $12, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $3, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $13, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $2, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $14, %k1, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; KNL-NEXT: korw %k1, %k7, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: korw %k0, %k1, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $15, %k7, %k7 |
| ; KNL-NEXT: korw %k0, %k7, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $2, %k7, %k7 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: korw %k7, %k1, %k7 |
| ; KNL-NEXT: kshiftlw $14, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $14, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $3, %k7, %k7 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: korw %k7, %k1, %k7 |
| ; KNL-NEXT: kshiftlw $13, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $13, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $4, %k7, %k7 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: korw %k7, %k1, %k7 |
| ; KNL-NEXT: kshiftlw $12, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $12, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $5, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k7 |
| ; KNL-NEXT: kshiftlw $11, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $11, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $6, %k7, %k7 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: korw %k7, %k1, %k7 |
| ; KNL-NEXT: kshiftlw $10, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $10, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $7, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k2, %k7 |
| ; KNL-NEXT: kshiftlw $9, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $9, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $8, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k3, %k7 |
| ; KNL-NEXT: kshiftlw $8, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $8, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k4, %k7 |
| ; KNL-NEXT: kshiftlw $7, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $7, %k0, %k0 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k7 |
| ; KNL-NEXT: kshiftlw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k5, %k6 |
| ; KNL-NEXT: kshiftlw $6, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $6, %k0, %k0 |
| ; KNL-NEXT: korw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k6 |
| ; KNL-NEXT: kshiftlw $11, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k6, %k2, %k5 |
| ; KNL-NEXT: kshiftlw $5, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $5, %k0, %k0 |
| ; KNL-NEXT: korw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k5 |
| ; KNL-NEXT: kshiftlw $12, %k5, %k5 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k5, %k2, %k4 |
| ; KNL-NEXT: kshiftlw $4, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $4, %k0, %k0 |
| ; KNL-NEXT: korw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k4 |
| ; KNL-NEXT: kshiftlw $13, %k4, %k4 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k4, %k2, %k3 |
| ; KNL-NEXT: kshiftlw $3, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $3, %k0, %k0 |
| ; KNL-NEXT: korw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k3 |
| ; KNL-NEXT: kshiftlw $14, %k3, %k3 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload |
| ; KNL-NEXT: korw %k3, %k2, %k2 |
| ; KNL-NEXT: kshiftlw $2, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $2, %k0, %k0 |
| ; KNL-NEXT: korw %k2, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; KNL-NEXT: kmovw %eax, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k0, %k2 |
| ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; KNL-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z} |
| ; KNL-NEXT: vpmovdw %zmm4, %ymm4 |
| ; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1 |
| ; KNL-NEXT: vpmovdw %zmm5, %ymm4 |
| ; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2 |
| ; KNL-NEXT: vpmovdw %zmm6, %ymm4 |
| ; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3 |
| ; KNL-NEXT: vpmovdw %zmm7, %ymm4 |
| ; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test21: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 |
| ; SKX-NEXT: vpmovb2m %zmm2, %k1 |
| ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} |
| ; SKX-NEXT: kshiftrq $32, %k1, %k1 |
| ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: test21: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: kmovw %edi, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: kmovw %esi, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k3 |
| ; AVX512DQNOBW-NEXT: kmovw %edx, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k3, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k4 |
| ; AVX512DQNOBW-NEXT: kmovw %ecx, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k4, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k5 |
| ; AVX512DQNOBW-NEXT: kmovw %r8d, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k5, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k6 |
| ; AVX512DQNOBW-NEXT: kmovw %r9d, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k6, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k7 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k7, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k1, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k1 |
| ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0 |
| ; AVX512DQNOBW-NEXT: korw %k0, %k2, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $15, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k6, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k3, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k4, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k5, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0 |
| ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k2, %k0, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftrw $15, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k7, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $14, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k7, %k0, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k1, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k3, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $8, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $9, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k4, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 |
| ; AVX512DQNOBW-NEXT: kshiftlw $10, %k7, %k7 |
| ; AVX512DQNOBW-NEXT: korw %k7, %k5, %k6 |
| ; AVX512DQNOBW-NEXT: kshiftlw $6, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k6, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 |
| ; AVX512DQNOBW-NEXT: kshiftlw $11, %k6, %k6 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k6, %k1, %k5 |
| ; AVX512DQNOBW-NEXT: kshiftlw $5, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k5, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k5 |
| ; AVX512DQNOBW-NEXT: kshiftlw $12, %k5, %k5 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k5, %k1, %k4 |
| ; AVX512DQNOBW-NEXT: kshiftlw $4, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k4, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k4 |
| ; AVX512DQNOBW-NEXT: kshiftlw $13, %k4, %k4 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k4, %k1, %k3 |
| ; AVX512DQNOBW-NEXT: kshiftlw $3, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k3 |
| ; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: korw %k3, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $2, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k1, %k2, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al |
| ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 |
| ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 |
| ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 |
| ; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm6 |
| ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload |
| ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm7 |
| ; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4 |
| ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1 |
| ; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm4 |
| ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2 |
| ; AVX512DQNOBW-NEXT: vpmovdw %zmm6, %ymm4 |
| ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3 |
| ; AVX512DQNOBW-NEXT: vpmovdw %zmm7, %ymm4 |
| ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm4, %ymm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer |
| ret <64 x i16> %ret |
| } |
| |
| ; A zext <16 x i8> -> <16 x i16> written as a shufflevector that interleaves |
| ; each byte of %a with element 16 of the zero vector, then bitcasts to i16 |
| ; lanes. All run lines should recognize the pattern as a single vpmovzxbw. |
| define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { |
| ; ALL-LABEL: shuffle_zext_16x8_to_16x16: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> |
| %2 = bitcast <32 x i8> %1 to <16 x i16> |
| ret <16 x i16> %2 |
| } |
| |
| ; Same shuffle-as-zext pattern as above, but the result is selected against |
| ; zero with a <16 x i1> mask. With AVX512BW (SKX) the mask folds into a |
| ; zero-masked vpmovzxbw via vpmovb2m; without BW (KNL, AVX512DQNOBW) the |
| ; mask is materialized by sign-extending i1 lanes (vpsllw/vpsraw $15) and |
| ; applied with vpand. |
| define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { |
| ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero |
| ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 |
| ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 |
| ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero |
| ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 |
| ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 |
| ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> |
| %bc = bitcast <32 x i8> %x to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| ; Shuffle of a <32 x i8> source with a zero vector, interleaving only the |
| ; low 16 bytes with zero. Should still be recognized as vpmovzxbw of the |
| ; low xmm half, even though the input type is wider than the result uses. |
| define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { |
| ; ALL-LABEL: zext_32x8_to_16x16: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> |
| %2 = bitcast <32 x i8> %1 to <16 x i16> |
| ret <16 x i16> %2 |
| } |
| |
| ; Each of the low 8 bytes followed by three zero bytes, bitcast to i32 |
| ; lanes: a byte-to-dword zext in shuffle form. Expect a single vpmovzxbd. |
| define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { |
| ; ALL-LABEL: zext_32x8_to_8x32: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> |
| %2 = bitcast <32 x i8> %1 to <8 x i32> |
| ret <8 x i32> %2 |
| } |
| |
| ; Each of the low 4 bytes followed by seven zero bytes, bitcast to i64 |
| ; lanes: a byte-to-qword zext in shuffle form. Expect a single vpmovzxbq. |
| define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { |
| ; ALL-LABEL: zext_32x8_to_4x64: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> |
| %2 = bitcast <32 x i8> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| ; Low 8 words interleaved with zero words, bitcast to i32 lanes: a |
| ; word-to-dword zext in shuffle form. Expect a single vpmovzxwd. |
| define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { |
| ; ALL-LABEL: zext_16x16_to_8x32: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> |
| %2 = bitcast <16 x i16> %1 to <8 x i32> |
| ret <8 x i32> %2 |
| } |
| |
| ; Each of the low 4 words followed by three zero words, bitcast to i64 |
| ; lanes: a word-to-qword zext in shuffle form. Expect a single vpmovzxwq. |
| define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { |
| ; ALL-LABEL: zext_16x16_to_4x64: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> |
| %2 = bitcast <16 x i16> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| ; Low 4 dwords interleaved with zero dwords, bitcast to i64 lanes: a |
| ; dword-to-qword zext in shuffle form. Expect a single vpmovzxdq. |
| define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { |
| ; ALL-LABEL: zext_8x32_to_4x64: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero |
| ; ALL-NEXT: retq |
| %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> |
| %2 = bitcast <8 x i32> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| ; zext of a 64-lane i1 compare result to <64 x i8>. SKX (AVX512BW) uses a |
| ; full-width vpcmpeqb into a mask register plus a zero-masked vmovdqu8 of |
| ; a constant. Without BW (KNL, AVX512DQNOBW) the zmm compare is split into |
| ; two ymm halves, each AND-ed with a splat of 1 to produce 0/1 bytes, then |
| ; reassembled with vinserti64x4. |
| define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { |
| ; KNL-LABEL: zext_64xi1_to_64xi8: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2 |
| ; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] |
| ; KNL-NEXT: vpand %ymm3, %ymm2, %ymm2 |
| ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: zext_64xi1_to_64xi8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 |
| ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2 |
| ; AVX512DQNOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] |
| ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm2, %ymm2 |
| ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %mask = icmp eq <64 x i8> %x, %y |
| %1 = zext <64 x i1> %mask to <64 x i8> |
| ret <64 x i8> %1 |
| } |
| |
| ; zext of a 32-lane i1 compare result to <32 x i16>. SKX keeps everything |
| ; at 512 bits: vpcmpeqw into a mask, vpmovm2w to all-ones words, then |
| ; vpsrlw $15 to shrink -1 to 1. Without BW the compare runs as two ymm |
| ; halves (vpcmpeqw gives 0/-1 words) each shifted right by 15, then |
| ; recombined with vinserti64x4. |
| define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { |
| ; KNL-LABEL: zext_32xi1_to_32xi16: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 |
| ; KNL-NEXT: vpsrlw $15, %ymm2, %ymm2 |
| ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0 |
| ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: zext_32xi1_to_32xi16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 |
| ; SKX-NEXT: vpmovm2w %k0, %zmm0 |
| ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 |
| ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm2, %ymm2 |
| ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %mask = icmp eq <32 x i16> %x, %y |
| %1 = zext <32 x i1> %mask to <32 x i16> |
| ret <32 x i16> %1 |
| } |
| |
| ; 256-bit variant: all targets compare with vpcmpeqw (0/-1 words) and |
| ; shift right by 15 to produce 0/1 — no mask registers needed. |
| define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { |
| ; ALL-LABEL: zext_16xi1_to_16xi16: |
| ; ALL: # %bb.0: |
| ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 |
| ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0 |
| ; ALL-NEXT: retq |
| %mask = icmp eq <16 x i16> %x, %y |
| %1 = zext <16 x i1> %mask to <16 x i16> |
| ret <16 x i16> %1 |
| } |
| |
| |
| ; zext of a 32-lane i16 compare to <32 x i8> (truncating the lane width). |
| ; SKX folds it into a zero-masked vmovdqu8 of a constant. Without BW each |
| ; ymm compare half is widened to dwords (vpmovzxwd), narrowed to bytes |
| ; (vpmovdb), the halves concatenated with vinserti128, and the 0/-1 bytes |
| ; masked to 0/1 with a vpand against a memory constant. |
| define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { |
| ; KNL-LABEL: zext_32xi1_to_32xi8: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 |
| ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| ; KNL-NEXT: vpmovdb %zmm0, %xmm0 |
| ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero |
| ; KNL-NEXT: vpmovdb %zmm1, %xmm1 |
| ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: zext_32xi1_to_32xi8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 |
| ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 |
| ; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 |
| ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 |
| ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| ; AVX512DQNOBW-NEXT: vpmovdb %zmm0, %xmm0 |
| ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero |
| ; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1 |
| ; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %mask = icmp eq <32 x i16> %x, %y |
| %1 = zext <32 x i1> %mask to <32 x i8> |
| ret <32 x i8> %1 |
| } |
| |
| ; zext of a 4-lane i8 compare to <4 x i32>. SKX compares straight into a |
| ; mask, expands with vpmovm2d, and shifts right by 31 to get 0/1. The |
| ; others compare in xmm, widen bytes to dwords with vpmovzxbd, then AND |
| ; with 1 — KNL via a broadcasted register constant, AVX512DQNOBW via a |
| ; vpandd with an embedded {1to4} broadcast memory operand. |
| define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { |
| ; KNL-LABEL: zext_4xi1_to_4x32: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 |
| ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: zext_4xi1_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 |
| ; SKX-NEXT: vpmovm2d %k0, %xmm0 |
| ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 |
| ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; AVX512DQNOBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %mask = icmp eq <4 x i8> %x, %y |
| %1 = zext <4 x i1> %mask to <4 x i32> |
| ret <4 x i32> %1 |
| } |
| |
| ; zext of a 2-lane i8 compare to <2 x i64>. SKX uses mask registers: |
| ; vpcmpeqb to %k0, vpmovm2q to 0/-1 qwords, vpsrlq $63 to get 0/1. KNL |
| ; and AVX512DQNOBW compare in xmm, widen with vpmovzxbq, and AND with a |
| ; memory constant to clear all but the low bit of each qword. |
| define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { |
| ; KNL-LABEL: zext_2xi1_to_2xi64: |
| ; KNL: # %bb.0: |
| ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 |
| ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero |
| ; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: zext_2xi1_to_2xi64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 |
| ; SKX-NEXT: vpmovm2q %k0, %xmm0 |
| ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 |
| ; SKX-NEXT: retq |
| ; |
| ; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64: |
| ; AVX512DQNOBW: # %bb.0: |
| ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 |
| ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero |
| ; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 |
| ; AVX512DQNOBW-NEXT: retq |
| %mask = icmp eq <2 x i8> %x, %y |
| %1 = zext <2 x i1> %mask to <2 x i64> |
| ret <2 x i64> %1 |
| } |