| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX |
| ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32 |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL |
| |
| ; test1: returns the constant zeroinitializer of type <16 x i1>. Every run line |
| ; expects xmm0 to be cleared with vxorps followed by a plain return. |
| define <16 x i1> @test1() { |
| ; ALL_X64-LABEL: test1: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test1: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test1: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: retq |
| ret <16 x i1> zeroinitializer |
| } |
| |
| ; test2: bitwise AND of two <16 x i1> arguments. The runs without -fast-isel |
| ; expect a single vandps on xmm0/xmm1. The -fast-isel run expects both operands |
| ; to be moved into mask registers (vpsllw $7 + vpmovb2m), combined with kandw, |
| ; and moved back to xmm0 with vpmovm2b. |
| define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) { |
| ; ALL_X64-LABEL: test2: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test2: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test2: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: vpsllw $7, %xmm1, %xmm1 |
| ; FASTISEL-NEXT: vpmovb2m %xmm1, %k0 |
| ; FASTISEL-NEXT: vpsllw $7, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovb2m %xmm0, %k1 |
| ; FASTISEL-NEXT: kandw %k0, %k1, %k0 |
| ; FASTISEL-NEXT: vpmovm2b %k0, %xmm0 |
| ; FASTISEL-NEXT: retq |
| %c = and <16 x i1>%a, %b |
| ret <16 x i1> %c |
| } |
| |
| ; test3: bitwise AND of two <8 x i1> arguments. The runs without -fast-isel |
| ; expect a single vandps on xmm0/xmm1. The -fast-isel run expects both operands |
| ; to be moved into mask registers (vpsllw $15 + vpmovw2m), combined with kandb, |
| ; and moved back to xmm0 with vpmovm2w. |
| define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) { |
| ; ALL_X64-LABEL: test3: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test3: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test3: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: vpsllw $15, %xmm1, %xmm1 |
| ; FASTISEL-NEXT: vpmovw2m %xmm1, %k0 |
| ; FASTISEL-NEXT: vpsllw $15, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovw2m %xmm0, %k1 |
| ; FASTISEL-NEXT: kandb %k0, %k1, %k0 |
| ; FASTISEL-NEXT: vpmovm2w %k0, %xmm0 |
| ; FASTISEL-NEXT: retq |
| %c = and <8 x i1>%a, %b |
| ret <8 x i1> %c |
| } |
| |
| ; test4: bitwise AND of two <4 x i1> arguments. The runs without -fast-isel |
| ; expect a single vandps on xmm0/xmm1. The -fast-isel run expects both operands |
| ; to be moved into mask registers (vpslld $31 + vpmovd2m), combined with kandw, |
| ; and moved back to xmm0 with vpmovm2d. |
| define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) { |
| ; ALL_X64-LABEL: test4: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test4: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test4: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: vpslld $31, %xmm1, %xmm1 |
| ; FASTISEL-NEXT: vpmovd2m %xmm1, %k0 |
| ; FASTISEL-NEXT: vpslld $31, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovd2m %xmm0, %k1 |
| ; FASTISEL-NEXT: kandw %k0, %k1, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %xmm0 |
| ; FASTISEL-NEXT: retq |
| %c = and <4 x i1>%a, %b |
| ret <4 x i1> %c |
| } |
| |
| ; External function that takes and returns <8 x i1>; called by test5 and test7a. |
| declare <8 x i1> @func8xi1(<8 x i1> %a) |
| |
| ; test5: compares two <8 x i32> vectors (icmp sgt), passes the <8 x i1> result to |
| ; @func8xi1, and sign-extends the returned <8 x i1> to <8 x i32>. |
| ; In every run the expected code places the mask in xmm0 before the call. The |
| ; -mcpu=knl runs narrow the vector compare result with vpmovdw; the -mcpu=skx |
| ; runs compare into %k0 and expand it with vpmovm2w. After the call, the runs |
| ; without -fast-isel widen with vpmovzxwd and sign-fill via vpslld/vpsrad $31, |
| ; while the -fast-isel run goes back through a mask register (vpmovw2m, vpmovm2d). |
| define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) { |
| ; KNL-LABEL: test5: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rax |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpmovdw %zmm0, %ymm0 |
| ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 |
| ; KNL-NEXT: callq _func8xi1 |
| ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| ; KNL-NEXT: vpslld $31, %ymm0, %ymm0 |
| ; KNL-NEXT: vpsrad $31, %ymm0, %ymm0 |
| ; KNL-NEXT: popq %rax |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test5: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rax |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 |
| ; SKX-NEXT: vpmovm2w %k0, %xmm0 |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: callq _func8xi1 |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| ; SKX-NEXT: vpslld $31, %ymm0, %ymm0 |
| ; SKX-NEXT: vpsrad $31, %ymm0, %ymm0 |
| ; SKX-NEXT: popq %rax |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test5: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: subl $12, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 |
| ; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0 |
| ; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 |
| ; KNL_X32-NEXT: calll _func8xi1 |
| ; KNL_X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| ; KNL_X32-NEXT: vpslld $31, %ymm0, %ymm0 |
| ; KNL_X32-NEXT: vpsrad $31, %ymm0, %ymm0 |
| ; KNL_X32-NEXT: addl $12, %esp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test5: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rax |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2w %k0, %xmm0 |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: callq _func8xi1 |
| ; FASTISEL-NEXT: vpsllw $15, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovw2m %xmm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %ymm0 |
| ; FASTISEL-NEXT: popq %rax |
| ; FASTISEL-NEXT: retq |
| %cmpRes = icmp sgt <8 x i32>%a, %b |
| %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes) |
| %res = sext <8 x i1>%resi to <8 x i32> |
| ret <8 x i32> %res |
| } |
| |
| ; External function that takes and returns <16 x i1>; called by test6. |
| declare <16 x i1> @func16xi1(<16 x i1> %a) |
| |
| ; test6: compares two <16 x i32> vectors (icmp sgt), passes the <16 x i1> result |
| ; to @func16xi1, and sign-extends the returned <16 x i1> to <16 x i32>. |
| ; In every run the expected code places the mask in xmm0 before the call. The |
| ; -mcpu=knl runs compare into %k1, expand with a zero-masked vpternlogd and narrow |
| ; with vpmovdb; the -mcpu=skx runs compare into %k0 and expand with vpmovm2b. |
| ; After the call, the runs without -fast-isel widen with vpmovzxbd and sign-fill |
| ; via vpslld/vpsrad $31, while the -fast-isel run uses vpmovb2m and vpmovm2d. |
| define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) { |
| ; KNL-LABEL: test6: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rax |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 |
| ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} |
| ; KNL-NEXT: vpmovdb %zmm0, %xmm0 |
| ; KNL-NEXT: callq _func16xi1 |
| ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; KNL-NEXT: vpsrad $31, %zmm0, %zmm0 |
| ; KNL-NEXT: popq %rax |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test6: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rax |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 |
| ; SKX-NEXT: vpmovm2b %k0, %xmm0 |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: callq _func16xi1 |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; SKX-NEXT: vpsrad $31, %zmm0, %zmm0 |
| ; SKX-NEXT: popq %rax |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test6: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: subl $12, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 |
| ; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} |
| ; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0 |
| ; KNL_X32-NEXT: calll _func16xi1 |
| ; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0 |
| ; KNL_X32-NEXT: vpsrad $31, %zmm0, %zmm0 |
| ; KNL_X32-NEXT: addl $12, %esp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test6: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rax |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2b %k0, %xmm0 |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: callq _func16xi1 |
| ; FASTISEL-NEXT: vpsllw $7, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovb2m %xmm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %zmm0 |
| ; FASTISEL-NEXT: popq %rax |
| ; FASTISEL-NEXT: retq |
| %cmpRes = icmp sgt <16 x i32>%a, %b |
| %resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes) |
| %res = sext <16 x i1>%resi to <16 x i32> |
| ret <16 x i32> %res |
| } |
| |
| ; External function that takes and returns <4 x i1>; called by test7. |
| declare <4 x i1> @func4xi1(<4 x i1> %a) |
| |
| ; test7: compares two <4 x i32> vectors (icmp sgt), passes the <4 x i1> result to |
| ; @func4xi1, and sign-extends the returned <4 x i1> to <4 x i32>. |
| ; The runs without -fast-isel expect the xmm result of vpcmpgtd to be passed to |
| ; the callee unchanged and the return value sign-filled via vpslld/vpsrad $31. |
| ; The -fast-isel run expects a round trip through %k0 on both sides of the call. |
| define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) { |
| ; ALL_X64-LABEL: test7: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: pushq %rax |
| ; ALL_X64-NEXT: .cfi_def_cfa_offset 16 |
| ; ALL_X64-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: callq _func4xi1 |
| ; ALL_X64-NEXT: vpslld $31, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; ALL_X64-NEXT: popq %rax |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test7: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: subl $12, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: calll _func4xi1 |
| ; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: addl $12, %esp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test7: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rax |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %xmm0 |
| ; FASTISEL-NEXT: callq _func4xi1 |
| ; FASTISEL-NEXT: vpslld $31, %xmm0, %xmm0 |
| ; FASTISEL-NEXT: vpmovd2m %xmm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %xmm0 |
| ; FASTISEL-NEXT: popq %rax |
| ; FASTISEL-NEXT: retq |
| %cmpRes = icmp sgt <4 x i32>%a, %b |
| %resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes) |
| %res = sext <4 x i1>%resi to <4 x i32> |
| ret <4 x i32> %res |
| } |
| |
| ; test7a: compares two <8 x i32> vectors (icmp sgt), passes the <8 x i1> result to |
| ; @func8xi1, then ANDs the returned mask with the constant |
| ; <true, false, true, false, true, false, true, false> and returns it as <8 x i1>. |
| ; In every run the expected code applies the constant with one vandps/vpand whose |
| ; first operand is a memory reference, directly on the xmm0 value returned by the |
| ; callee. |
| define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { |
| ; KNL-LABEL: test7a: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rax |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 |
| ; KNL-NEXT: vpmovdw %zmm0, %ymm0 |
| ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 |
| ; KNL-NEXT: callq _func8xi1 |
| ; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 |
| ; KNL-NEXT: popq %rax |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test7a: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rax |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 |
| ; SKX-NEXT: vpmovm2w %k0, %xmm0 |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: callq _func8xi1 |
| ; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 |
| ; SKX-NEXT: popq %rax |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test7a: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: subl $12, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 |
| ; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0 |
| ; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 |
| ; KNL_X32-NEXT: calll _func8xi1 |
| ; KNL_X32-NEXT: vandps LCPI7_0, %xmm0, %xmm0 |
| ; KNL_X32-NEXT: addl $12, %esp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test7a: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rax |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 |
| ; FASTISEL-NEXT: vpmovm2w %k0, %xmm0 |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: callq _func8xi1 |
| ; FASTISEL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 |
| ; FASTISEL-NEXT: popq %rax |
| ; FASTISEL-NEXT: retq |
| %cmpRes = icmp sgt <8 x i32>%a, %b |
| %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes) |
| %res = and <8 x i1>%resi, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> |
| ret <8 x i1> %res |
| } |
| |
| ; test8: selects between two <16 x i8> arguments based on a scalar i1 argument. |
| ; Every run expects bit 0 of the condition to be tested with testb $1 (in %dil on |
| ; the 64-bit runs, in a stack slot on the 32-bit run) and a conditional jump that |
| ; skips the vmovaps copying %xmm1 into %xmm0. |
| define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) { |
| ; ALL_X64-LABEL: test8: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: testb $1, %dil |
| ; ALL_X64-NEXT: jne LBB8_2 |
| ; ALL_X64-NEXT: ## %bb.1: |
| ; ALL_X64-NEXT: vmovaps %xmm1, %xmm0 |
| ; ALL_X64-NEXT: LBB8_2: |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test8: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: jne LBB8_2 |
| ; KNL_X32-NEXT: ## %bb.1: |
| ; KNL_X32-NEXT: vmovaps %xmm1, %xmm0 |
| ; KNL_X32-NEXT: LBB8_2: |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test8: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: testb $1, %dil |
| ; FASTISEL-NEXT: jne LBB8_2 |
| ; FASTISEL-NEXT: ## %bb.1: |
| ; FASTISEL-NEXT: vmovaps %xmm1, %xmm0 |
| ; FASTISEL-NEXT: LBB8_2: |
| ; FASTISEL-NEXT: retq |
| %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2 |
| ret <16 x i8> %res |
| } |
| |
| ; test9: returns the i1 result of fcmp ugt on two double arguments. Every run |
| ; expects a vucomisd followed by setb into %al; the 32-bit run first loads one |
| ; operand from the stack with vmovsd and compares against a stack slot. |
| define i1 @test9(double %a, double %b) { |
| ; ALL_X64-LABEL: test9: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: vucomisd %xmm0, %xmm1 |
| ; ALL_X64-NEXT: setb %al |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test9: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; KNL_X32-NEXT: vucomisd {{[0-9]+}}(%esp), %xmm0 |
| ; KNL_X32-NEXT: setb %al |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test9: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: vucomisd %xmm0, %xmm1 |
| ; FASTISEL-NEXT: setb %al |
| ; FASTISEL-NEXT: retq |
| %c = fcmp ugt double %a, %b |
| ret i1 %c |
| } |
| |
| ; test10: selects between two i32 arguments based on an i1 argument. The 64-bit |
| ; runs expect testb $1 on %dl followed by cmovel. The 32-bit run expects the |
| ; addresses of the two stack arguments to be formed with leal, one of them chosen |
| ; with cmovnel, and the result loaded through the chosen address. |
| ; This function is also called by test12. |
| define i32 @test10(i32 %a, i32 %b, i1 %cond) { |
| ; ALL_X64-LABEL: test10: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: movl %edi, %eax |
| ; ALL_X64-NEXT: testb $1, %dl |
| ; ALL_X64-NEXT: cmovel %esi, %eax |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test10: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %ecx |
| ; KNL_X32-NEXT: cmovnel %eax, %ecx |
| ; KNL_X32-NEXT: movl (%ecx), %eax |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test10: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: movl %edi, %eax |
| ; FASTISEL-NEXT: testb $1, %dl |
| ; FASTISEL-NEXT: cmovel %esi, %eax |
| ; FASTISEL-NEXT: retq |
| %c = select i1 %cond, i32 %a, i32 %b |
| ret i32 %c |
| } |
| |
| ; test11: returns the i1 result of icmp sgt on two i32 arguments. Every run |
| ; expects a cmpl followed by setg into %al; the 32-bit run reads the operands |
| ; from the stack. This function is also called by test12. |
| define i1 @test11(i32 %a, i32 %b) { |
| ; ALL_X64-LABEL: test11: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: cmpl %esi, %edi |
| ; ALL_X64-NEXT: setg %al |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test11: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: setg %al |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test11: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: cmpl %esi, %edi |
| ; FASTISEL-NEXT: setg %al |
| ; FASTISEL-NEXT: retq |
| %c = icmp sgt i32 %a, %b |
| ret i1 %c |
| } |
| |
| ; test12: exercises an i1 value as both a call result and a call argument. |
| ; It calls @test11(%a1, %b1) to obtain an i1, forwards that i1 as the third |
| ; argument of @test10(%a1, %a2, cond), and finally selects between the value |
| ; returned by @test10 and 0 using the same i1. |
| ; Every run expects the i1 returned in %al to be zero-extended with movzbl before |
| ; it is passed on, and the final select to be testb $1 on the saved condition |
| ; followed by cmovel against a zeroed %ecx. |
| define i32 @test12(i32 %a1, i32 %a2, i32 %b1) { |
| ; ALL_X64-LABEL: test12: |
| ; ALL_X64: ## %bb.0: |
| ; ALL_X64-NEXT: pushq %rbp |
| ; ALL_X64-NEXT: .cfi_def_cfa_offset 16 |
| ; ALL_X64-NEXT: pushq %r14 |
| ; ALL_X64-NEXT: .cfi_def_cfa_offset 24 |
| ; ALL_X64-NEXT: pushq %rbx |
| ; ALL_X64-NEXT: .cfi_def_cfa_offset 32 |
| ; ALL_X64-NEXT: .cfi_offset %rbx, -32 |
| ; ALL_X64-NEXT: .cfi_offset %r14, -24 |
| ; ALL_X64-NEXT: .cfi_offset %rbp, -16 |
| ; ALL_X64-NEXT: movl %esi, %r14d |
| ; ALL_X64-NEXT: movl %edi, %ebp |
| ; ALL_X64-NEXT: movl %edx, %esi |
| ; ALL_X64-NEXT: callq _test11 |
| ; ALL_X64-NEXT: movzbl %al, %ebx |
| ; ALL_X64-NEXT: movl %ebp, %edi |
| ; ALL_X64-NEXT: movl %r14d, %esi |
| ; ALL_X64-NEXT: movl %ebx, %edx |
| ; ALL_X64-NEXT: callq _test10 |
| ; ALL_X64-NEXT: xorl %ecx, %ecx |
| ; ALL_X64-NEXT: testb $1, %bl |
| ; ALL_X64-NEXT: cmovel %ecx, %eax |
| ; ALL_X64-NEXT: popq %rbx |
| ; ALL_X64-NEXT: popq %r14 |
| ; ALL_X64-NEXT: popq %rbp |
| ; ALL_X64-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test12: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebx |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: pushl %edi |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 12 |
| ; KNL_X32-NEXT: pushl %esi |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: subl $16, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL_X32-NEXT: .cfi_offset %esi, -16 |
| ; KNL_X32-NEXT: .cfi_offset %edi, -12 |
| ; KNL_X32-NEXT: .cfi_offset %ebx, -8 |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl %edi, (%esp) |
| ; KNL_X32-NEXT: calll _test11 |
| ; KNL_X32-NEXT: movl %eax, %ebx |
| ; KNL_X32-NEXT: movzbl %al, %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl %edi, (%esp) |
| ; KNL_X32-NEXT: calll _test10 |
| ; KNL_X32-NEXT: xorl %ecx, %ecx |
| ; KNL_X32-NEXT: testb $1, %bl |
| ; KNL_X32-NEXT: cmovel %ecx, %eax |
| ; KNL_X32-NEXT: addl $16, %esp |
| ; KNL_X32-NEXT: popl %esi |
| ; KNL_X32-NEXT: popl %edi |
| ; KNL_X32-NEXT: popl %ebx |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test12: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: pushq %r14 |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 24 |
| ; FASTISEL-NEXT: pushq %rbx |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 32 |
| ; FASTISEL-NEXT: .cfi_offset %rbx, -32 |
| ; FASTISEL-NEXT: .cfi_offset %r14, -24 |
| ; FASTISEL-NEXT: .cfi_offset %rbp, -16 |
| ; FASTISEL-NEXT: movl %esi, %r14d |
| ; FASTISEL-NEXT: movl %edi, %ebp |
| ; FASTISEL-NEXT: movl %edx, %esi |
| ; FASTISEL-NEXT: callq _test11 |
| ; FASTISEL-NEXT: movzbl %al, %ebx |
| ; FASTISEL-NEXT: movl %ebp, %edi |
| ; FASTISEL-NEXT: movl %r14d, %esi |
| ; FASTISEL-NEXT: movl %ebx, %edx |
| ; FASTISEL-NEXT: callq _test10 |
| ; FASTISEL-NEXT: xorl %ecx, %ecx |
| ; FASTISEL-NEXT: testb $1, %bl |
| ; FASTISEL-NEXT: cmovel %ecx, %eax |
| ; FASTISEL-NEXT: popq %rbx |
| ; FASTISEL-NEXT: popq %r14 |
| ; FASTISEL-NEXT: popq %rbp |
| ; FASTISEL-NEXT: retq |
| %cond = call i1 @test11(i32 %a1, i32 %b1) |
| %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond) |
| %res1 = select i1 %cond, i32 %res, i32 0 |
| ret i32 %res1 |
| } |
| |
| ; test13: loads a <1 x i1> value through the pointer argument and returns it. |
| ; The -mcpu=knl runs expect a movzbl load into %eax; the -mcpu=skx runs (with and |
| ; without -fast-isel) expect a kmovb load into %k0 followed by kmovd to %eax. |
| ; In every run the value is returned in %al. |
| define <1 x i1> @test13(<1 x i1>* %foo) { |
| ; KNL-LABEL: test13: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: movzbl (%rdi), %eax |
| ; KNL-NEXT: ## kill: def $al killed $al killed $eax |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test13: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: kmovb (%rdi), %k0 |
| ; SKX-NEXT: kmovd %k0, %eax |
| ; SKX-NEXT: ## kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test13: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: movzbl (%eax), %eax |
| ; KNL_X32-NEXT: ## kill: def $al killed $al killed $eax |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test13: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: kmovb (%rdi), %k0 |
| ; FASTISEL-NEXT: kmovd %k0, %eax |
| ; FASTISEL-NEXT: ## kill: def $al killed $al killed $eax |
| ; FASTISEL-NEXT: retq |
| %bar = load <1 x i1>, <1 x i1>* %foo |
| ret <1 x i1> %bar |
| } |
| |
| ; test14: loads a <32 x i16> vector through the pointer argument, passes it to |
| ; @test14_callee, and stores the returned vector back through the same pointer. |
| ; Every run expects the vector to be loaded into %zmm0 with one vmovaps before the |
| ; call and stored from %zmm0 with one vmovaps after it, with the pointer kept in |
| ; a callee-saved register across the call. The -mcpu=skx runs additionally expect |
| ; vzeroupper before returning. |
| define void @test14(<32 x i16>* %x) { |
| ; KNL-LABEL: test14: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rbx |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: .cfi_offset %rbx, -16 |
| ; KNL-NEXT: movq %rdi, %rbx |
| ; KNL-NEXT: vmovaps (%rdi), %zmm0 |
| ; KNL-NEXT: callq _test14_callee |
| ; KNL-NEXT: vmovaps %zmm0, (%rbx) |
| ; KNL-NEXT: popq %rbx |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test14: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rbx |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: .cfi_offset %rbx, -16 |
| ; SKX-NEXT: movq %rdi, %rbx |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 |
| ; SKX-NEXT: callq _test14_callee |
| ; SKX-NEXT: vmovaps %zmm0, (%rbx) |
| ; SKX-NEXT: popq %rbx |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test14: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %esi |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: subl $8, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: .cfi_offset %esi, -8 |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; KNL_X32-NEXT: vmovaps (%esi), %zmm0 |
| ; KNL_X32-NEXT: calll _test14_callee |
| ; KNL_X32-NEXT: vmovaps %zmm0, (%esi) |
| ; KNL_X32-NEXT: addl $8, %esp |
| ; KNL_X32-NEXT: popl %esi |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test14: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbx |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: .cfi_offset %rbx, -16 |
| ; FASTISEL-NEXT: movq %rdi, %rbx |
| ; FASTISEL-NEXT: vmovaps (%rdi), %zmm0 |
| ; FASTISEL-NEXT: callq _test14_callee |
| ; FASTISEL-NEXT: vmovaps %zmm0, (%rbx) |
| ; FASTISEL-NEXT: popq %rbx |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| %a = load <32 x i16>, <32 x i16>* %x |
| %b = call <32 x i16> @test14_callee(<32 x i16> %a) |
| store <32 x i16> %b, <32 x i16>* %x |
| ret void |
| } |
| ; External function that takes and returns <32 x i16>; called by test14. |
| declare <32 x i16> @test14_callee(<32 x i16>) |
| |
| ; test15: loads a <64 x i8> vector through the pointer argument, passes it to |
| ; @test15_callee, and stores the returned vector back through the same pointer. |
| ; Every run expects the vector to be loaded into %zmm0 with one vmovaps before the |
| ; call and stored from %zmm0 with one vmovaps after it, with the pointer kept in |
| ; a callee-saved register across the call. The -mcpu=skx runs additionally expect |
| ; vzeroupper before returning. |
| define void @test15(<64 x i8>* %x) { |
| ; KNL-LABEL: test15: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rbx |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: .cfi_offset %rbx, -16 |
| ; KNL-NEXT: movq %rdi, %rbx |
| ; KNL-NEXT: vmovaps (%rdi), %zmm0 |
| ; KNL-NEXT: callq _test15_callee |
| ; KNL-NEXT: vmovaps %zmm0, (%rbx) |
| ; KNL-NEXT: popq %rbx |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test15: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rbx |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: .cfi_offset %rbx, -16 |
| ; SKX-NEXT: movq %rdi, %rbx |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 |
| ; SKX-NEXT: callq _test15_callee |
| ; SKX-NEXT: vmovaps %zmm0, (%rbx) |
| ; SKX-NEXT: popq %rbx |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test15: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %esi |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: subl $8, %esp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL_X32-NEXT: .cfi_offset %esi, -8 |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; KNL_X32-NEXT: vmovaps (%esi), %zmm0 |
| ; KNL_X32-NEXT: calll _test15_callee |
| ; KNL_X32-NEXT: vmovaps %zmm0, (%esi) |
| ; KNL_X32-NEXT: addl $8, %esp |
| ; KNL_X32-NEXT: popl %esi |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: test15: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbx |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: .cfi_offset %rbx, -16 |
| ; FASTISEL-NEXT: movq %rdi, %rbx |
| ; FASTISEL-NEXT: vmovaps (%rdi), %zmm0 |
| ; FASTISEL-NEXT: callq _test15_callee |
| ; FASTISEL-NEXT: vmovaps %zmm0, (%rbx) |
| ; FASTISEL-NEXT: popq %rbx |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| %a = load <64 x i8>, <64 x i8>* %x |
| %b = call <64 x i8> @test15_callee(<64 x i8> %a) |
| store <64 x i8> %b, <64 x i8>* %x |
| ret void |
| } |
| ; External function that takes and returns <64 x i8>; called by test15. |
| declare <64 x i8> @test15_callee(<64 x i8>) |
| |
| define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { |
| ; KNL-LABEL: test16: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rbp |
| ; KNL-NEXT: pushq %r15 |
| ; KNL-NEXT: pushq %r14 |
| ; KNL-NEXT: pushq %r13 |
| ; KNL-NEXT: pushq %r12 |
| ; KNL-NEXT: pushq %rbx |
| ; KNL-NEXT: movq %rdi, %rax |
| ; KNL-NEXT: movw $-3, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $14, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-5, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $13, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-9, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $12, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-17, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $11, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-33, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $10, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-65, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $9, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-129, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $8, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-257, %di ## imm = 0xFEFF |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $7, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF |
| ; KNL-NEXT: kmovw %edi, %k5 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $6, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-1025, %di ## imm = 0xFBFF |
| ; KNL-NEXT: kmovw %edi, %k4 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $5, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-2049, %di ## imm = 0xF7FF |
| ; KNL-NEXT: kmovw %edi, %k3 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $4, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-4097, %di ## imm = 0xEFFF |
| ; KNL-NEXT: kmovw %edi, %k2 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $3, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k1 |
| ; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k0, %k1, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k6 |
| ; KNL-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL-NEXT: kshiftrw $2, %k6, %k6 |
| ; KNL-NEXT: korw %k6, %k1, %k6 |
| ; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kandw %k1, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kshiftlw $1, %k6, %k6 |
| ; KNL-NEXT: kshiftrw $1, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kmovw %esi, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw %edx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw %r8d, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw %r9d, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $8, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k7, %k6, %k6 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $7, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k6, %k6 |
| ; KNL-NEXT: kandw %k5, %k6, %k5 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k6 |
| ; KNL-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL-NEXT: kshiftrw $6, %k6, %k6 |
| ; KNL-NEXT: korw %k6, %k5, %k5 |
| ; KNL-NEXT: kandw %k4, %k5, %k4 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k5 |
| ; KNL-NEXT: kshiftlw $15, %k5, %k5 |
| ; KNL-NEXT: kshiftrw $5, %k5, %k5 |
| ; KNL-NEXT: korw %k5, %k4, %k4 |
| ; KNL-NEXT: kandw %k3, %k4, %k3 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k4 |
| ; KNL-NEXT: kshiftlw $15, %k4, %k4 |
| ; KNL-NEXT: kshiftrw $4, %k4, %k4 |
| ; KNL-NEXT: korw %k4, %k3, %k3 |
| ; KNL-NEXT: kandw %k2, %k3, %k2 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k3 |
| ; KNL-NEXT: kshiftlw $15, %k3, %k3 |
| ; KNL-NEXT: kshiftrw $3, %k3, %k3 |
| ; KNL-NEXT: korw %k3, %k2, %k2 |
| ; KNL-NEXT: kandw %k0, %k2, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $2, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k0, %k0 |
| ; KNL-NEXT: xorl %ecx, %ecx |
| ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF |
| ; KNL-NEXT: movl $0, %esi |
| ; KNL-NEXT: cmovnel %edx, %esi |
| ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: cmovnel %edx, %ecx |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl |
| ; KNL-NEXT: kmovw %edx, %k1 |
| ; KNL-NEXT: kshiftlw $14, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl |
| ; KNL-NEXT: kmovw %edx, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw %esi, %k1 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kandw %k1, %k2, %k1 |
| ; KNL-NEXT: kmovw %k1, %r8d |
| ; KNL-NEXT: kshiftrw $1, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: kshiftrw $2, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r10d |
| ; KNL-NEXT: kshiftrw $3, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r11d |
| ; KNL-NEXT: kshiftrw $4, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r12d |
| ; KNL-NEXT: kshiftrw $5, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r15d |
| ; KNL-NEXT: kshiftrw $6, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r14d |
| ; KNL-NEXT: kshiftrw $7, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r13d |
| ; KNL-NEXT: kshiftrw $8, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebx |
| ; KNL-NEXT: kshiftrw $9, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %esi |
| ; KNL-NEXT: kshiftrw $10, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %ebp |
| ; KNL-NEXT: kshiftrw $11, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %ecx |
| ; KNL-NEXT: kshiftrw $12, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %edx |
| ; KNL-NEXT: kshiftrw $13, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %edi |
| ; KNL-NEXT: kshiftrw $14, %k0, %k1 |
| ; KNL-NEXT: andl $1, %r8d |
| ; KNL-NEXT: movb %r8b, 2(%rax) |
| ; KNL-NEXT: kmovw %k0, %r8d |
| ; KNL-NEXT: andl $1, %r8d |
| ; KNL-NEXT: andl $1, %r9d |
| ; KNL-NEXT: leal (%r8,%r9,2), %r8d |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL-NEXT: andl $1, %r10d |
| ; KNL-NEXT: leal (%r8,%r10,4), %r8d |
| ; KNL-NEXT: kmovw %k0, %r10d |
| ; KNL-NEXT: andl $1, %r11d |
| ; KNL-NEXT: leal (%r8,%r11,8), %r8d |
| ; KNL-NEXT: andl $1, %r12d |
| ; KNL-NEXT: shll $4, %r12d |
| ; KNL-NEXT: orl %r8d, %r12d |
| ; KNL-NEXT: andl $1, %r15d |
| ; KNL-NEXT: shll $5, %r15d |
| ; KNL-NEXT: orl %r12d, %r15d |
| ; KNL-NEXT: andl $1, %r14d |
| ; KNL-NEXT: shll $6, %r14d |
| ; KNL-NEXT: andl $1, %r13d |
| ; KNL-NEXT: shll $7, %r13d |
| ; KNL-NEXT: orl %r14d, %r13d |
| ; KNL-NEXT: andl $1, %ebx |
| ; KNL-NEXT: shll $8, %ebx |
| ; KNL-NEXT: orl %r13d, %ebx |
| ; KNL-NEXT: andl $1, %esi |
| ; KNL-NEXT: shll $9, %esi |
| ; KNL-NEXT: orl %ebx, %esi |
| ; KNL-NEXT: andl $1, %ebp |
| ; KNL-NEXT: shll $10, %ebp |
| ; KNL-NEXT: orl %esi, %ebp |
| ; KNL-NEXT: orl %r15d, %ebp |
| ; KNL-NEXT: andl $1, %ecx |
| ; KNL-NEXT: shll $11, %ecx |
| ; KNL-NEXT: andl $1, %edx |
| ; KNL-NEXT: shll $12, %edx |
| ; KNL-NEXT: orl %ecx, %edx |
| ; KNL-NEXT: andl $1, %edi |
| ; KNL-NEXT: shll $13, %edi |
| ; KNL-NEXT: orl %edx, %edi |
| ; KNL-NEXT: andl $1, %r9d |
| ; KNL-NEXT: shll $14, %r9d |
| ; KNL-NEXT: orl %edi, %r9d |
| ; KNL-NEXT: andl $1, %r10d |
| ; KNL-NEXT: shll $15, %r10d |
| ; KNL-NEXT: orl %r9d, %r10d |
| ; KNL-NEXT: orl %ebp, %r10d |
| ; KNL-NEXT: movw %r10w, (%rax) |
| ; KNL-NEXT: popq %rbx |
| ; KNL-NEXT: popq %r12 |
| ; KNL-NEXT: popq %r13 |
| ; KNL-NEXT: popq %r14 |
| ; KNL-NEXT: popq %r15 |
| ; KNL-NEXT: popq %rbp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test16: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rbp |
| ; SKX-NEXT: pushq %r15 |
| ; SKX-NEXT: pushq %r14 |
| ; SKX-NEXT: pushq %r13 |
| ; SKX-NEXT: pushq %r12 |
| ; SKX-NEXT: pushq %rbx |
| ; SKX-NEXT: movq %rdi, %rax |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: movl $-3, %edi |
| ; SKX-NEXT: kmovd %edi, %k2 |
| ; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k2, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $30, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-5, %edi |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $29, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-9, %edi |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $28, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: movl $-17, %edi |
| ; SKX-NEXT: kmovd %edi, %k2 |
| ; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k2, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $27, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-33, %edi |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $26, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-65, %edi |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $25, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: movl $-129, %edi |
| ; SKX-NEXT: kmovd %edi, %k2 |
| ; SKX-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k2, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $24, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-257, %edi ## imm = 0xFEFF |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $23, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-513, %edi ## imm = 0xFDFF |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $22, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: movl $-1025, %edi ## imm = 0xFBFF |
| ; SKX-NEXT: kmovd %edi, %k6 |
| ; SKX-NEXT: kandd %k6, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $21, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-2049, %edi ## imm = 0xF7FF |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $20, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-4097, %edi ## imm = 0xEFFF |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $19, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: movl $-8193, %edi ## imm = 0xDFFF |
| ; SKX-NEXT: kmovd %edi, %k4 |
| ; SKX-NEXT: kandd %k4, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $18, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-16385, %edi ## imm = 0xBFFF |
| ; SKX-NEXT: kmovd %edi, %k5 |
| ; SKX-NEXT: kandd %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $17, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: movl $-32769, %edi ## imm = 0xFFFF7FFF |
| ; SKX-NEXT: kmovd %edi, %k3 |
| ; SKX-NEXT: kandd %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $16, %k7, %k7 |
| ; SKX-NEXT: kord %k7, %k0, %k7 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF |
| ; SKX-NEXT: kmovd %edi, %k2 |
| ; SKX-NEXT: kandd %k2, %k7, %k7 |
| ; SKX-NEXT: kshiftld $31, %k0, %k0 |
| ; SKX-NEXT: kshiftrd $15, %k0, %k0 |
| ; SKX-NEXT: kord %k0, %k7, %k0 |
| ; SKX-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; SKX-NEXT: kmovd %esi, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd %edx, %k7 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $30, %k7, %k7 |
| ; SKX-NEXT: kord %k7, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd %ecx, %k7 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $29, %k7, %k7 |
| ; SKX-NEXT: kord %k7, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd %r8d, %k7 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $28, %k7, %k7 |
| ; SKX-NEXT: kord %k7, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd %r9d, %k7 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $27, %k7, %k7 |
| ; SKX-NEXT: kord %k7, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k1 |
| ; SKX-NEXT: kshiftld $31, %k7, %k7 |
| ; SKX-NEXT: kshiftrd $26, %k7, %k7 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kord %k7, %k1, %k1 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k7, %k1, %k1 |
| ; SKX-NEXT: kshiftld $31, %k0, %k0 |
| ; SKX-NEXT: kshiftrd $25, %k0, %k0 |
| ; SKX-NEXT: kord %k0, %k1, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $24, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $23, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $22, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kandd %k6, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $21, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k6, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $20, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kshiftld $31, %k6, %k1 |
| ; SKX-NEXT: kshiftrd $19, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kandd %k4, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $18, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kandd %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $17, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kandd %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $16, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kandd %k2, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftld $31, %k1, %k1 |
| ; SKX-NEXT: kshiftrd $15, %k1, %k1 |
| ; SKX-NEXT: kord %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; SKX-NEXT: kandd %k1, %k0, %k0 |
| ; SKX-NEXT: kshiftrd $16, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r8d |
| ; SKX-NEXT: kshiftrd $1, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r9d |
| ; SKX-NEXT: kshiftrd $2, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r10d |
| ; SKX-NEXT: kshiftrd $3, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r11d |
| ; SKX-NEXT: kshiftrd $4, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r12d |
| ; SKX-NEXT: kshiftrd $5, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r15d |
| ; SKX-NEXT: kshiftrd $6, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r14d |
| ; SKX-NEXT: kshiftrd $7, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r13d |
| ; SKX-NEXT: kshiftrd $8, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %ebx |
| ; SKX-NEXT: kshiftrd $9, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %esi |
| ; SKX-NEXT: kshiftrd $10, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %ebp |
| ; SKX-NEXT: kshiftrd $11, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %ecx |
| ; SKX-NEXT: kshiftrd $12, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %edx |
| ; SKX-NEXT: kshiftrd $13, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %edi |
| ; SKX-NEXT: kshiftrd $14, %k0, %k1 |
| ; SKX-NEXT: andl $1, %r8d |
| ; SKX-NEXT: movb %r8b, 2(%rax) |
| ; SKX-NEXT: kmovd %k0, %r8d |
| ; SKX-NEXT: andl $1, %r8d |
| ; SKX-NEXT: andl $1, %r9d |
| ; SKX-NEXT: leal (%r8,%r9,2), %r8d |
| ; SKX-NEXT: kmovd %k1, %r9d |
| ; SKX-NEXT: kshiftrd $15, %k0, %k0 |
| ; SKX-NEXT: andl $1, %r10d |
| ; SKX-NEXT: leal (%r8,%r10,4), %r8d |
| ; SKX-NEXT: kmovd %k0, %r10d |
| ; SKX-NEXT: andl $1, %r11d |
| ; SKX-NEXT: leal (%r8,%r11,8), %r8d |
| ; SKX-NEXT: andl $1, %r12d |
| ; SKX-NEXT: shll $4, %r12d |
| ; SKX-NEXT: orl %r8d, %r12d |
| ; SKX-NEXT: andl $1, %r15d |
| ; SKX-NEXT: shll $5, %r15d |
| ; SKX-NEXT: orl %r12d, %r15d |
| ; SKX-NEXT: andl $1, %r14d |
| ; SKX-NEXT: shll $6, %r14d |
| ; SKX-NEXT: andl $1, %r13d |
| ; SKX-NEXT: shll $7, %r13d |
| ; SKX-NEXT: orl %r14d, %r13d |
| ; SKX-NEXT: andl $1, %ebx |
| ; SKX-NEXT: shll $8, %ebx |
| ; SKX-NEXT: orl %r13d, %ebx |
| ; SKX-NEXT: andl $1, %esi |
| ; SKX-NEXT: shll $9, %esi |
| ; SKX-NEXT: orl %ebx, %esi |
| ; SKX-NEXT: andl $1, %ebp |
| ; SKX-NEXT: shll $10, %ebp |
| ; SKX-NEXT: orl %esi, %ebp |
| ; SKX-NEXT: orl %r15d, %ebp |
| ; SKX-NEXT: andl $1, %ecx |
| ; SKX-NEXT: shll $11, %ecx |
| ; SKX-NEXT: andl $1, %edx |
| ; SKX-NEXT: shll $12, %edx |
| ; SKX-NEXT: orl %ecx, %edx |
| ; SKX-NEXT: andl $1, %edi |
| ; SKX-NEXT: shll $13, %edi |
| ; SKX-NEXT: orl %edx, %edi |
| ; SKX-NEXT: andl $1, %r9d |
| ; SKX-NEXT: shll $14, %r9d |
| ; SKX-NEXT: orl %edi, %r9d |
| ; SKX-NEXT: andl $1, %r10d |
| ; SKX-NEXT: shll $15, %r10d |
| ; SKX-NEXT: orl %r9d, %r10d |
| ; SKX-NEXT: orl %ebp, %r10d |
| ; SKX-NEXT: movw %r10w, (%rax) |
| ; SKX-NEXT: popq %rbx |
| ; SKX-NEXT: popq %r12 |
| ; SKX-NEXT: popq %r13 |
| ; SKX-NEXT: popq %r14 |
| ; SKX-NEXT: popq %r15 |
| ; SKX-NEXT: popq %rbp |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test16: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: pushl %ebx |
| ; KNL_X32-NEXT: pushl %edi |
| ; KNL_X32-NEXT: pushl %esi |
| ; KNL_X32-NEXT: subl $20, %esp |
| ; KNL_X32-NEXT: movw $-3, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-5, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-9, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-17, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-33, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-65, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-129, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-257, %ax ## imm = 0xFEFF |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-513, %ax ## imm = 0xFDFF |
| ; KNL_X32-NEXT: kmovw %eax, %k5 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-1025, %ax ## imm = 0xFBFF |
| ; KNL_X32-NEXT: kmovw %eax, %k4 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-2049, %ax ## imm = 0xF7FF |
| ; KNL_X32-NEXT: kmovw %eax, %k3 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-4097, %ax ## imm = 0xEFFF |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-8193, %ax ## imm = 0xDFFF |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k6 |
| ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL_X32-NEXT: kshiftrw $2, %k6, %k6 |
| ; KNL_X32-NEXT: korw %k6, %k0, %k6 |
| ; KNL_X32-NEXT: movw $-16385, %ax ## imm = 0xBFFF |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k0, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kshiftlw $1, %k6, %k6 |
| ; KNL_X32-NEXT: kshiftrw $1, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $8, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $7, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k6, %k6 |
| ; KNL_X32-NEXT: kandw %k5, %k6, %k5 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k6 |
| ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 |
| ; KNL_X32-NEXT: korw %k6, %k5, %k5 |
| ; KNL_X32-NEXT: kandw %k4, %k5, %k4 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k5 |
| ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 |
| ; KNL_X32-NEXT: kshiftrw $5, %k5, %k5 |
| ; KNL_X32-NEXT: korw %k5, %k4, %k4 |
| ; KNL_X32-NEXT: kandw %k3, %k4, %k3 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k4 |
| ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 |
| ; KNL_X32-NEXT: kshiftrw $4, %k4, %k4 |
| ; KNL_X32-NEXT: korw %k4, %k3, %k3 |
| ; KNL_X32-NEXT: kandw %k2, %k3, %k2 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k3 |
| ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 |
| ; KNL_X32-NEXT: kshiftrw $3, %k3, %k3 |
| ; KNL_X32-NEXT: korw %k3, %k2, %k2 |
| ; KNL_X32-NEXT: kandw %k1, %k2, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $2, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: xorl %eax, %eax |
| ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF |
| ; KNL_X32-NEXT: movl $0, %edx |
| ; KNL_X32-NEXT: cmovnel %ecx, %edx |
| ; KNL_X32-NEXT: kandw %k0, %k1, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl |
| ; KNL_X32-NEXT: kmovw %ebx, %k1 |
| ; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 |
| ; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl |
| ; KNL_X32-NEXT: kmovw %ebx, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %edx, %k1 |
| ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: cmovnel %ecx, %eax |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kandw %k1, %k2, %k1 |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: kmovw %k1, %ebx |
| ; KNL_X32-NEXT: kshiftrw $1, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %esi |
| ; KNL_X32-NEXT: kshiftrw $2, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %edi |
| ; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %ebp |
| ; KNL_X32-NEXT: kshiftrw $4, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %edx |
| ; KNL_X32-NEXT: kshiftrw $5, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %ecx |
| ; KNL_X32-NEXT: kshiftrw $6, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %ebx |
| ; KNL_X32-NEXT: movb %bl, 2(%eax) |
| ; KNL_X32-NEXT: kmovw %k0, %ebx |
| ; KNL_X32-NEXT: andl $1, %ebx |
| ; KNL_X32-NEXT: andl $1, %esi |
| ; KNL_X32-NEXT: leal (%ebx,%esi,2), %esi |
| ; KNL_X32-NEXT: kmovw %k1, %ebx |
| ; KNL_X32-NEXT: kshiftrw $7, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %edi |
| ; KNL_X32-NEXT: leal (%esi,%edi,4), %esi |
| ; KNL_X32-NEXT: kmovw %k1, %edi |
| ; KNL_X32-NEXT: kshiftrw $8, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %ebp |
| ; KNL_X32-NEXT: leal (%esi,%ebp,8), %esi |
| ; KNL_X32-NEXT: kmovw %k1, %ebp |
| ; KNL_X32-NEXT: kshiftrw $9, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %edx |
| ; KNL_X32-NEXT: shll $4, %edx |
| ; KNL_X32-NEXT: orl %esi, %edx |
| ; KNL_X32-NEXT: kmovw %k1, %esi |
| ; KNL_X32-NEXT: kshiftrw $10, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %ecx |
| ; KNL_X32-NEXT: shll $5, %ecx |
| ; KNL_X32-NEXT: orl %edx, %ecx |
| ; KNL_X32-NEXT: kmovw %k1, %edx |
| ; KNL_X32-NEXT: kshiftrw $11, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %ebx |
| ; KNL_X32-NEXT: shll $6, %ebx |
| ; KNL_X32-NEXT: andl $1, %edi |
| ; KNL_X32-NEXT: shll $7, %edi |
| ; KNL_X32-NEXT: orl %ebx, %edi |
| ; KNL_X32-NEXT: kmovw %k1, %ebx |
| ; KNL_X32-NEXT: kshiftrw $12, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %ebp |
| ; KNL_X32-NEXT: shll $8, %ebp |
| ; KNL_X32-NEXT: orl %edi, %ebp |
| ; KNL_X32-NEXT: kmovw %k1, %edi |
| ; KNL_X32-NEXT: kshiftrw $13, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %esi |
| ; KNL_X32-NEXT: shll $9, %esi |
| ; KNL_X32-NEXT: orl %ebp, %esi |
| ; KNL_X32-NEXT: kmovw %k1, %ebp |
| ; KNL_X32-NEXT: kshiftrw $14, %k0, %k1 |
| ; KNL_X32-NEXT: andl $1, %edx |
| ; KNL_X32-NEXT: shll $10, %edx |
| ; KNL_X32-NEXT: orl %esi, %edx |
| ; KNL_X32-NEXT: kmovw %k1, %esi |
| ; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 |
| ; KNL_X32-NEXT: orl %ecx, %edx |
| ; KNL_X32-NEXT: kmovw %k0, %ecx |
| ; KNL_X32-NEXT: andl $1, %ebx |
| ; KNL_X32-NEXT: shll $11, %ebx |
| ; KNL_X32-NEXT: andl $1, %edi |
| ; KNL_X32-NEXT: shll $12, %edi |
| ; KNL_X32-NEXT: orl %ebx, %edi |
| ; KNL_X32-NEXT: andl $1, %ebp |
| ; KNL_X32-NEXT: shll $13, %ebp |
| ; KNL_X32-NEXT: orl %edi, %ebp |
| ; KNL_X32-NEXT: andl $1, %esi |
| ; KNL_X32-NEXT: shll $14, %esi |
| ; KNL_X32-NEXT: orl %ebp, %esi |
| ; KNL_X32-NEXT: andl $1, %ecx |
| ; KNL_X32-NEXT: shll $15, %ecx |
| ; KNL_X32-NEXT: orl %esi, %ecx |
| ; KNL_X32-NEXT: orl %edx, %ecx |
| ; KNL_X32-NEXT: movw %cx, (%eax) |
| ; KNL_X32-NEXT: addl $20, %esp |
| ; KNL_X32-NEXT: popl %esi |
| ; KNL_X32-NEXT: popl %edi |
| ; KNL_X32-NEXT: popl %ebx |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl $4 |
| ; |
| ; FASTISEL-LABEL: test16: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbp |
| ; FASTISEL-NEXT: pushq %r15 |
| ; FASTISEL-NEXT: pushq %r14 |
| ; FASTISEL-NEXT: pushq %r13 |
| ; FASTISEL-NEXT: pushq %r12 |
| ; FASTISEL-NEXT: pushq %rbx |
| ; FASTISEL-NEXT: movq %rdi, %rax |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: movl $-3, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k2 |
| ; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $30, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-5, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $29, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-9, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $28, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: movl $-17, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k2 |
| ; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $27, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-33, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $26, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-65, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $25, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: movl $-129, %edi |
| ; FASTISEL-NEXT: kmovd %edi, %k2 |
| ; FASTISEL-NEXT: kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $24, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-257, %edi ## imm = 0xFEFF |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $23, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-513, %edi ## imm = 0xFDFF |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: movl $-1025, %edi ## imm = 0xFBFF |
| ; FASTISEL-NEXT: kmovd %edi, %k6 |
| ; FASTISEL-NEXT: kandd %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-2049, %edi ## imm = 0xF7FF |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-4097, %edi ## imm = 0xEFFF |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: movl $-8193, %edi ## imm = 0xDFFF |
| ; FASTISEL-NEXT: kmovd %edi, %k4 |
| ; FASTISEL-NEXT: kandd %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-16385, %edi ## imm = 0xBFFF |
| ; FASTISEL-NEXT: kmovd %edi, %k5 |
| ; FASTISEL-NEXT: kandd %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movl $-32769, %edi ## imm = 0xFFFF7FFF |
| ; FASTISEL-NEXT: kmovd %edi, %k3 |
| ; FASTISEL-NEXT: kandd %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $16, %k7, %k7 |
| ; FASTISEL-NEXT: kord %k7, %k0, %k7 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: movl $-65537, %edi ## imm = 0xFFFEFFFF |
| ; FASTISEL-NEXT: kmovd %edi, %k2 |
| ; FASTISEL-NEXT: kandd %k2, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrd $15, %k0, %k0 |
| ; FASTISEL-NEXT: kord %k0, %k7, %k0 |
| ; FASTISEL-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| ; FASTISEL-NEXT: kmovd %esi, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd %edx, %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $30, %k7, %k7 |
| ; FASTISEL-NEXT: kord %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd %ecx, %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $29, %k7, %k7 |
| ; FASTISEL-NEXT: kord %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd %r8d, %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $28, %k7, %k7 |
| ; FASTISEL-NEXT: kord %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd %r9d, %k7 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $27, %k7, %k7 |
| ; FASTISEL-NEXT: kord %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k7, %k7 |
| ; FASTISEL-NEXT: kshiftrd $26, %k7, %k7 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kord %k7, %k1, %k1 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrd $25, %k0, %k0 |
| ; FASTISEL-NEXT: kord %k0, %k1, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $24, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $23, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $22, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandd %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $21, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $20, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftld $31, %k6, %k1 |
| ; FASTISEL-NEXT: kshiftrd $19, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandd %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $18, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandd %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $17, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandd %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $16, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandd %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftld $31, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrd $15, %k1, %k1 |
| ; FASTISEL-NEXT: kord %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload |
| ; FASTISEL-NEXT: kandd %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrd $16, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r8d |
| ; FASTISEL-NEXT: kshiftrd $1, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r9d |
| ; FASTISEL-NEXT: kshiftrd $2, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r10d |
| ; FASTISEL-NEXT: kshiftrd $3, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r11d |
| ; FASTISEL-NEXT: kshiftrd $4, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r12d |
| ; FASTISEL-NEXT: kshiftrd $5, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r15d |
| ; FASTISEL-NEXT: kshiftrd $6, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r14d |
| ; FASTISEL-NEXT: kshiftrd $7, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r13d |
| ; FASTISEL-NEXT: kshiftrd $8, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %ebx |
| ; FASTISEL-NEXT: kshiftrd $9, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %esi |
| ; FASTISEL-NEXT: kshiftrd $10, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %ebp |
| ; FASTISEL-NEXT: kshiftrd $11, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %ecx |
| ; FASTISEL-NEXT: kshiftrd $12, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %edx |
| ; FASTISEL-NEXT: kshiftrd $13, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %edi |
| ; FASTISEL-NEXT: kshiftrd $14, %k0, %k1 |
| ; FASTISEL-NEXT: andl $1, %r8d |
| ; FASTISEL-NEXT: movb %r8b, 2(%rax) |
| ; FASTISEL-NEXT: kmovd %k0, %r8d |
| ; FASTISEL-NEXT: andl $1, %r8d |
| ; FASTISEL-NEXT: andl $1, %r9d |
| ; FASTISEL-NEXT: leal (%r8,%r9,2), %r8d |
| ; FASTISEL-NEXT: kmovd %k1, %r9d |
| ; FASTISEL-NEXT: kshiftrd $15, %k0, %k0 |
| ; FASTISEL-NEXT: andl $1, %r10d |
| ; FASTISEL-NEXT: leal (%r8,%r10,4), %r8d |
| ; FASTISEL-NEXT: kmovd %k0, %r10d |
| ; FASTISEL-NEXT: andl $1, %r11d |
| ; FASTISEL-NEXT: leal (%r8,%r11,8), %r8d |
| ; FASTISEL-NEXT: andl $1, %r12d |
| ; FASTISEL-NEXT: shll $4, %r12d |
| ; FASTISEL-NEXT: orl %r8d, %r12d |
| ; FASTISEL-NEXT: andl $1, %r15d |
| ; FASTISEL-NEXT: shll $5, %r15d |
| ; FASTISEL-NEXT: orl %r12d, %r15d |
| ; FASTISEL-NEXT: andl $1, %r14d |
| ; FASTISEL-NEXT: shll $6, %r14d |
| ; FASTISEL-NEXT: andl $1, %r13d |
| ; FASTISEL-NEXT: shll $7, %r13d |
| ; FASTISEL-NEXT: orl %r14d, %r13d |
| ; FASTISEL-NEXT: andl $1, %ebx |
| ; FASTISEL-NEXT: shll $8, %ebx |
| ; FASTISEL-NEXT: orl %r13d, %ebx |
| ; FASTISEL-NEXT: andl $1, %esi |
| ; FASTISEL-NEXT: shll $9, %esi |
| ; FASTISEL-NEXT: orl %ebx, %esi |
| ; FASTISEL-NEXT: andl $1, %ebp |
| ; FASTISEL-NEXT: shll $10, %ebp |
| ; FASTISEL-NEXT: orl %esi, %ebp |
| ; FASTISEL-NEXT: orl %r15d, %ebp |
| ; FASTISEL-NEXT: andl $1, %ecx |
| ; FASTISEL-NEXT: shll $11, %ecx |
| ; FASTISEL-NEXT: andl $1, %edx |
| ; FASTISEL-NEXT: shll $12, %edx |
| ; FASTISEL-NEXT: orl %ecx, %edx |
| ; FASTISEL-NEXT: andl $1, %edi |
| ; FASTISEL-NEXT: shll $13, %edi |
| ; FASTISEL-NEXT: orl %edx, %edi |
| ; FASTISEL-NEXT: andl $1, %r9d |
| ; FASTISEL-NEXT: shll $14, %r9d |
| ; FASTISEL-NEXT: orl %edi, %r9d |
| ; FASTISEL-NEXT: andl $1, %r10d |
| ; FASTISEL-NEXT: shll $15, %r10d |
| ; FASTISEL-NEXT: orl %r9d, %r10d |
| ; FASTISEL-NEXT: orl %ebp, %r10d |
| ; FASTISEL-NEXT: movw %r10w, (%rax) |
| ; FASTISEL-NEXT: popq %rbx |
| ; FASTISEL-NEXT: popq %r12 |
| ; FASTISEL-NEXT: popq %r13 |
| ; FASTISEL-NEXT: popq %r14 |
| ; FASTISEL-NEXT: popq %r15 |
| ; FASTISEL-NEXT: popq %rbp |
| ; FASTISEL-NEXT: retq |
| %c = and <17 x i1> %a, %b |
| ret <17 x i1> %c |
| } |
| |
| define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x i1>%e, <7 x i1>%f, <7 x i1> %g, <7 x i1> %h, <7 x i1> %i) nounwind { |
| ; KNL-LABEL: test17: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: movq %rdi, %rax |
| ; KNL-NEXT: movw $-3, %di |
| ; KNL-NEXT: kmovw %edi, %k2 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL-NEXT: kshiftrw $14, %k1, %k1 |
| ; KNL-NEXT: korw %k1, %k0, %k0 |
| ; KNL-NEXT: movw $-5, %di |
| ; KNL-NEXT: kmovw %edi, %k1 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k3 |
| ; KNL-NEXT: kshiftlw $15, %k3, %k3 |
| ; KNL-NEXT: kshiftrw $13, %k3, %k3 |
| ; KNL-NEXT: korw %k3, %k0, %k0 |
| ; KNL-NEXT: movw $-9, %di |
| ; KNL-NEXT: kmovw %edi, %k3 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k4 |
| ; KNL-NEXT: kshiftlw $15, %k4, %k4 |
| ; KNL-NEXT: kshiftrw $12, %k4, %k4 |
| ; KNL-NEXT: korw %k4, %k0, %k0 |
| ; KNL-NEXT: movw $-17, %di |
| ; KNL-NEXT: kmovw %edi, %k4 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k5 |
| ; KNL-NEXT: kshiftlw $15, %k5, %k5 |
| ; KNL-NEXT: kshiftrw $11, %k5, %k5 |
| ; KNL-NEXT: korw %k5, %k0, %k0 |
| ; KNL-NEXT: movw $-33, %di |
| ; KNL-NEXT: kmovw %edi, %k5 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k6 |
| ; KNL-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL-NEXT: kshiftrw $10, %k6, %k6 |
| ; KNL-NEXT: korw %k6, %k0, %k0 |
| ; KNL-NEXT: movw $-65, %di |
| ; KNL-NEXT: kmovw %edi, %k6 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil |
| ; KNL-NEXT: kmovw %edi, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; KNL-NEXT: kmovw %esi, %k0 |
| ; KNL-NEXT: kandw %k2, %k0, %k0 |
| ; KNL-NEXT: kmovw %edx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k3, %k0, %k0 |
| ; KNL-NEXT: kmovw %r8d, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k4, %k0, %k0 |
| ; KNL-NEXT: kmovw %r9d, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k5, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: kandw %k6, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k0, %k0 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kandw %k2, %k7, %k2 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k7 |
| ; KNL-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL-NEXT: korw %k7, %k2, %k2 |
| ; KNL-NEXT: kandw %k1, %k2, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $13, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k1, %k1 |
| ; KNL-NEXT: kandw %k3, %k1, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $12, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k1, %k1 |
| ; KNL-NEXT: kandw %k4, %k1, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $11, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k1, %k1 |
| ; KNL-NEXT: kandw %k5, %k1, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $10, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k1, %k1 |
| ; KNL-NEXT: kandw %k6, %k1, %k1 |
| ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl |
| ; KNL-NEXT: kmovw %ecx, %k2 |
| ; KNL-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL-NEXT: kshiftrw $9, %k2, %k2 |
| ; KNL-NEXT: korw %k2, %k1, %k1 |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL-NEXT: kandw %k1, %k0, %k0 |
| ; KNL-NEXT: kshiftrw $6, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r8d |
| ; KNL-NEXT: kshiftrw $5, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r9d |
| ; KNL-NEXT: kshiftrw $4, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %r10d |
| ; KNL-NEXT: kshiftrw $3, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %edi |
| ; KNL-NEXT: kshiftrw $2, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %ecx |
| ; KNL-NEXT: kshiftrw $1, %k0, %k1 |
| ; KNL-NEXT: kmovw %k1, %edx |
| ; KNL-NEXT: kmovw %k0, %esi |
| ; KNL-NEXT: andb $1, %sil |
| ; KNL-NEXT: andb $1, %dl |
| ; KNL-NEXT: addb %dl, %dl |
| ; KNL-NEXT: orb %sil, %dl |
| ; KNL-NEXT: andb $1, %cl |
| ; KNL-NEXT: shlb $2, %cl |
| ; KNL-NEXT: orb %dl, %cl |
| ; KNL-NEXT: andb $1, %dil |
| ; KNL-NEXT: shlb $3, %dil |
| ; KNL-NEXT: orb %cl, %dil |
| ; KNL-NEXT: andb $1, %r10b |
| ; KNL-NEXT: shlb $4, %r10b |
| ; KNL-NEXT: orb %dil, %r10b |
| ; KNL-NEXT: andb $1, %r9b |
| ; KNL-NEXT: shlb $5, %r9b |
| ; KNL-NEXT: orb %r10b, %r9b |
| ; KNL-NEXT: shlb $6, %r8b |
| ; KNL-NEXT: orb %r9b, %r8b |
| ; KNL-NEXT: andb $127, %r8b |
| ; KNL-NEXT: movb %r8b, (%rax) |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: test17: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: movq %rdi, %rax |
| ; SKX-NEXT: movb $-3, %dil |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $6, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: movb $-5, %dil |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $5, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 |
| ; SKX-NEXT: movb $-9, %dil |
| ; SKX-NEXT: kmovd %edi, %k7 |
| ; SKX-NEXT: kandb %k7, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k3, %k3 |
| ; SKX-NEXT: kshiftrb $4, %k3, %k3 |
| ; SKX-NEXT: korb %k3, %k0, %k0 |
| ; SKX-NEXT: movb $-17, %dil |
| ; SKX-NEXT: kmovd %edi, %k1 |
| ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 |
| ; SKX-NEXT: kshiftlb $7, %k4, %k4 |
| ; SKX-NEXT: kshiftrb $3, %k4, %k4 |
| ; SKX-NEXT: korb %k4, %k0, %k0 |
| ; SKX-NEXT: movb $-33, %dil |
| ; SKX-NEXT: kmovd %edi, %k4 |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 |
| ; SKX-NEXT: kshiftlb $7, %k5, %k5 |
| ; SKX-NEXT: kshiftrb $2, %k5, %k5 |
| ; SKX-NEXT: korb %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 |
| ; SKX-NEXT: movb $-65, %dil |
| ; SKX-NEXT: kmovd %edi, %k5 |
| ; SKX-NEXT: kandb %k5, %k0, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k6, %k6 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kshiftrb $1, %k6, %k6 |
| ; SKX-NEXT: korb %k6, %k1, %k1 |
| ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k6, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $6, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k3, %k0, %k2 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $5, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: korb %k1, %k2, %k1 |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k0, %k0 |
| ; SKX-NEXT: kshiftrb $4, %k0, %k0 |
| ; SKX-NEXT: korb %k0, %k1, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k2, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $3, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $2, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kandb %k6, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $6, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $5, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k7, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $4, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kmovq %k2, %k3 |
| ; SKX-NEXT: kandb %k2, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $3, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k1 |
| ; SKX-NEXT: kshiftrb $2, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kmovq %k6, %k0 |
| ; SKX-NEXT: kandb %k6, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $6, %k2, %k2 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k4, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k6, %k2 |
| ; SKX-NEXT: kshiftrb $5, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $4, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k3, %k1, %k1 |
| ; SKX-NEXT: kmovq %k3, %k6 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $3, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $2, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k5, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $1, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kandb %k0, %k1, %k1 |
| ; SKX-NEXT: kmovq %k0, %k3 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $6, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k4, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $5, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $4, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kandb %k6, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $3, %k2, %k2 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k0, %k0 |
| ; SKX-NEXT: kshiftrb $2, %k0, %k0 |
| ; SKX-NEXT: korb %k0, %k1, %k0 |
| ; SKX-NEXT: kandb %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kandb %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $6, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $5, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $4, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k6, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $3, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kandb %k7, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $2, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kandb %k5, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k3, %k2, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $6, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k6, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $5, %k2, %k2 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k4, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k6, %k2 |
| ; SKX-NEXT: kshiftrb $4, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k6, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $3, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $2, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k5, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $1, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k0, %k1, %k0 |
| ; SKX-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; SKX-NEXT: kandb %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $6, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k3, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $5, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k5, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $4, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kandb %k6, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $3, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kandb %k7, %k0, %k0 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k1 |
| ; SKX-NEXT: kshiftrb $2, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k4, %k0, %k0 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; SKX-NEXT: kshiftlb $7, %k1, %k1 |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 |
| ; SKX-NEXT: korb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovd %esi, %k1 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k2, %k1, %k1 |
| ; SKX-NEXT: kmovd %edx, %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $6, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k3, %k1, %k1 |
| ; SKX-NEXT: kmovd %ecx, %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $5, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k5, %k1, %k1 |
| ; SKX-NEXT: kmovd %r8d, %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $4, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k6, %k1, %k1 |
| ; SKX-NEXT: kmovd %r9d, %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $3, %k2, %k2 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k7, %k1, %k1 |
| ; SKX-NEXT: kshiftlb $7, %k3, %k2 |
| ; SKX-NEXT: kshiftrb $2, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k4, %k1, %k1 |
| ; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 |
| ; SKX-NEXT: kshiftrb $1, %k2, %k2 |
| ; SKX-NEXT: korb %k2, %k1, %k1 |
| ; SKX-NEXT: kandb %k0, %k1, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; SKX-NEXT: kandb %k1, %k0, %k0 |
| ; SKX-NEXT: kshiftrb $6, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r8d |
| ; SKX-NEXT: kshiftrb $5, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r9d |
| ; SKX-NEXT: kshiftrb $4, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %r10d |
| ; SKX-NEXT: kshiftrb $3, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %edi |
| ; SKX-NEXT: kshiftrb $2, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %ecx |
| ; SKX-NEXT: kshiftrb $1, %k0, %k1 |
| ; SKX-NEXT: kmovd %k1, %edx |
| ; SKX-NEXT: kmovd %k0, %esi |
| ; SKX-NEXT: andb $1, %sil |
| ; SKX-NEXT: andb $1, %dl |
| ; SKX-NEXT: addb %dl, %dl |
| ; SKX-NEXT: orb %sil, %dl |
| ; SKX-NEXT: andb $1, %cl |
| ; SKX-NEXT: shlb $2, %cl |
| ; SKX-NEXT: orb %dl, %cl |
| ; SKX-NEXT: andb $1, %dil |
| ; SKX-NEXT: shlb $3, %dil |
| ; SKX-NEXT: orb %cl, %dil |
| ; SKX-NEXT: andb $1, %r10b |
| ; SKX-NEXT: shlb $4, %r10b |
| ; SKX-NEXT: orb %dil, %r10b |
| ; SKX-NEXT: andb $1, %r9b |
| ; SKX-NEXT: shlb $5, %r9b |
| ; SKX-NEXT: orb %r10b, %r9b |
| ; SKX-NEXT: shlb $6, %r8b |
| ; SKX-NEXT: orb %r9b, %r8b |
| ; SKX-NEXT: andb $127, %r8b |
| ; SKX-NEXT: movb %r8b, (%rax) |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: test17: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebx |
| ; KNL_X32-NEXT: subl $16, %esp |
| ; KNL_X32-NEXT: movw $-3, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 |
| ; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 |
| ; KNL_X32-NEXT: korw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-5, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k1 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k3 |
| ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 |
| ; KNL_X32-NEXT: kshiftrw $13, %k3, %k3 |
| ; KNL_X32-NEXT: korw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-9, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k3 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k4 |
| ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 |
| ; KNL_X32-NEXT: kshiftrw $12, %k4, %k4 |
| ; KNL_X32-NEXT: korw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-17, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k4 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k5 |
| ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 |
| ; KNL_X32-NEXT: kshiftrw $11, %k5, %k5 |
| ; KNL_X32-NEXT: korw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-33, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k5 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k6 |
| ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 |
| ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 |
| ; KNL_X32-NEXT: korw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movw $-65, %ax |
| ; KNL_X32-NEXT: kmovw %eax, %k6 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k0 |
| ; KNL_X32-NEXT: kandw %k2, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k3, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k4, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k5, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: kandw %k6, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k0, %k0 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kandw %k2, %k7, %k2 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k7 |
| ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 |
| ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 |
| ; KNL_X32-NEXT: korw %k7, %k2, %k2 |
| ; KNL_X32-NEXT: kandw %k1, %k2, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: kandw %k3, %k1, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: kandw %k4, %k1, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: kandw %k5, %k1, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: kandw %k6, %k1, %k1 |
| ; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; KNL_X32-NEXT: kmovw %eax, %k2 |
| ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 |
| ; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 |
| ; KNL_X32-NEXT: korw %k2, %k1, %k1 |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload |
| ; KNL_X32-NEXT: kandw %k1, %k0, %k0 |
| ; KNL_X32-NEXT: kshiftrw $6, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %ecx |
| ; KNL_X32-NEXT: kshiftrw $5, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %eax |
| ; KNL_X32-NEXT: kshiftrw $1, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k1, %edx |
| ; KNL_X32-NEXT: kshiftrw $2, %k0, %k1 |
| ; KNL_X32-NEXT: kmovw %k0, %ebx |
| ; KNL_X32-NEXT: andb $1, %bl |
| ; KNL_X32-NEXT: andb $1, %dl |
| ; KNL_X32-NEXT: addb %dl, %dl |
| ; KNL_X32-NEXT: orb %bl, %dl |
| ; KNL_X32-NEXT: kmovw %k1, %ebx |
| ; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 |
| ; KNL_X32-NEXT: andb $1, %bl |
| ; KNL_X32-NEXT: shlb $2, %bl |
| ; KNL_X32-NEXT: orb %dl, %bl |
| ; KNL_X32-NEXT: kmovw %k1, %edx |
| ; KNL_X32-NEXT: kshiftrw $4, %k0, %k0 |
| ; KNL_X32-NEXT: andb $1, %dl |
| ; KNL_X32-NEXT: shlb $3, %dl |
| ; KNL_X32-NEXT: orb %bl, %dl |
| ; KNL_X32-NEXT: kmovw %k0, %ebx |
| ; KNL_X32-NEXT: andb $1, %bl |
| ; KNL_X32-NEXT: shlb $4, %bl |
| ; KNL_X32-NEXT: orb %dl, %bl |
| ; KNL_X32-NEXT: andb $1, %al |
| ; KNL_X32-NEXT: shlb $5, %al |
| ; KNL_X32-NEXT: orb %bl, %al |
| ; KNL_X32-NEXT: shlb $6, %cl |
| ; KNL_X32-NEXT: orb %al, %cl |
| ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; KNL_X32-NEXT: andb $127, %cl |
| ; KNL_X32-NEXT: movb %cl, (%eax) |
| ; KNL_X32-NEXT: addl $16, %esp |
| ; KNL_X32-NEXT: popl %ebx |
| ; KNL_X32-NEXT: retl $4 |
| ; |
| ; FASTISEL-LABEL: test17: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: movq %rdi, %rax |
| ; FASTISEL-NEXT: movb $-3, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: movb $-5, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 |
| ; FASTISEL-NEXT: movb $-9, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k3, %k3 |
| ; FASTISEL-NEXT: kshiftrb $4, %k3, %k3 |
| ; FASTISEL-NEXT: korb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: movb $-17, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k4 |
| ; FASTISEL-NEXT: kshiftlb $7, %k4, %k4 |
| ; FASTISEL-NEXT: kshiftrb $3, %k4, %k4 |
| ; FASTISEL-NEXT: korb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: movb $-33, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k1 |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovq %k1, %k4 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k5 |
| ; FASTISEL-NEXT: kshiftlb $7, %k5, %k5 |
| ; FASTISEL-NEXT: kshiftrb $2, %k5, %k5 |
| ; FASTISEL-NEXT: korb %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k7 |
| ; FASTISEL-NEXT: movb $-65, %dil |
| ; FASTISEL-NEXT: kmovd %edi, %k6 |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k7, %k7 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kshiftrb $1, %k7, %k7 |
| ; FASTISEL-NEXT: korb %k7, %k1, %k7 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k2, %k0, %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: korb %k1, %k2, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k5, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrb $4, %k0, %k0 |
| ; FASTISEL-NEXT: korb %k0, %k1, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kandb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kmovq %k2, %k3 |
| ; FASTISEL-NEXT: kandb %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovq %k4, %k5 |
| ; FASTISEL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 |
| ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovq %k6, %k4 |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 |
| ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kandb %k6, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k5, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k7, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kandb %k3, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrb $2, %k0, %k0 |
| ; FASTISEL-NEXT: korb %k0, %k1, %k0 |
| ; FASTISEL-NEXT: kmovq %k4, %k3 |
| ; FASTISEL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kandb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kmovq %k6, %k4 |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kandb %k2, %k0, %k0 |
| ; FASTISEL-NEXT: kmovq %k2, %k7 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k4, %k2, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kandb %k5, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k4, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k3, %k2 |
| ; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k6, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k7, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $2, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k3, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $1, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k0, %k1, %k0 |
| ; FASTISEL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k5, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $6, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $5, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k4, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $4, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kandb %k6, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $3, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k7, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k1 |
| ; FASTISEL-NEXT: kshiftrb $2, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kandb %k3, %k0, %k0 |
| ; FASTISEL-NEXT: kmovq %k3, %k7 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftrb $1, %k1, %k1 |
| ; FASTISEL-NEXT: korb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovd %esi, %k1 |
| ; FASTISEL-NEXT: kandb %k5, %k1, %k1 |
| ; FASTISEL-NEXT: kmovd %edx, %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $6, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovd %ecx, %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $5, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k4, %k1, %k1 |
| ; FASTISEL-NEXT: kmovd %r8d, %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $4, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k6, %k1, %k1 |
| ; FASTISEL-NEXT: kmovd %r9d, %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $3, %k2, %k2 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k3 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kshiftlb $7, %k3, %k2 |
| ; FASTISEL-NEXT: kshiftrb $2, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k7, %k1, %k1 |
| ; FASTISEL-NEXT: kmovb {{[0-9]+}}(%rsp), %k2 |
| ; FASTISEL-NEXT: kshiftlb $7, %k2, %k2 |
| ; FASTISEL-NEXT: kshiftrb $1, %k2, %k2 |
| ; FASTISEL-NEXT: korb %k2, %k1, %k1 |
| ; FASTISEL-NEXT: kandb %k0, %k1, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload |
| ; FASTISEL-NEXT: kandb %k1, %k0, %k0 |
| ; FASTISEL-NEXT: kshiftrb $6, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r8d |
| ; FASTISEL-NEXT: kshiftrb $5, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r9d |
| ; FASTISEL-NEXT: kshiftrb $4, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %r10d |
| ; FASTISEL-NEXT: kshiftrb $3, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %edi |
| ; FASTISEL-NEXT: kshiftrb $2, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %ecx |
| ; FASTISEL-NEXT: kshiftrb $1, %k0, %k1 |
| ; FASTISEL-NEXT: kmovd %k1, %edx |
| ; FASTISEL-NEXT: kmovd %k0, %esi |
| ; FASTISEL-NEXT: andb $1, %sil |
| ; FASTISEL-NEXT: andb $1, %dl |
| ; FASTISEL-NEXT: addb %dl, %dl |
| ; FASTISEL-NEXT: orb %sil, %dl |
| ; FASTISEL-NEXT: andb $1, %cl |
| ; FASTISEL-NEXT: shlb $2, %cl |
| ; FASTISEL-NEXT: orb %dl, %cl |
| ; FASTISEL-NEXT: andb $1, %dil |
| ; FASTISEL-NEXT: shlb $3, %dil |
| ; FASTISEL-NEXT: orb %cl, %dil |
| ; FASTISEL-NEXT: andb $1, %r10b |
| ; FASTISEL-NEXT: shlb $4, %r10b |
| ; FASTISEL-NEXT: orb %dil, %r10b |
| ; FASTISEL-NEXT: andb $1, %r9b |
| ; FASTISEL-NEXT: shlb $5, %r9b |
| ; FASTISEL-NEXT: orb %r10b, %r9b |
| ; FASTISEL-NEXT: shlb $6, %r8b |
| ; FASTISEL-NEXT: orb %r9b, %r8b |
| ; FASTISEL-NEXT: andb $127, %r8b |
| ; FASTISEL-NEXT: movb %r8b, (%rax) |
| ; FASTISEL-NEXT: retq |
| %j = and <7 x i1> %a, %b |
| %k = and <7 x i1> %j, %c |
| %l = and <7 x i1> %k, %d |
| %m = and <7 x i1> %l, %e |
| %n = and <7 x i1> %m, %f |
| %o = and <7 x i1> %n, %g |
| %p = and <7 x i1> %o, %h |
| %q = and <7 x i1> %p, %i |
| ret <7 x i1> %q |
| } |
| |
| ; v2i1_mem forwards its <128 x i32> and <2 x i1> arguments unchanged to |
| ; v2i1_mem_callee. The assertions pin the argument-copy code llc emits |
| ; around that call for each RUN configuration. |
| ; In the x86-64 non-fast-isel runs a single 16-byte value is copied from a |
| ; stack slot to (%rsp) before the call. In the fast-isel run the loaded |
| ; value is first shifted (vpsllq $63) and round-tripped through a mask |
| ; register (vpmovq2m / vpmovm2q) before being stored. |
| ; NOTE(review): the <128 x i32> argument presumably exists to use up the |
| ; vector argument registers so that the <2 x i1> value is passed in |
| ; memory - confirm against the calling-convention lowering. |
| declare void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y) |
| define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) { |
| ; KNL-LABEL: v2i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: subq $24, %rsp |
| ; KNL-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; KNL-NEXT: vmovaps %xmm8, (%rsp) |
| ; KNL-NEXT: callq _v2i1_mem_callee |
| ; KNL-NEXT: addq $24, %rsp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v2i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: subq $24, %rsp |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; SKX-NEXT: vmovaps %xmm8, (%rsp) |
| ; SKX-NEXT: callq _v2i1_mem_callee |
| ; SKX-NEXT: addq $24, %rsp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v2i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $384, %esp ## imm = 0x180 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: vmovaps 264(%ebp), %xmm4 |
| ; KNL_X32-NEXT: vmovaps %xmm4, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v2i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v2i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: subq $24, %rsp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 32 |
| ; FASTISEL-NEXT: vpsllq $63, {{[0-9]+}}(%rsp), %xmm8 |
| ; FASTISEL-NEXT: vpmovq2m %xmm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2q %k0, %xmm8 |
| ; FASTISEL-NEXT: vmovdqa %xmm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v2i1_mem_callee |
| ; FASTISEL-NEXT: addq $24, %rsp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y) |
| ret void |
| } |
| |
| ; v4i1_mem forwards its <128 x i32> and <4 x i1> arguments unchanged to |
| ; v4i1_mem_callee. The assertions pin the argument-copy code llc emits |
| ; around that call for each RUN configuration. |
| ; In the x86-64 non-fast-isel runs a single 16-byte value is copied from a |
| ; stack slot to (%rsp) before the call. In the fast-isel run the loaded |
| ; value is first shifted (vpslld $31) and round-tripped through a mask |
| ; register (vpmovd2m / vpmovm2d) before being stored. |
| ; NOTE(review): the <128 x i32> argument presumably exists to use up the |
| ; vector argument registers so that the <4 x i1> value is passed in |
| ; memory - confirm against the calling-convention lowering. |
| declare void @v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y) |
| define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) { |
| ; KNL-LABEL: v4i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: subq $24, %rsp |
| ; KNL-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; KNL-NEXT: vmovaps %xmm8, (%rsp) |
| ; KNL-NEXT: callq _v4i1_mem_callee |
| ; KNL-NEXT: addq $24, %rsp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v4i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: subq $24, %rsp |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; SKX-NEXT: vmovaps %xmm8, (%rsp) |
| ; SKX-NEXT: callq _v4i1_mem_callee |
| ; SKX-NEXT: addq $24, %rsp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v4i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $384, %esp ## imm = 0x180 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: vmovaps 264(%ebp), %xmm4 |
| ; KNL_X32-NEXT: vmovaps %xmm4, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v4i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v4i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: subq $24, %rsp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 32 |
| ; FASTISEL-NEXT: vpslld $31, {{[0-9]+}}(%rsp), %xmm8 |
| ; FASTISEL-NEXT: vpmovd2m %xmm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2d %k0, %xmm8 |
| ; FASTISEL-NEXT: vmovdqa %xmm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v4i1_mem_callee |
| ; FASTISEL-NEXT: addq $24, %rsp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y) |
| ret void |
| } |
| |
| ; v8i1_mem forwards its <128 x i32> and <8 x i1> arguments unchanged to |
| ; v8i1_mem_callee. The assertions pin the argument-copy code llc emits |
| ; around that call for each RUN configuration. |
| ; In the x86-64 non-fast-isel runs a single 16-byte value is copied from a |
| ; stack slot to (%rsp) before the call. In the fast-isel run the loaded |
| ; value is first shifted (vpsllw $15) and round-tripped through a mask |
| ; register (vpmovw2m / vpmovm2w) before being stored. |
| ; NOTE(review): the <128 x i32> argument presumably exists to use up the |
| ; vector argument registers so that the <8 x i1> value is passed in |
| ; memory - confirm against the calling-convention lowering. |
| declare void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y) |
| define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) { |
| ; KNL-LABEL: v8i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: subq $24, %rsp |
| ; KNL-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; KNL-NEXT: vmovaps %xmm8, (%rsp) |
| ; KNL-NEXT: callq _v8i1_mem_callee |
| ; KNL-NEXT: addq $24, %rsp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v8i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: subq $24, %rsp |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; SKX-NEXT: vmovaps %xmm8, (%rsp) |
| ; SKX-NEXT: callq _v8i1_mem_callee |
| ; SKX-NEXT: addq $24, %rsp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v8i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $384, %esp ## imm = 0x180 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: vmovaps 264(%ebp), %xmm4 |
| ; KNL_X32-NEXT: vmovaps %xmm4, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v8i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v8i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: subq $24, %rsp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 32 |
| ; FASTISEL-NEXT: vpsllw $15, {{[0-9]+}}(%rsp), %xmm8 |
| ; FASTISEL-NEXT: vpmovw2m %xmm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2w %k0, %xmm8 |
| ; FASTISEL-NEXT: vmovdqa %xmm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v8i1_mem_callee |
| ; FASTISEL-NEXT: addq $24, %rsp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y) |
| ret void |
| } |
| |
| ; v16i1_mem forwards its <128 x i32> and <16 x i1> arguments unchanged to |
| ; v16i1_mem_callee. The assertions pin the argument-copy code llc emits |
| ; around that call for each RUN configuration. |
| ; In the x86-64 non-fast-isel runs a single 16-byte value is copied from a |
| ; stack slot to (%rsp) before the call. In the fast-isel run the loaded |
| ; value is first shifted (vpsllw $7) and round-tripped through a mask |
| ; register (vpmovb2m / vpmovm2b) before being stored. |
| ; NOTE(review): the <128 x i32> argument presumably exists to use up the |
| ; vector argument registers so that the <16 x i1> value is passed in |
| ; memory - confirm against the calling-convention lowering. |
| declare void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y) |
| define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) { |
| ; KNL-LABEL: v16i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: subq $24, %rsp |
| ; KNL-NEXT: .cfi_def_cfa_offset 32 |
| ; KNL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; KNL-NEXT: vmovaps %xmm8, (%rsp) |
| ; KNL-NEXT: callq _v16i1_mem_callee |
| ; KNL-NEXT: addq $24, %rsp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v16i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: subq $24, %rsp |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm8 |
| ; SKX-NEXT: vmovaps %xmm8, (%rsp) |
| ; SKX-NEXT: callq _v16i1_mem_callee |
| ; SKX-NEXT: addq $24, %rsp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v16i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $384, %esp ## imm = 0x180 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: vmovaps 264(%ebp), %xmm4 |
| ; KNL_X32-NEXT: vmovaps %xmm4, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v16i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v16i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: subq $24, %rsp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 32 |
| ; FASTISEL-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %xmm8 |
| ; FASTISEL-NEXT: vpmovb2m %xmm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2b %k0, %xmm8 |
| ; FASTISEL-NEXT: vmovdqa %xmm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v16i1_mem_callee |
| ; FASTISEL-NEXT: addq $24, %rsp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y) |
| ret void |
| } |
| |
| ; v32i1_mem forwards its <128 x i32> and <32 x i1> arguments unchanged to |
| ; v32i1_mem_callee. The assertions pin the argument-copy code llc emits |
| ; around that call for each RUN configuration. |
| ; In the x86-64 runs the expected code sets up a frame pointer, aligns the |
| ; stack to 32 bytes (andq $-32) and copies a 32-byte value from 16(%rbp) |
| ; to (%rsp) before the call. In the fast-isel run that value is first |
| ; shifted (vpsllw $7) and round-tripped through a mask register |
| ; (vpmovb2m / vpmovm2b) before being stored. |
| ; NOTE(review): the <128 x i32> argument presumably exists to use up the |
| ; vector argument registers so that the <32 x i1> value is passed in |
| ; memory - confirm against the calling-convention lowering. |
| declare void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y) |
| define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) { |
| ; KNL-LABEL: v32i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: pushq %rbp |
| ; KNL-NEXT: .cfi_def_cfa_offset 16 |
| ; KNL-NEXT: .cfi_offset %rbp, -16 |
| ; KNL-NEXT: movq %rsp, %rbp |
| ; KNL-NEXT: .cfi_def_cfa_register %rbp |
| ; KNL-NEXT: andq $-32, %rsp |
| ; KNL-NEXT: subq $64, %rsp |
| ; KNL-NEXT: vmovaps 16(%rbp), %ymm8 |
| ; KNL-NEXT: vmovaps %ymm8, (%rsp) |
| ; KNL-NEXT: callq _v32i1_mem_callee |
| ; KNL-NEXT: movq %rbp, %rsp |
| ; KNL-NEXT: popq %rbp |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v32i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rbp |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: .cfi_offset %rbp, -16 |
| ; SKX-NEXT: movq %rsp, %rbp |
| ; SKX-NEXT: .cfi_def_cfa_register %rbp |
| ; SKX-NEXT: andq $-32, %rsp |
| ; SKX-NEXT: subq $64, %rsp |
| ; SKX-NEXT: vmovaps 16(%rbp), %ymm8 |
| ; SKX-NEXT: vmovaps %ymm8, (%rsp) |
| ; SKX-NEXT: callq _v32i1_mem_callee |
| ; SKX-NEXT: movq %rbp, %rsp |
| ; SKX-NEXT: popq %rbp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v32i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $384, %esp ## imm = 0x180 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: vmovaps 264(%ebp), %ymm4 |
| ; KNL_X32-NEXT: vmovaps %ymm4, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v32i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v32i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: .cfi_offset %rbp, -16 |
| ; FASTISEL-NEXT: movq %rsp, %rbp |
| ; FASTISEL-NEXT: .cfi_def_cfa_register %rbp |
| ; FASTISEL-NEXT: andq $-32, %rsp |
| ; FASTISEL-NEXT: subq $64, %rsp |
| ; FASTISEL-NEXT: vpsllw $7, 16(%rbp), %ymm8 |
| ; FASTISEL-NEXT: vpmovb2m %ymm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2b %k0, %ymm8 |
| ; FASTISEL-NEXT: vmovdqa %ymm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v32i1_mem_callee |
| ; FASTISEL-NEXT: movq %rbp, %rsp |
| ; FASTISEL-NEXT: popq %rbp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y) |
| ret void |
| } |
| |
| declare void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) |
| define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) { |
| ; KNL-LABEL: v64i1_mem: |
| ; KNL: ## %bb.0: |
| ; KNL-NEXT: subq $472, %rsp ## imm = 0x1D8 |
| ; KNL-NEXT: .cfi_def_cfa_offset 480 |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) |
| ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax |
| ; KNL-NEXT: movl %eax, (%rsp) |
| ; KNL-NEXT: callq _v64i1_mem_callee |
| ; KNL-NEXT: addq $472, %rsp ## imm = 0x1D8 |
| ; KNL-NEXT: retq |
| ; |
| ; SKX-LABEL: v64i1_mem: |
| ; SKX: ## %bb.0: |
| ; SKX-NEXT: pushq %rbp |
| ; SKX-NEXT: .cfi_def_cfa_offset 16 |
| ; SKX-NEXT: .cfi_offset %rbp, -16 |
| ; SKX-NEXT: movq %rsp, %rbp |
| ; SKX-NEXT: .cfi_def_cfa_register %rbp |
| ; SKX-NEXT: andq $-64, %rsp |
| ; SKX-NEXT: subq $128, %rsp |
| ; SKX-NEXT: vmovaps 16(%rbp), %zmm8 |
| ; SKX-NEXT: vmovaps %zmm8, (%rsp) |
| ; SKX-NEXT: callq _v64i1_mem_callee |
| ; SKX-NEXT: movq %rbp, %rsp |
| ; SKX-NEXT: popq %rbp |
| ; SKX-NEXT: vzeroupper |
| ; SKX-NEXT: retq |
| ; |
| ; KNL_X32-LABEL: v64i1_mem: |
| ; KNL_X32: ## %bb.0: |
| ; KNL_X32-NEXT: pushl %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_offset 8 |
| ; KNL_X32-NEXT: .cfi_offset %ebp, -8 |
| ; KNL_X32-NEXT: movl %esp, %ebp |
| ; KNL_X32-NEXT: .cfi_def_cfa_register %ebp |
| ; KNL_X32-NEXT: andl $-64, %esp |
| ; KNL_X32-NEXT: subl $576, %esp ## imm = 0x240 |
| ; KNL_X32-NEXT: vmovaps 8(%ebp), %zmm4 |
| ; KNL_X32-NEXT: vmovaps 72(%ebp), %zmm5 |
| ; KNL_X32-NEXT: vmovaps 136(%ebp), %zmm6 |
| ; KNL_X32-NEXT: vmovaps 200(%ebp), %zmm7 |
| ; KNL_X32-NEXT: movl 516(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 512(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 508(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 504(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 500(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 496(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 492(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 488(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 484(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 480(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 476(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 472(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 468(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 464(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 460(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 456(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 452(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 448(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 444(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 440(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 436(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 432(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 428(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 424(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 420(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 416(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 412(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 408(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 404(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 400(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 396(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 392(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 388(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 384(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 380(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 376(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 372(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 368(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 364(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 360(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 356(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 352(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 348(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 344(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 340(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 336(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 332(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 328(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 324(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 320(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 316(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 312(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 308(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 304(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 300(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 296(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 292(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 288(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 284(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 280(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 276(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 272(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 268(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: movl 264(%ebp), %eax |
| ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm7, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm6, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm5, {{[0-9]+}}(%esp) |
| ; KNL_X32-NEXT: vmovaps %zmm4, (%esp) |
| ; KNL_X32-NEXT: calll _v64i1_mem_callee |
| ; KNL_X32-NEXT: movl %ebp, %esp |
| ; KNL_X32-NEXT: popl %ebp |
| ; KNL_X32-NEXT: retl |
| ; |
| ; FASTISEL-LABEL: v64i1_mem: |
| ; FASTISEL: ## %bb.0: |
| ; FASTISEL-NEXT: pushq %rbp |
| ; FASTISEL-NEXT: .cfi_def_cfa_offset 16 |
| ; FASTISEL-NEXT: .cfi_offset %rbp, -16 |
| ; FASTISEL-NEXT: movq %rsp, %rbp |
| ; FASTISEL-NEXT: .cfi_def_cfa_register %rbp |
| ; FASTISEL-NEXT: andq $-64, %rsp |
| ; FASTISEL-NEXT: subq $128, %rsp |
| ; FASTISEL-NEXT: vpsllw $7, 16(%rbp), %zmm8 |
| ; FASTISEL-NEXT: vpmovb2m %zmm8, %k0 |
| ; FASTISEL-NEXT: vpmovm2b %k0, %zmm8 |
| ; FASTISEL-NEXT: vmovdqa64 %zmm8, (%rsp) |
| ; FASTISEL-NEXT: callq _v64i1_mem_callee |
| ; FASTISEL-NEXT: movq %rbp, %rsp |
| ; FASTISEL-NEXT: popq %rbp |
| ; FASTISEL-NEXT: vzeroupper |
| ; FASTISEL-NEXT: retq |
| call void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y) |
| ret void |
| } |