| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 |
| |
| |
| ; Widen <4 x i4> to <32 x i4> with a single-source shufflevector. Only result |
| ; lane 4 is defined (it reads %arg element 1); every other result lane is |
| ; poison. Checks are emitted separately for the AVX1, AVX2 and AVX512 prefixes. |
| define <32 x i4> @avir_v4i4_to_v32i4(<4 x i4> %arg) { |
| ; AVX1-LABEL: avir_v4i4_to_v32i4: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] |
| ; AVX1-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax |
| ; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rcx |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $4, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: movl %ecx, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: vmovd %esi, %xmm0 |
| ; AVX1-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $8, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $12, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $16, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $20, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $24, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %ecx, %edx |
| ; AVX1-NEXT: shrl $28, %edx |
| ; AVX1-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $32, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $36, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $40, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $44, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $48, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $52, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movq %rcx, %rdx |
| ; AVX1-NEXT: shrq $56, %rdx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 |
| ; AVX1-NEXT: shrq $60, %rcx |
| ; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $4, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: movl %eax, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vmovd %edx, %xmm1 |
| ; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $8, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $12, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $16, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $20, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $24, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movl %eax, %ecx |
| ; AVX1-NEXT: shrl $28, %ecx |
| ; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $32, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $36, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $40, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $44, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $48, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $52, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: movq %rax, %rcx |
| ; AVX1-NEXT: shrq $56, %rcx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 |
| ; AVX1-NEXT: shrq $60, %rax |
| ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: avir_v4i4_to_v32i4: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] |
| ; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax |
| ; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $4, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: movl %ecx, %esi |
| ; AVX2-NEXT: andl $15, %esi |
| ; AVX2-NEXT: vmovd %esi, %xmm0 |
| ; AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $8, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $12, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $16, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $20, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $24, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $28, %edx |
| ; AVX2-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $32, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $36, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $40, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $44, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $48, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $52, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: shrq $56, %rdx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 |
| ; AVX2-NEXT: shrq $60, %rcx |
| ; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $4, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: movl %eax, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vmovd %edx, %xmm1 |
| ; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $8, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $12, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $16, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $20, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $24, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $28, %ecx |
| ; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $32, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $36, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $40, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $44, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $48, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $52, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq $56, %rcx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 |
| ; AVX2-NEXT: shrq $60, %rax |
| ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 |
| ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: avir_v4i4_to_v32i4: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] |
| ; AVX512-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq -{{[0-9]+}}(%rsp), %rax |
| ; AVX512-NEXT: movq -{{[0-9]+}}(%rsp), %rcx |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $4, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: movl %ecx, %esi |
| ; AVX512-NEXT: andl $15, %esi |
| ; AVX512-NEXT: vmovd %esi, %xmm0 |
| ; AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $8, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $12, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $16, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $20, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $24, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %ecx, %edx |
| ; AVX512-NEXT: shrl $28, %edx |
| ; AVX512-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $32, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $36, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $40, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $44, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $48, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $52, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movq %rcx, %rdx |
| ; AVX512-NEXT: shrq $56, %rdx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 |
| ; AVX512-NEXT: shrq $60, %rcx |
| ; AVX512-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $4, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: movl %eax, %edx |
| ; AVX512-NEXT: andl $15, %edx |
| ; AVX512-NEXT: vmovd %edx, %xmm1 |
| ; AVX512-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $8, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $12, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $16, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $20, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $24, %ecx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $28, %ecx |
| ; AVX512-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $32, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $36, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $40, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $44, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $48, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $52, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: movq %rax, %rcx |
| ; AVX512-NEXT: shrq $56, %rcx |
| ; AVX512-NEXT: andl $15, %ecx |
| ; AVX512-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 |
| ; AVX512-NEXT: shrq $60, %rax |
| ; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 |
| ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 |
| ; AVX512-NEXT: retq |
| %res = shufflevector <4 x i4> %arg, <4 x i4> poison, |
| <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1 , i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <32 x i4> %res |
| } |
| |
| ; Widen <4 x i4> to <64 x i4> with a single-source shufflevector. Result lanes |
| ; 0, 2, 4 and 6 read %arg elements 0, 1, 2 and 3; all other result lanes are |
| ; poison. The checks use the AVX prefix, which every RUN line in this file |
| ; passes to FileCheck, so one set of assertions covers all three configurations. |
| define <64 x i4> @avir_v4i4_to_v64i4(<4 x i4> %arg) { |
| ; AVX-LABEL: avir_v4i4_to_v64i4: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movq %rdi, %rax |
| ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] |
| ; AVX-NEXT: vmovdqa %xmm0, (%rdi) |
| ; AVX-NEXT: retq |
| %res = shufflevector <4 x i4> %arg, <4 x i4> poison, |
| <64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <64 x i4> %res |
| } |
| |
| ; Widen <8 x i4> to <64 x i4> with a single-source shufflevector. Result lanes |
| ; 0, 2, 4 and 6 read %arg elements 0, 1, 2 and 3; all other result lanes are |
| ; poison, so %arg elements 4-7 are never referenced by the mask. The checks |
| ; use the AVX prefix that every RUN line in this file passes to FileCheck. |
| define <64 x i4> @avir_v8i4_to_v64i4(<8 x i4> %arg) { |
| ; AVX-LABEL: avir_v8i4_to_v64i4: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movq %rdi, %rax |
| ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] |
| ; AVX-NEXT: vmovdqa %xmm0, (%rdi) |
| ; AVX-NEXT: retq |
| %res = shufflevector <8 x i4> %arg, <8 x i4> poison, |
| <64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <64 x i4> %res |
| } |
| |
| ; Widen <16 x i4> to <64 x i4> with a single-source shufflevector. Result lanes |
| ; 0, 2, 4 and 6 read %arg elements 0, 1, 2 and 3; all other result lanes are |
| ; poison, so %arg elements 4-15 are never referenced by the mask. The checks |
| ; use the AVX prefix that every RUN line in this file passes to FileCheck. |
| define <64 x i4> @avir_v16i4_to_v64i4(<16 x i4> %arg) { |
| ; AVX-LABEL: avir_v16i4_to_v64i4: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movq %rdi, %rax |
| ; AVX-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX-NEXT: retq |
| %res = shufflevector <16 x i4> %arg, <16 x i4> poison, |
| <64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <64 x i4> %res |
| } |
| |
| ; Widen <4 x i4> to <128 x i4> with a single-source shufflevector. Result lanes |
| ; 0, 4 and 6 read %arg elements 0, 2 and 3; all other result lanes (including |
| ; lane 2) are poison, so %arg element 1 is never referenced by the mask. |
| ; Checks are emitted separately for the AVX1, AVX2 and AVX512 prefixes. |
| define <128 x i4> @avir_v4i4_to_v128i4(<4 x i4> %arg) { |
| ; AVX1-LABEL: avir_v4i4_to_v128i4: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: movq %rdi, %rax |
| ; AVX1-NEXT: vpextrb $8, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: movq %rdx, %rcx |
| ; AVX1-NEXT: shlq $32, %rcx |
| ; AVX1-NEXT: vmovd %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: vpextrb $1, %xmm0, %edi |
| ; AVX1-NEXT: andl $15, %edi |
| ; AVX1-NEXT: shll $4, %edi |
| ; AVX1-NEXT: orl %esi, %edi |
| ; AVX1-NEXT: vpextrb $2, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $8, %esi |
| ; AVX1-NEXT: orl %edi, %esi |
| ; AVX1-NEXT: vpextrb $3, %xmm0, %edi |
| ; AVX1-NEXT: andl $15, %edi |
| ; AVX1-NEXT: shll $12, %edi |
| ; AVX1-NEXT: orl %esi, %edi |
| ; AVX1-NEXT: shll $16, %edx |
| ; AVX1-NEXT: orl %edi, %edx |
| ; AVX1-NEXT: vpextrb $9, %xmm0, %edi |
| ; AVX1-NEXT: andl $15, %edi |
| ; AVX1-NEXT: movl %edi, %r8d |
| ; AVX1-NEXT: shll $20, %r8d |
| ; AVX1-NEXT: orl %edx, %r8d |
| ; AVX1-NEXT: vpextrb $12, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: movl %esi, %r9d |
| ; AVX1-NEXT: shll $24, %r9d |
| ; AVX1-NEXT: vpextrb $13, %xmm0, %edx |
| ; AVX1-NEXT: movl %edx, %r10d |
| ; AVX1-NEXT: shll $28, %r10d |
| ; AVX1-NEXT: orl %r9d, %r10d |
| ; AVX1-NEXT: orl %r8d, %r10d |
| ; AVX1-NEXT: orq %rcx, %r10 |
| ; AVX1-NEXT: shlq $36, %rdi |
| ; AVX1-NEXT: orq %r10, %rdi |
| ; AVX1-NEXT: movq %rsi, %rcx |
| ; AVX1-NEXT: shlq $40, %rcx |
| ; AVX1-NEXT: orq %rdi, %rcx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: movq %rdx, %rdi |
| ; AVX1-NEXT: shlq $44, %rdi |
| ; AVX1-NEXT: orq %rcx, %rdi |
| ; AVX1-NEXT: shlq $48, %rsi |
| ; AVX1-NEXT: shlq $52, %rdx |
| ; AVX1-NEXT: orq %rsi, %rdx |
| ; AVX1-NEXT: vpextrb $14, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $56, %rcx |
| ; AVX1-NEXT: orq %rdx, %rcx |
| ; AVX1-NEXT: vpextrb $15, %xmm0, %edx |
| ; AVX1-NEXT: shlq $60, %rdx |
| ; AVX1-NEXT: orq %rcx, %rdx |
| ; AVX1-NEXT: orq %rdi, %rdx |
| ; AVX1-NEXT: movq %rdx, (%rax) |
| ; AVX1-NEXT: movq $0, 8(%rax) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: avir_v4i4_to_v128i4: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,8,12,u,u,u,u,u,u,u,u,u,u,u,u] |
| ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpextrb $8, %xmm1, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm1, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm1, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, 8(%rdi) |
| ; AVX2-NEXT: vpextrb $8, %xmm0, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm0, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm0, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: avir_v4i4_to_v128i4: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] |
| ; AVX512-NEXT: vmovdqa %xmm0, (%rdi) |
| ; AVX512-NEXT: retq |
| %res = shufflevector <4 x i4> %arg, <4 x i4> poison, |
| <128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <128 x i4> %res |
| } |
| |
| define <128 x i4> @avir_v8i4_to_v128i4(<8 x i4> %arg) { |
| ; AVX1-LABEL: avir_v8i4_to_v128i4: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: movq %rdi, %rax |
| ; AVX1-NEXT: vpextrb $8, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $32, %rcx |
| ; AVX1-NEXT: vmovd %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpextrb $1, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $4, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: vpextrb $2, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $8, %edx |
| ; AVX1-NEXT: orl %esi, %edx |
| ; AVX1-NEXT: vpextrb $3, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $12, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: vpextrb $4, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $16, %edx |
| ; AVX1-NEXT: orl %esi, %edx |
| ; AVX1-NEXT: vpextrb $5, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $20, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: vpextrb $6, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $24, %edx |
| ; AVX1-NEXT: vpextrb $7, %xmm0, %edi |
| ; AVX1-NEXT: shll $28, %edi |
| ; AVX1-NEXT: orl %edx, %edi |
| ; AVX1-NEXT: orl %esi, %edi |
| ; AVX1-NEXT: orq %rcx, %rdi |
| ; AVX1-NEXT: vpextrb $9, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $36, %rcx |
| ; AVX1-NEXT: orq %rdi, %rcx |
| ; AVX1-NEXT: vpextrb $10, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shlq $40, %rdx |
| ; AVX1-NEXT: orq %rcx, %rdx |
| ; AVX1-NEXT: vpextrb $11, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $44, %rcx |
| ; AVX1-NEXT: orq %rdx, %rcx |
| ; AVX1-NEXT: vpextrb $12, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shlq $48, %rdx |
| ; AVX1-NEXT: vpextrb $13, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shlq $52, %rsi |
| ; AVX1-NEXT: orq %rdx, %rsi |
| ; AVX1-NEXT: vpextrb $14, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shlq $56, %rdx |
| ; AVX1-NEXT: orq %rsi, %rdx |
| ; AVX1-NEXT: vpextrb $15, %xmm0, %esi |
| ; AVX1-NEXT: shlq $60, %rsi |
| ; AVX1-NEXT: orq %rdx, %rsi |
| ; AVX1-NEXT: orq %rcx, %rsi |
| ; AVX1-NEXT: movq %rsi, (%rax) |
| ; AVX1-NEXT: movq $0, 8(%rax) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: avir_v8i4_to_v128i4: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,4,6,u,u,u,u,u,u,u,u,u,u,u,u] |
| ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpextrb $8, %xmm1, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm1, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm1, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, 8(%rdi) |
| ; AVX2-NEXT: vpextrb $8, %xmm0, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm0, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm0, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: avir_v8i4_to_v128i4: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] |
| ; AVX512-NEXT: vmovdqa %xmm0, (%rdi) |
| ; AVX512-NEXT: retq |
| %res = shufflevector <8 x i4> %arg, <8 x i4> poison, |
| <128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <128 x i4> %res |
| } |
| |
| ; Widening shuffle from <16 x i4> to <128 x i4>: result lanes 0, 4 and 6 take |
| ; source elements 0, 2 and 3 of %arg; every other result lane is poison. |
| ; The avx and avx2 expectations assemble the result as two 64-bit words using |
| ; byte extract / mask / shift / or sequences and store them to (%rdi) and |
| ; 8(%rdi); the avx512f expectation is a single 16-byte vector store to (%rdi). |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file); regenerate them with that script instead of editing them by hand. |
| define <128 x i4> @avir_v16i4_to_v128i4(<16 x i4> %arg) { |
| ; AVX1-LABEL: avir_v16i4_to_v128i4: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: movq %rdi, %rax |
| ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] |
| ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| ; AVX1-NEXT: vpextrb $12, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $32, %rcx |
| ; AVX1-NEXT: vmovd %xmm2, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: vpextrb $9, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $8, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: vpextrb $10, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $16, %edx |
| ; AVX1-NEXT: orl %esi, %edx |
| ; AVX1-NEXT: vpextrb $11, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $24, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: orq %rcx, %rsi |
| ; AVX1-NEXT: vpextrb $13, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $40, %rcx |
| ; AVX1-NEXT: orq %rsi, %rcx |
| ; AVX1-NEXT: vpextrb $14, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shlq $48, %rdx |
| ; AVX1-NEXT: orq %rcx, %rdx |
| ; AVX1-NEXT: vpextrb $15, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $56, %rcx |
| ; AVX1-NEXT: orq %rdx, %rcx |
| ; AVX1-NEXT: movq %rcx, 8(%rdi) |
| ; AVX1-NEXT: vpextrb $4, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $32, %rcx |
| ; AVX1-NEXT: vpextrb $1, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $8, %edx |
| ; AVX1-NEXT: vmovd %xmm1, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: vpextrb $2, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shll $16, %edx |
| ; AVX1-NEXT: orl %esi, %edx |
| ; AVX1-NEXT: vpextrb $3, %xmm0, %esi |
| ; AVX1-NEXT: andl $15, %esi |
| ; AVX1-NEXT: shll $24, %esi |
| ; AVX1-NEXT: orl %edx, %esi |
| ; AVX1-NEXT: orq %rcx, %rsi |
| ; AVX1-NEXT: vpextrb $5, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $40, %rcx |
| ; AVX1-NEXT: orq %rsi, %rcx |
| ; AVX1-NEXT: vpextrb $6, %xmm0, %edx |
| ; AVX1-NEXT: andl $15, %edx |
| ; AVX1-NEXT: shlq $48, %rdx |
| ; AVX1-NEXT: orq %rcx, %rdx |
| ; AVX1-NEXT: vpextrb $7, %xmm0, %ecx |
| ; AVX1-NEXT: andl $15, %ecx |
| ; AVX1-NEXT: shlq $56, %rcx |
| ; AVX1-NEXT: orq %rdx, %rcx |
| ; AVX1-NEXT: movq %rcx, (%rdi) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: avir_v16i4_to_v128i4: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpextrb $8, %xmm1, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm1, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm1, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm1, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm1, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm1, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, 8(%rdi) |
| ; AVX2-NEXT: vpextrb $8, %xmm0, %ecx |
| ; AVX2-NEXT: andl $15, %ecx |
| ; AVX2-NEXT: shlq $32, %rcx |
| ; AVX2-NEXT: vmovd %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: vpextrb $1, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $4, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $2, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $8, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $3, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $12, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $4, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $16, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $5, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shll $20, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: vpextrb $6, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shll $24, %edx |
| ; AVX2-NEXT: orl %esi, %edx |
| ; AVX2-NEXT: vpextrb $7, %xmm0, %esi |
| ; AVX2-NEXT: shll $28, %esi |
| ; AVX2-NEXT: orl %edx, %esi |
| ; AVX2-NEXT: orq %rcx, %rsi |
| ; AVX2-NEXT: vpextrb $9, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $36, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: vpextrb $10, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $40, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $11, %xmm0, %ecx |
| ; AVX2-NEXT: movzwl %cx, %ecx |
| ; AVX2-NEXT: shlq $44, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: vpextrb $12, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $48, %rdx |
| ; AVX2-NEXT: vpextrb $13, %xmm0, %esi |
| ; AVX2-NEXT: movzwl %si, %esi |
| ; AVX2-NEXT: shlq $52, %rsi |
| ; AVX2-NEXT: orq %rdx, %rsi |
| ; AVX2-NEXT: vpextrb $14, %xmm0, %edx |
| ; AVX2-NEXT: andl $15, %edx |
| ; AVX2-NEXT: shlq $56, %rdx |
| ; AVX2-NEXT: orq %rsi, %rdx |
| ; AVX2-NEXT: orq %rcx, %rdx |
| ; AVX2-NEXT: vpextrb $15, %xmm0, %ecx |
| ; AVX2-NEXT: shlq $60, %rcx |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: movq %rcx, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: avir_v16i4_to_v128i4: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX512-NEXT: retq |
| ; Shuffle mask: only indices 0, 2 and 3 are referenced (at result lanes 0, 4 |
| ; and 6); the remaining 125 mask entries are poison. |
| %res = shufflevector <16 x i4> %arg, <16 x i4> poison, |
| <128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, |
| i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ret <128 x i4> %res |
| } |