; blob: a99f05fe82b64acc52e82cb80f48500dc02c4b88 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; Widen a <4 x i4> argument to <32 x i4> with a single-input shufflevector.
; The mask defines only result lane 4 (taken from source element 1); the
; other 31 lanes are poison.
; Each of the three llc configurations has its own autogenerated check block
; below; all three expect the shuffled vector to be stored to the stack,
; reloaded as two 64-bit halves, and rebuilt one 4-bit field at a time with
; shift/and/vpinsrb before the two 128-bit halves are joined into ymm0.
; NOTE(review): with 31 of 32 lanes poison this long sequence looks like it
; pins currently suboptimal codegen rather than a desired lowering - confirm.
define <32 x i4> @avir_v4i4_to_v32i4(<4 x i4> %arg) {
; AVX1-LABEL: avir_v4i4_to_v32i4:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $4, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: movl %ecx, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $8, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $12, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $16, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $20, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $24, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %edx
; AVX1-NEXT: shrl $28, %edx
; AVX1-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $32, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $36, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $40, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $44, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $52, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $56, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
; AVX1-NEXT: shrq $60, %rcx
; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: movl %eax, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vmovd %edx, %xmm1
; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $12, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $20, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $24, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $28, %ecx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $36, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $40, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $44, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $52, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shrq $60, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: avir_v4i4_to_v32i4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $4, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: movl %ecx, %esi
; AVX2-NEXT: andl $15, %esi
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $8, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $12, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $16, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $20, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $24, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %edx
; AVX2-NEXT: shrl $28, %edx
; AVX2-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $32, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $36, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $40, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $44, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $52, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: shrq $56, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
; AVX2-NEXT: shrq $60, %rcx
; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: movl %eax, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $12, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $20, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $24, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $28, %ecx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $36, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $40, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $44, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $52, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shrq $60, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: avir_v4i4_to_v32i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $4, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: movl %ecx, %esi
; AVX512-NEXT: andl $15, %esi
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $8, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $12, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $16, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $20, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $5, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $24, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
; AVX512-NEXT: movl %ecx, %edx
; AVX512-NEXT: shrl $28, %edx
; AVX512-NEXT: vpinsrb $7, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $32, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $36, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $9, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $40, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $10, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $44, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $11, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $48, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $52, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; AVX512-NEXT: movq %rcx, %rdx
; AVX512-NEXT: shrq $56, %rdx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
; AVX512-NEXT: shrq $60, %rcx
; AVX512-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: movl %eax, %edx
; AVX512-NEXT: andl $15, %edx
; AVX512-NEXT: vmovd %edx, %xmm1
; AVX512-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $8, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $12, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $16, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $20, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $24, %ecx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shrl $28, %ecx
; AVX512-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $32, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $36, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $40, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $44, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $48, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $52, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: shrq $56, %rcx
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX512-NEXT: shrq $60, %rax
; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%res = shufflevector <4 x i4> %arg, <4 x i4> poison,
<32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1 , i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <32 x i4> %res
}
; Widen <4 x i4> to <64 x i4>: result lanes 0, 2, 4 and 6 take source
; elements 0, 1, 2 and 3 in that order; every other lane is poison.
; All three llc configurations share one check block, which expects a single
; vpshufb followed by a 16-byte store through the pointer in %rdi (that
; pointer is also copied to %rax).
define <64 x i4> @avir_v4i4_to_v64i4(<4 x i4> %arg) {
; AVX-LABEL: avir_v4i4_to_v64i4:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovdqa %xmm0, (%rdi)
; AVX-NEXT: retq
%res = shufflevector <4 x i4> %arg, <4 x i4> poison,
<64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <64 x i4> %res
}
; Widen <8 x i4> to <64 x i4>: result lanes 0, 2, 4 and 6 take source
; elements 0, 1, 2 and 3; source elements 4..7 are not referenced by the
; mask and every other result lane is poison.
; All three llc configurations share one check block, which expects a single
; vpshufb followed by a 16-byte store through the pointer in %rdi (that
; pointer is also copied to %rax).
define <64 x i4> @avir_v8i4_to_v64i4(<8 x i4> %arg) {
; AVX-LABEL: avir_v8i4_to_v64i4:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovdqa %xmm0, (%rdi)
; AVX-NEXT: retq
%res = shufflevector <8 x i4> %arg, <8 x i4> poison,
<64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <64 x i4> %res
}
; Widen <16 x i4> to <64 x i4>: result lanes 0, 2, 4 and 6 take source
; elements 0, 1, 2 and 3; the remaining source elements are not referenced
; by the mask and every other result lane is poison.
; All three llc configurations share one check block, which expects no
; shuffle instruction at all: %xmm0 is stored unchanged through the pointer
; in %rdi (that pointer is also copied to %rax).
define <64 x i4> @avir_v16i4_to_v64i4(<16 x i4> %arg) {
; AVX-LABEL: avir_v16i4_to_v64i4:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-NEXT: retq
%res = shufflevector <16 x i4> %arg, <16 x i4> poison,
<64 x i32> <i32 0 , i32 poison, i32 1 , i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <64 x i4> %res
}
; Widen <4 x i4> to <128 x i4>: result lanes 0, 4 and 6 take source elements
; 0, 2 and 3; source element 1 is not referenced and every other result lane
; is poison.
; The three llc configurations have separate autogenerated check blocks. The
; +avx and +avx2 blocks pin a long scalar vpextrb/shift/or sequence that
; assembles 64-bit words and stores them through the pointer passed in %rdi,
; while the +avx512f block expects only a vpshufb and one 16-byte store.
define <128 x i4> @avir_v4i4_to_v128i4(<4 x i4> %arg) {
; AVX1-LABEL: avir_v4i4_to_v128i4:
; AVX1: # %bb.0:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vpextrb $8, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: vmovd %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: vpextrb $1, %xmm0, %edi
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: shll $4, %edi
; AVX1-NEXT: orl %esi, %edi
; AVX1-NEXT: vpextrb $2, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $8, %esi
; AVX1-NEXT: orl %edi, %esi
; AVX1-NEXT: vpextrb $3, %xmm0, %edi
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: shll $12, %edi
; AVX1-NEXT: orl %esi, %edi
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %edi, %edx
; AVX1-NEXT: vpextrb $9, %xmm0, %edi
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: movl %edi, %r8d
; AVX1-NEXT: shll $20, %r8d
; AVX1-NEXT: orl %edx, %r8d
; AVX1-NEXT: vpextrb $12, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: movl %esi, %r9d
; AVX1-NEXT: shll $24, %r9d
; AVX1-NEXT: vpextrb $13, %xmm0, %edx
; AVX1-NEXT: movl %edx, %r10d
; AVX1-NEXT: shll $28, %r10d
; AVX1-NEXT: orl %r9d, %r10d
; AVX1-NEXT: orl %r8d, %r10d
; AVX1-NEXT: orq %rcx, %r10
; AVX1-NEXT: shlq $36, %rdi
; AVX1-NEXT: orq %r10, %rdi
; AVX1-NEXT: movq %rsi, %rcx
; AVX1-NEXT: shlq $40, %rcx
; AVX1-NEXT: orq %rdi, %rcx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: shlq $44, %rdi
; AVX1-NEXT: orq %rcx, %rdi
; AVX1-NEXT: shlq $48, %rsi
; AVX1-NEXT: shlq $52, %rdx
; AVX1-NEXT: orq %rsi, %rdx
; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $56, %rcx
; AVX1-NEXT: orq %rdx, %rcx
; AVX1-NEXT: vpextrb $15, %xmm0, %edx
; AVX1-NEXT: shlq $60, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: orq %rdi, %rdx
; AVX1-NEXT: movq %rdx, (%rax)
; AVX1-NEXT: movq $0, 8(%rax)
; AVX1-NEXT: retq
;
; AVX2-LABEL: avir_v4i4_to_v128i4:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm1, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm1, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, 8(%rdi)
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm0, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm0, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: avir_v4i4_to_v128i4:
; AVX512: # %bb.0:
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512-NEXT: retq
%res = shufflevector <4 x i4> %arg, <4 x i4> poison,
<128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <128 x i4> %res
}
; Widen <8 x i4> to <128 x i4>: result lanes 0, 4 and 6 take source elements
; 0, 2 and 3; the other source elements are not referenced and every other
; result lane is poison.
; The three llc configurations have separate autogenerated check blocks. The
; +avx and +avx2 blocks pin a long scalar vpextrb/shift/or sequence that
; assembles 64-bit words and stores them through the pointer passed in %rdi,
; while the +avx512f block expects only a vpshufb and one 16-byte store.
define <128 x i4> @avir_v8i4_to_v128i4(<8 x i4> %arg) {
; AVX1-LABEL: avir_v8i4_to_v128i4:
; AVX1: # %bb.0:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: vmovd %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpextrb $1, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $4, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: vpextrb $2, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $8, %edx
; AVX1-NEXT: orl %esi, %edx
; AVX1-NEXT: vpextrb $3, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $12, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: vpextrb $4, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %esi, %edx
; AVX1-NEXT: vpextrb $5, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $20, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: vpextrb $6, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $24, %edx
; AVX1-NEXT: vpextrb $7, %xmm0, %edi
; AVX1-NEXT: shll $28, %edi
; AVX1-NEXT: orl %edx, %edi
; AVX1-NEXT: orl %esi, %edi
; AVX1-NEXT: orq %rcx, %rdi
; AVX1-NEXT: vpextrb $9, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $36, %rcx
; AVX1-NEXT: orq %rdi, %rcx
; AVX1-NEXT: vpextrb $10, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $40, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vpextrb $11, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $44, %rcx
; AVX1-NEXT: orq %rdx, %rcx
; AVX1-NEXT: vpextrb $12, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $48, %rdx
; AVX1-NEXT: vpextrb $13, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shlq $52, %rsi
; AVX1-NEXT: orq %rdx, %rsi
; AVX1-NEXT: vpextrb $14, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $56, %rdx
; AVX1-NEXT: orq %rsi, %rdx
; AVX1-NEXT: vpextrb $15, %xmm0, %esi
; AVX1-NEXT: shlq $60, %rsi
; AVX1-NEXT: orq %rdx, %rsi
; AVX1-NEXT: orq %rcx, %rsi
; AVX1-NEXT: movq %rsi, (%rax)
; AVX1-NEXT: movq $0, 8(%rax)
; AVX1-NEXT: retq
;
; AVX2-LABEL: avir_v8i4_to_v128i4:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm1, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm1, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, 8(%rdi)
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm0, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm0, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: avir_v8i4_to_v128i4:
; AVX512: # %bb.0:
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512-NEXT: retq
%res = shufflevector <8 x i4> %arg, <8 x i4> poison,
<128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <128 x i4> %res
}
; Widening shuffle test: builds a <128 x i4> result from a single <16 x i4>
; argument (the second shuffle operand is poison). Only result lanes 0, 4 and 6
; are defined, taking source elements 0, 2 and 3 respectively; the remaining
; 125 lanes are poison.
; The assertion lines inside the function pin the expected llc output for the
; avx, avx2 and avx512f configurations declared at the top of this file. Per
; the file header they are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
define <128 x i4> @avir_v16i4_to_v128i4(<16 x i4> %arg) {
; AVX1-LABEL: avir_v16i4_to_v128i4:
; AVX1: # %bb.0:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: vmovd %xmm2, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: vpextrb $9, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $8, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: vpextrb $10, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %esi, %edx
; AVX1-NEXT: vpextrb $11, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $24, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: orq %rcx, %rsi
; AVX1-NEXT: vpextrb $13, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $40, %rcx
; AVX1-NEXT: orq %rsi, %rcx
; AVX1-NEXT: vpextrb $14, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $48, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vpextrb $15, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $56, %rcx
; AVX1-NEXT: orq %rdx, %rcx
; AVX1-NEXT: movq %rcx, 8(%rdi)
; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: vpextrb $1, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $8, %edx
; AVX1-NEXT: vmovd %xmm1, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: vpextrb $2, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %esi, %edx
; AVX1-NEXT: vpextrb $3, %xmm0, %esi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shll $24, %esi
; AVX1-NEXT: orl %edx, %esi
; AVX1-NEXT: orq %rcx, %rsi
; AVX1-NEXT: vpextrb $5, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $40, %rcx
; AVX1-NEXT: orq %rsi, %rcx
; AVX1-NEXT: vpextrb $6, %xmm0, %edx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $48, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vpextrb $7, %xmm0, %ecx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $56, %rcx
; AVX1-NEXT: orq %rdx, %rcx
; AVX1-NEXT: movq %rcx, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: avir_v16i4_to_v128i4:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm1, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm1, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm1, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm1, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, 8(%rdi)
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: vmovd %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: vpextrb $1, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $4, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $2, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $8, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $3, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $12, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $4, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $16, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $5, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shll $20, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: vpextrb $6, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shll $24, %edx
; AVX2-NEXT: orl %esi, %edx
; AVX2-NEXT: vpextrb $7, %xmm0, %esi
; AVX2-NEXT: shll $28, %esi
; AVX2-NEXT: orl %edx, %esi
; AVX2-NEXT: orq %rcx, %rsi
; AVX2-NEXT: vpextrb $9, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $36, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vpextrb $10, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $40, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $11, %xmm0, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: vpextrb $12, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: vpextrb $13, %xmm0, %esi
; AVX2-NEXT: movzwl %si, %esi
; AVX2-NEXT: shlq $52, %rsi
; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: vpextrb $14, %xmm0, %edx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vpextrb $15, %xmm0, %ecx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: orq %rdx, %rcx
; AVX2-NEXT: movq %rcx, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: avir_v16i4_to_v128i4:
; AVX512: # %bb.0:
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: vmovaps %xmm0, (%rdi)
; AVX512-NEXT: retq
; Only mask entries 0, 4 and 6 select a source element (0, 2 and 3); every
; other entry of the 128-entry mask is poison.
%res = shufflevector <16 x i4> %arg, <16 x i4> poison,
<128 x i32> <i32 0 , i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 3 , i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
ret <128 x i4> %res
}