blob: d8f522ae06e626b85a46c12a18003fa1d77a484f [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,+pclmul | FileCheck %s --check-prefixes=CHECK,SSE-PCLMUL,SSE2-PCLMUL
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,SSE-PCLMUL,SSE42-PCLMUL
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+pclmul | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+vpclmulqdq | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+vpclmulqdq | FileCheck %s --check-prefixes=CHECK,AVX
; Lowering of the carry-less multiply intrinsic llvm.clmul.i8.
; SCALAR (no PCLMUL): expanded bit-by-bit — for each set bit i of %b,
; test the bit and cmov-select (%a << i) or 0, then XOR the partial
; products together (the i8 value is re-zero-extended after each shift).
; SSE-PCLMUL / AVX: the operands are moved into XMM registers and a single
; (v)pclmulqdq with immediate 0 (low-qword x low-qword) computes the product.
define i8 @clmul_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmul_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
; SCALAR-NEXT: xorl %ecx, %ecx
; SCALAR-NEXT: testb $1, %sil
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: cmovel %ecx, %eax
; SCALAR-NEXT: leal (%rdi,%rdi), %edx
; SCALAR-NEXT: movzbl %dl, %edx
; SCALAR-NEXT: testb $2, %sil
; SCALAR-NEXT: cmovel %ecx, %edx
; SCALAR-NEXT: xorl %eax, %edx
; SCALAR-NEXT: leal (,%rdi,4), %eax
; SCALAR-NEXT: movzbl %al, %r8d
; SCALAR-NEXT: testb $4, %sil
; SCALAR-NEXT: cmovel %ecx, %r8d
; SCALAR-NEXT: leal (,%rdi,8), %eax
; SCALAR-NEXT: movzbl %al, %eax
; SCALAR-NEXT: testb $8, %sil
; SCALAR-NEXT: cmovel %ecx, %eax
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shlb $4, %dl
; SCALAR-NEXT: movzbl %dl, %edx
; SCALAR-NEXT: testb $16, %sil
; SCALAR-NEXT: cmovel %ecx, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shlb $5, %r8b
; SCALAR-NEXT: movzbl %r8b, %r8d
; SCALAR-NEXT: testb $32, %sil
; SCALAR-NEXT: cmovel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shlb $6, %dl
; SCALAR-NEXT: movzbl %dl, %edx
; SCALAR-NEXT: testb $64, %sil
; SCALAR-NEXT: cmovel %ecx, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: xorl %eax, %edx
; SCALAR-NEXT: shlb $7, %dil
; SCALAR-NEXT: movzbl %dil, %eax
; SCALAR-NEXT: testb $-128, %sil
; SCALAR-NEXT: cmovel %ecx, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: # kill: def $al killed $al killed $eax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmul_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmul_i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: # kill: def $al killed $al killed $rax
; AVX-NEXT: retq
; Direct i8 carry-less multiply of the two arguments.
%res = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
ret i8 %res
}
; Lowering of llvm.clmul.i16.
; SCALAR: for each bit i of %b, AND out the bit mask, cmovne-select
; (%a << i) when set, and XOR all 16 partial products together.
; SSE-PCLMUL / AVX: a single (v)pclmulqdq $0 on the operands in XMM regs.
define i16 @clmul_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmul_i16:
; SCALAR: # %bb.0:
; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
; SCALAR-NEXT: leal (%rdi,%rdi), %eax
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $2, %ecx
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1, %eax
; SCALAR-NEXT: cmovnel %edi, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: leal (,%rdi,4), %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $4, %edx
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $8, %ecx
; SCALAR-NEXT: cmovnel %r8d, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $4, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $16, %edx
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $5, %eax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $32, %r8d
; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $6, %edx
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $64, %eax
; SCALAR-NEXT: cmovnel %edx, %eax
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $128, %edx
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $8, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $9, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $512, %edx # imm = 0x200
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $10, %r8d
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $1024, %ecx # imm = 0x400
; SCALAR-NEXT: cmovnel %r8d, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $12, %eax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $13, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $14, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $16384, %eax # imm = 0x4000
; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: shll $15, %edi
; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
; SCALAR-NEXT: cmovnel %edi, %esi
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: # kill: def $ax killed $ax killed $eax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmul_i16:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmul_i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: # kill: def $ax killed $ax killed $rax
; AVX-NEXT: retq
; Direct i16 carry-less multiply of the two arguments.
%res = call i16 @llvm.clmul.i16(i16 %a, i16 %b)
ret i16 %res
}
; Lowering of llvm.clmul.i32.
; SCALAR: the same per-bit expansion as the narrower widths, now over all
; 32 bits of %b (and/cmovne to select %a << i, XOR-reduce the partials).
; SSE-PCLMUL / AVX: a single (v)pclmulqdq $0 on the operands in XMM regs.
define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmul_i32:
; SCALAR: # %bb.0:
; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
; SCALAR-NEXT: leal (%rdi,%rdi), %eax
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $2, %ecx
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $1, %edx
; SCALAR-NEXT: cmovnel %edi, %edx
; SCALAR-NEXT: xorl %ecx, %edx
; SCALAR-NEXT: leal (,%rdi,4), %eax
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $4, %ecx
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $8, %eax
; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $4, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $16, %edx
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $5, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $32, %r8d
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $6, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $64, %ecx
; SCALAR-NEXT: cmovnel %edx, %ecx
; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $7, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $128, %edx
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $8, %eax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $9, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $512, %edx # imm = 0x200
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $10, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1024, %eax # imm = 0x400
; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $11, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $12, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $13, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $14, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $15, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
; SCALAR-NEXT: cmovnel %edx, %ecx
; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $16, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $17, %eax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $18, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $19, %eax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
; SCALAR-NEXT: cmovnel %eax, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $20, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $21, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $22, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $23, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $24, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $25, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $26, %ecx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %r8d, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $27, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
; SCALAR-NEXT: cmovnel %ecx, %r8d
; SCALAR-NEXT: xorl %edx, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $28, %edx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000
; SCALAR-NEXT: cmovnel %edx, %ecx
; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $29, %eax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $30, %r8d
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1073741824, %eax # imm = 0x40000000
; SCALAR-NEXT: cmovnel %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: shll $31, %edi
; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000
; SCALAR-NEXT: cmovnel %edi, %esi
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmul_i32:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmul_i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: # kill: def $eax killed $eax killed $rax
; AVX-NEXT: retq
; Direct i32 carry-less multiply of the two arguments.
%res = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
ret i32 %res
}
; Lowering of llvm.clmul.i64.
; SCALAR: per-bit expansion over all 64 bits. Bits 0-31 of %b are masked
; with 32-bit and/cmovne; bits 32-63 are probed with btq (the mask no
; longer fits an imm32) and selected with cmovae against a zero register.
; SSE-PCLMUL / AVX: the i64 case maps directly onto one (v)pclmulqdq $0.
define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmul_i64:
; SCALAR: # %bb.0:
; SCALAR-NEXT: leaq (%rdi,%rdi), %rax
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $2, %ecx
; SCALAR-NEXT: cmovneq %rax, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $1, %edx
; SCALAR-NEXT: cmovneq %rdi, %rdx
; SCALAR-NEXT: xorq %rcx, %rdx
; SCALAR-NEXT: leaq (,%rdi,4), %rax
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $4, %ecx
; SCALAR-NEXT: cmovneq %rax, %rcx
; SCALAR-NEXT: leaq (,%rdi,8), %r8
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $8, %eax
; SCALAR-NEXT: cmovneq %r8, %rax
; SCALAR-NEXT: xorq %rcx, %rax
; SCALAR-NEXT: xorq %rdx, %rax
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $4, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $16, %edx
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $5, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $32, %r8d
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $6, %rdx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $64, %ecx
; SCALAR-NEXT: cmovneq %rdx, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: xorq %rax, %rcx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $7, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $128, %edx
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $8, %rax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
; SCALAR-NEXT: cmovneq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $9, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $512, %edx # imm = 0x200
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $10, %r8
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $1024, %eax # imm = 0x400
; SCALAR-NEXT: cmovneq %r8, %rax
; SCALAR-NEXT: xorq %rdx, %rax
; SCALAR-NEXT: xorq %rcx, %rax
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $11, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $2048, %edx # imm = 0x800
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $12, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $13, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $14, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $15, %rdx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000
; SCALAR-NEXT: cmovneq %rdx, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: xorq %rax, %rcx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $16, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $17, %rax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
; SCALAR-NEXT: cmovneq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $18, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $19, %rax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
; SCALAR-NEXT: cmovneq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $20, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $21, %r8
; SCALAR-NEXT: movl %esi, %eax
; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000
; SCALAR-NEXT: cmovneq %r8, %rax
; SCALAR-NEXT: xorq %rdx, %rax
; SCALAR-NEXT: xorq %rcx, %rax
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $22, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $23, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $24, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $25, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $26, %rcx
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000
; SCALAR-NEXT: cmovneq %rcx, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $27, %rcx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
; SCALAR-NEXT: cmovneq %rcx, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $28, %rdx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000
; SCALAR-NEXT: cmovneq %rdx, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: xorq %rax, %rcx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $29, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $30, %rax
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
; SCALAR-NEXT: cmovneq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shlq $31, %rax
; SCALAR-NEXT: movl %esi, %edx
; SCALAR-NEXT: andl $-2147483648, %edx # imm = 0x80000000
; SCALAR-NEXT: cmovneq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $32, %r8
; SCALAR-NEXT: xorl %eax, %eax
; SCALAR-NEXT: btq $32, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $33, %rdx
; SCALAR-NEXT: btq $33, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $34, %r8
; SCALAR-NEXT: btq $34, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %r9
; SCALAR-NEXT: shlq $35, %r9
; SCALAR-NEXT: btq $35, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $36, %rdx
; SCALAR-NEXT: btq $36, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r9, %rdx
; SCALAR-NEXT: xorq %rcx, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $37, %rcx
; SCALAR-NEXT: btq $37, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $38, %r8
; SCALAR-NEXT: btq $38, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $39, %rcx
; SCALAR-NEXT: btq $39, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $40, %r8
; SCALAR-NEXT: btq $40, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $41, %rcx
; SCALAR-NEXT: btq $41, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $42, %r8
; SCALAR-NEXT: btq $42, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $43, %rcx
; SCALAR-NEXT: btq $43, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $44, %r8
; SCALAR-NEXT: btq $44, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %r9
; SCALAR-NEXT: shlq $45, %r9
; SCALAR-NEXT: btq $45, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $46, %rcx
; SCALAR-NEXT: btq $46, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r9, %rcx
; SCALAR-NEXT: xorq %rdx, %rcx
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $47, %rdx
; SCALAR-NEXT: btq $47, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $48, %r8
; SCALAR-NEXT: btq $48, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $49, %rdx
; SCALAR-NEXT: btq $49, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $50, %r8
; SCALAR-NEXT: btq $50, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $51, %rdx
; SCALAR-NEXT: btq $51, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $52, %r8
; SCALAR-NEXT: btq $52, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $53, %rdx
; SCALAR-NEXT: btq $53, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $54, %r8
; SCALAR-NEXT: btq $54, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $55, %rdx
; SCALAR-NEXT: btq $55, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $56, %r8
; SCALAR-NEXT: btq $56, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rdx, %r8
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: shlq $57, %rdx
; SCALAR-NEXT: btq $57, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rdx
; SCALAR-NEXT: xorq %r8, %rdx
; SCALAR-NEXT: xorq %rcx, %rdx
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $58, %rcx
; SCALAR-NEXT: btq $58, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $59, %r8
; SCALAR-NEXT: btq $59, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $60, %rcx
; SCALAR-NEXT: btq $60, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: shlq $61, %r8
; SCALAR-NEXT: btq $61, %rsi
; SCALAR-NEXT: cmovaeq %rax, %r8
; SCALAR-NEXT: xorq %rcx, %r8
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: shlq $62, %rcx
; SCALAR-NEXT: btq $62, %rsi
; SCALAR-NEXT: cmovaeq %rax, %rcx
; SCALAR-NEXT: xorq %r8, %rcx
; SCALAR-NEXT: shlq $63, %rdi
; SCALAR-NEXT: btq $63, %rsi
; SCALAR-NEXT: cmovbq %rdi, %rax
; SCALAR-NEXT: xorq %rcx, %rax
; SCALAR-NEXT: xorq %rdx, %rax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmul_i64:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE-PCLMUL-NEXT: movq %rdi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmul_i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovq %rsi, %xmm0
; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
; Direct i64 carry-less multiply of the two arguments.
%res = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
ret i64 %res
}
; "Reversed" (high-part) i8 carry-less multiply, emulated in IR by
; zero-extending both operands to i16, doing llvm.clmul.i16, then
; shifting the 15-bit product right by 7 and truncating back to i8.
; With PCLMUL this becomes movzbl + (v)pclmulqdq + shrl $7; the SCALAR
; expansion still materializes every shifted partial product.
define i8 @clmulr_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmulr_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movzbl %dil, %ecx
; SCALAR-NEXT: movl %ecx, %r11d
; SCALAR-NEXT: shll $8, %r11d
; SCALAR-NEXT: movl %ecx, %r10d
; SCALAR-NEXT: shll $9, %r10d
; SCALAR-NEXT: movl %ecx, %r9d
; SCALAR-NEXT: shll $10, %r9d
; SCALAR-NEXT: movl %ecx, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: shll $12, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $13, %edx
; SCALAR-NEXT: xorl %ebx, %ebx
; SCALAR-NEXT: testw %bx, %bx
; SCALAR-NEXT: cmovel %ebx, %edx
; SCALAR-NEXT: cmovel %ebx, %r8d
; SCALAR-NEXT: cmovel %ebx, %eax
; SCALAR-NEXT: cmovel %ebx, %r9d
; SCALAR-NEXT: cmovel %ebx, %r10d
; SCALAR-NEXT: cmovel %ebx, %r11d
; SCALAR-NEXT: shll $14, %edi
; SCALAR-NEXT: testw %bx, %bx
; SCALAR-NEXT: cmovnel %edi, %ebx
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $1, %edi
; SCALAR-NEXT: cmovnel %ecx, %edi
; SCALAR-NEXT: leal (%rcx,%rcx), %ebp
; SCALAR-NEXT: movl %esi, %r14d
; SCALAR-NEXT: andl $2, %r14d
; SCALAR-NEXT: cmovnel %ebp, %r14d
; SCALAR-NEXT: xorl %edi, %r14d
; SCALAR-NEXT: leal (,%rcx,4), %edi
; SCALAR-NEXT: movl %esi, %ebp
; SCALAR-NEXT: andl $4, %ebp
; SCALAR-NEXT: cmovnel %edi, %ebp
; SCALAR-NEXT: leal (,%rcx,8), %r15d
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $8, %edi
; SCALAR-NEXT: cmovnel %r15d, %edi
; SCALAR-NEXT: xorl %ebp, %edi
; SCALAR-NEXT: xorl %r14d, %edi
; SCALAR-NEXT: movl %ecx, %ebp
; SCALAR-NEXT: shll $4, %ebp
; SCALAR-NEXT: movl %esi, %r14d
; SCALAR-NEXT: andl $16, %r14d
; SCALAR-NEXT: cmovnel %ebp, %r14d
; SCALAR-NEXT: movl %ecx, %ebp
; SCALAR-NEXT: shll $5, %ebp
; SCALAR-NEXT: movl %esi, %r15d
; SCALAR-NEXT: andl $32, %r15d
; SCALAR-NEXT: cmovnel %ebp, %r15d
; SCALAR-NEXT: xorl %r14d, %r15d
; SCALAR-NEXT: movl %ecx, %ebp
; SCALAR-NEXT: shll $6, %ebp
; SCALAR-NEXT: movl %esi, %r14d
; SCALAR-NEXT: andl $64, %r14d
; SCALAR-NEXT: cmovnel %ebp, %r14d
; SCALAR-NEXT: xorl %r15d, %r14d
; SCALAR-NEXT: xorl %edi, %r14d
; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: andl $128, %esi
; SCALAR-NEXT: cmovel %esi, %ecx
; SCALAR-NEXT: xorl %r11d, %ecx
; SCALAR-NEXT: xorl %r10d, %ecx
; SCALAR-NEXT: xorl %r9d, %ecx
; SCALAR-NEXT: xorl %r14d, %ecx
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ebx, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: shrl $7, %eax
; SCALAR-NEXT: # kill: def $al killed $al killed $eax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulr_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzbl %sil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzbl %dil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $7, %eax
; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulr_i8:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %sil, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $7, %eax
; AVX-NEXT: # kill: def $al killed $al killed $rax
; AVX-NEXT: retq
; Widen, multiply carry-lessly, then take the high i8 of the product.
%a.ext = zext i8 %a to i16
%b.ext = zext i8 %b to i16
%clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
%res.ext = lshr i16 %clmul, 7
%res = trunc i16 %res.ext to i8
ret i8 %res
}
define i16 @clmulr_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmulr_i16:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %r13
; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: movl %edi, %r13d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: movl %edi, %r15d
; SCALAR-NEXT: movl %edi, %r9d
; SCALAR-NEXT: movl %edi, %ebp
; SCALAR-NEXT: movl %edi, %ebx
; SCALAR-NEXT: movl %edi, %r11d
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: movl %edi, %esi
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: movl %edi, %r14d
; SCALAR-NEXT: shll $16, %r14d
; SCALAR-NEXT: shll $17, %ecx
; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: shll $18, %r13d
; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: shll $19, %r12d
; SCALAR-NEXT: movl %r12d, %r13d
; SCALAR-NEXT: shll $20, %r15d
; SCALAR-NEXT: movl %r15d, %r12d
; SCALAR-NEXT: shll $21, %r9d
; SCALAR-NEXT: movl %r9d, %r15d
; SCALAR-NEXT: shll $22, %ebp
; SCALAR-NEXT: shll $23, %ebx
; SCALAR-NEXT: movl %ebx, %r9d
; SCALAR-NEXT: shll $24, %r11d
; SCALAR-NEXT: movl %r11d, %ebx
; SCALAR-NEXT: shll $25, %r8d
; SCALAR-NEXT: movl %r8d, %r11d
; SCALAR-NEXT: shll $26, %eax
; SCALAR-NEXT: movl %eax, %r8d
; SCALAR-NEXT: shll $27, %esi
; SCALAR-NEXT: shll $28, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $29, %ecx
; SCALAR-NEXT: xorl %eax, %eax
; SCALAR-NEXT: testl $0, %eax
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %edx
; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %esi
; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r11d
; SCALAR-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %ebx
; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r9d
; SCALAR-NEXT: cmovel %eax, %ebp
; SCALAR-NEXT: cmovel %eax, %r15d
; SCALAR-NEXT: movl %r15d, %r8d
; SCALAR-NEXT: cmovel %eax, %r12d
; SCALAR-NEXT: movl %r12d, %r15d
; SCALAR-NEXT: cmovel %eax, %r13d
; SCALAR-NEXT: movl %r13d, %r12d
; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Reload
; SCALAR-NEXT: cmovel %eax, %r13d
; SCALAR-NEXT: cmovel %eax, %r14d
; SCALAR-NEXT: movl %edi, %r11d
; SCALAR-NEXT: shll $30, %r11d
; SCALAR-NEXT: testl $0, %eax
; SCALAR-NEXT: cmovel %eax, %r11d
; SCALAR-NEXT: movzwl %di, %edi
; SCALAR-NEXT: movl %r10d, %eax
; SCALAR-NEXT: andl $1, %eax
; SCALAR-NEXT: cmovnel %edi, %eax
; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $2, %ecx
; SCALAR-NEXT: leal (%rdi,%rdi), %esi
; SCALAR-NEXT: cmovnel %esi, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %r10d, %eax
; SCALAR-NEXT: andl $4, %eax
; SCALAR-NEXT: leal (,%rdi,4), %esi
; SCALAR-NEXT: cmovnel %esi, %eax
; SCALAR-NEXT: movl %r10d, %esi
; SCALAR-NEXT: andl $8, %esi
; SCALAR-NEXT: leal (,%rdi,8), %ebx
; SCALAR-NEXT: cmovnel %ebx, %esi
; SCALAR-NEXT: xorl %eax, %esi
; SCALAR-NEXT: xorl %ecx, %esi
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $4, %eax
; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $16, %ecx
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $5, %eax
; SCALAR-NEXT: movl %r10d, %ebx
; SCALAR-NEXT: andl $32, %ebx
; SCALAR-NEXT: cmovnel %eax, %ebx
; SCALAR-NEXT: xorl %ecx, %ebx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $6, %ecx
; SCALAR-NEXT: movl %r10d, %eax
; SCALAR-NEXT: andl $64, %eax
; SCALAR-NEXT: cmovnel %ecx, %eax
; SCALAR-NEXT: xorl %ebx, %eax
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: movl %r10d, %esi
; SCALAR-NEXT: andl $128, %esi
; SCALAR-NEXT: cmovnel %ecx, %esi
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $8, %ecx
; SCALAR-NEXT: movl %r10d, %ebx
; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
; SCALAR-NEXT: cmovnel %ecx, %ebx
; SCALAR-NEXT: xorl %esi, %ebx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $9, %ecx
; SCALAR-NEXT: movl %r10d, %edx
; SCALAR-NEXT: andl $512, %edx # imm = 0x200
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %ebx, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $10, %ecx
; SCALAR-NEXT: movl %r10d, %esi
; SCALAR-NEXT: andl $1024, %esi # imm = 0x400
; SCALAR-NEXT: cmovnel %ecx, %esi
; SCALAR-NEXT: xorl %edx, %esi
; SCALAR-NEXT: xorl %eax, %esi
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $12, %eax
; SCALAR-NEXT: movl %r10d, %edx
; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %ecx, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $13, %eax
; SCALAR-NEXT: movl %r10d, %ecx
; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $14, %eax
; SCALAR-NEXT: movl %r10d, %edx
; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %ecx, %edx
; SCALAR-NEXT: shll $15, %edi
; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
; SCALAR-NEXT: cmovel %r10d, %edi
; SCALAR-NEXT: xorl %edx, %edi
; SCALAR-NEXT: xorl %esi, %edi
; SCALAR-NEXT: xorl %r13d, %r14d
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
; SCALAR-NEXT: xorl %r12d, %r14d
; SCALAR-NEXT: xorl %r15d, %r14d
; SCALAR-NEXT: xorl %r8d, %r14d
; SCALAR-NEXT: xorl %edi, %r14d
; SCALAR-NEXT: xorl %r9d, %ebp
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; SCALAR-NEXT: xorl %r14d, %ebp
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
; SCALAR-NEXT: xorl %ebp, %r11d
; SCALAR-NEXT: shrl $15, %r11d
; SCALAR-NEXT: movl %r11d, %eax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r12
; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulr_i16:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzwl %si, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzwl %di, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $15, %eax
; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulr_i16:
; AVX: # %bb.0:
; AVX-NEXT: movzwl %si, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $15, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $rax
; AVX-NEXT: retq
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
%clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext)
%res.ext = lshr i32 %clmul, 15
%res = trunc i32 %res.ext to i16
ret i16 %res
}
; Carry-less multiply "reversed" variant for i32: widen both operands to i64,
; carry-less multiply, then return bits [62:31] of the 64-bit product
; (lshr 31 + trunc). The SCALAR expansion lowers the clmul intrinsic to a
; per-bit shift/test/cmov/xor chain (bits 1..62 via shlq/btq/cmovaeq);
; PCLMUL-capable targets use a single (v)pclmulqdq plus a 31-bit right shift.
define i32 @clmulr_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmulr_i32:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: leaq (%rax,%rax), %rdx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $2, %esi
; SCALAR-NEXT: cmovneq %rdx, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $1, %edi
; SCALAR-NEXT: cmovneq %rax, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: leaq (,%rax,4), %rdx
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $4, %esi
; SCALAR-NEXT: cmovneq %rdx, %rsi
; SCALAR-NEXT: leaq (,%rax,8), %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $8, %edx
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $4, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $16, %edi
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $5, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $32, %r8d
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $6, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $64, %esi
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $7, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $128, %edi
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $8, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $9, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $512, %edi # imm = 0x200
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $10, %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $11, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $2048, %edi # imm = 0x800
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $12, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $13, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $14, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $15, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $16, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $17, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $18, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $19, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $20, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $21, %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $22, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $23, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $24, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $25, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $26, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $27, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $28, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $29, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $30, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $31, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $32, %r8
; SCALAR-NEXT: xorl %edx, %edx
; SCALAR-NEXT: btq $32, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $33, %rdi
; SCALAR-NEXT: btq $33, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $34, %r8
; SCALAR-NEXT: btq $34, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %r9
; SCALAR-NEXT: shlq $35, %r9
; SCALAR-NEXT: btq $35, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $36, %rdi
; SCALAR-NEXT: btq $36, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r9, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $37, %rsi
; SCALAR-NEXT: btq $37, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $38, %r8
; SCALAR-NEXT: btq $38, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $39, %rsi
; SCALAR-NEXT: btq $39, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $40, %r8
; SCALAR-NEXT: btq $40, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $41, %rsi
; SCALAR-NEXT: btq $41, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $42, %r8
; SCALAR-NEXT: btq $42, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $43, %rsi
; SCALAR-NEXT: btq $43, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $44, %r8
; SCALAR-NEXT: btq $44, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %r9
; SCALAR-NEXT: shlq $45, %r9
; SCALAR-NEXT: btq $45, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $46, %rsi
; SCALAR-NEXT: btq $46, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r9, %rsi
; SCALAR-NEXT: xorq %rdi, %rsi
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $47, %rdi
; SCALAR-NEXT: btq $47, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $48, %r8
; SCALAR-NEXT: btq $48, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $49, %rdi
; SCALAR-NEXT: btq $49, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $50, %r8
; SCALAR-NEXT: btq $50, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $51, %rdi
; SCALAR-NEXT: btq $51, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $52, %r8
; SCALAR-NEXT: btq $52, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $53, %rdi
; SCALAR-NEXT: btq $53, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $54, %r8
; SCALAR-NEXT: btq $54, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $55, %rdi
; SCALAR-NEXT: btq $55, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $56, %r8
; SCALAR-NEXT: btq $56, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $57, %rdi
; SCALAR-NEXT: btq $57, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $58, %rsi
; SCALAR-NEXT: btq $58, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $59, %r8
; SCALAR-NEXT: btq $59, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $60, %rsi
; SCALAR-NEXT: btq $60, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $61, %r8
; SCALAR-NEXT: btq $61, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: shlq $62, %rax
; SCALAR-NEXT: btq $62, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rax
; SCALAR-NEXT: xorq %r8, %rax
; SCALAR-NEXT: xorq %rdi, %rax
; SCALAR-NEXT: shrq $31, %rax
; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulr_i32:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrq $31, %rax
; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulr_i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrq $31, %rax
; AVX-NEXT: # kill: def $eax killed $eax killed $rax
; AVX-NEXT: retq
%a.ext = zext i32 %a to i64 ; widen so the full carry-less product fits in 64 bits
%b.ext = zext i32 %b to i64
%clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
%res.ext = lshr i64 %clmul, 31 ; keep bits [62:31] of the product
%res = trunc i64 %res.ext to i32
ret i32 %res
}
; i64 variant of the reversed carry-less multiply: operands widened to i128,
; clmul, result is bits [126:63] (lshr 63 + trunc). The SCALAR expansion
; bit-reverses both operands (bswapq plus the 0x0F0F.../0x3333.../0x5555...
; nibble/pair/bit swaps), runs the per-bit cmov/xor clmul chain, then
; bit-reverses the result back. PCLMUL targets lower to one (v)pclmulqdq and
; extract bits [126:63] of the 128-bit product with shldq $1.
define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmulr_i64:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shrq $4, %rax
; SCALAR-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; SCALAR-NEXT: andq %rcx, %rax
; SCALAR-NEXT: andq %rcx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rax, %rdi
; SCALAR-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: andq %rax, %rdx
; SCALAR-NEXT: shrq $2, %rdi
; SCALAR-NEXT: andq %rax, %rdi
; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rdi
; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: andq %rdx, %r8
; SCALAR-NEXT: shrq %rdi
; SCALAR-NEXT: movq %rdi, %r9
; SCALAR-NEXT: andq %rdx, %r9
; SCALAR-NEXT: leaq (%r9,%r8,2), %r8
; SCALAR-NEXT: leaq (%r8,%r8), %r9
; SCALAR-NEXT: bswapq %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: shrq $4, %r10
; SCALAR-NEXT: andq %rcx, %r10
; SCALAR-NEXT: andq %rcx, %rsi
; SCALAR-NEXT: shlq $4, %rsi
; SCALAR-NEXT: orq %r10, %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: andq %rax, %r10
; SCALAR-NEXT: shrq $2, %rsi
; SCALAR-NEXT: andq %rax, %rsi
; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: andq %rdx, %r10
; SCALAR-NEXT: shrq %rsi
; SCALAR-NEXT: andq %rdx, %rsi
; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $2, %r10d
; SCALAR-NEXT: cmovneq %r9, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $1, %r11d
; SCALAR-NEXT: cmovneq %r8, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: leaq (,%r8,4), %r9
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $4, %r10d
; SCALAR-NEXT: cmovneq %r9, %r10
; SCALAR-NEXT: leaq (,%r8,8), %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $8, %r9d
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $4, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $16, %r11d
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $5, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $32, %ebx
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $6, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $64, %r10d
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $7, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $128, %r11d
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $8, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $9, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $512, %r11d # imm = 0x200
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $10, %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $11, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $12, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $13, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $14, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $15, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $16, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $17, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $18, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $19, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $20, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $21, %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $22, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $23, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $24, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $25, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $26, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $27, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $28, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $29, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $30, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $31, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $32, %rbx
; SCALAR-NEXT: xorl %r9d, %r9d
; SCALAR-NEXT: btq $32, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $33, %r11
; SCALAR-NEXT: btq $33, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $34, %rbx
; SCALAR-NEXT: btq $34, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r14
; SCALAR-NEXT: shlq $35, %r14
; SCALAR-NEXT: btq $35, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r14
; SCALAR-NEXT: xorq %rbx, %r14
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $36, %r11
; SCALAR-NEXT: btq $36, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %r14, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $37, %r10
; SCALAR-NEXT: btq $37, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $38, %rbx
; SCALAR-NEXT: btq $38, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $39, %r10
; SCALAR-NEXT: btq $39, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $40, %rbx
; SCALAR-NEXT: btq $40, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $41, %r10
; SCALAR-NEXT: btq $41, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $42, %rbx
; SCALAR-NEXT: btq $42, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $43, %r10
; SCALAR-NEXT: btq $43, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $44, %rbx
; SCALAR-NEXT: btq $44, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r14
; SCALAR-NEXT: shlq $45, %r14
; SCALAR-NEXT: btq $45, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r14
; SCALAR-NEXT: xorq %rbx, %r14
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $46, %r10
; SCALAR-NEXT: btq $46, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %r14, %r10
; SCALAR-NEXT: xorq %r11, %r10
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $47, %r11
; SCALAR-NEXT: btq $47, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $48, %rbx
; SCALAR-NEXT: btq $48, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $49, %r11
; SCALAR-NEXT: btq $49, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $50, %rbx
; SCALAR-NEXT: btq $50, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $51, %r11
; SCALAR-NEXT: btq $51, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $52, %rbx
; SCALAR-NEXT: btq $52, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $53, %r11
; SCALAR-NEXT: btq $53, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $54, %rbx
; SCALAR-NEXT: btq $54, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $55, %r11
; SCALAR-NEXT: btq $55, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $56, %rbx
; SCALAR-NEXT: btq $56, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $57, %r11
; SCALAR-NEXT: btq $57, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $58, %r10
; SCALAR-NEXT: btq $58, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $59, %rbx
; SCALAR-NEXT: btq $59, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $60, %r10
; SCALAR-NEXT: btq $60, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $61, %rbx
; SCALAR-NEXT: btq $61, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: shlq $62, %r8
; SCALAR-NEXT: btq $62, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r8
; SCALAR-NEXT: xorq %rbx, %r8
; SCALAR-NEXT: shlq $63, %rdi
; SCALAR-NEXT: btq $63, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %r11, %rdi
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rsi
; SCALAR-NEXT: shrq $4, %rsi
; SCALAR-NEXT: andq %rcx, %rsi
; SCALAR-NEXT: andq %rcx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rsi, %rdi
; SCALAR-NEXT: movq %rdi, %rcx
; SCALAR-NEXT: andq %rax, %rcx
; SCALAR-NEXT: shrq $2, %rdi
; SCALAR-NEXT: andq %rax, %rdi
; SCALAR-NEXT: leaq (%rdi,%rcx,4), %rax
; SCALAR-NEXT: movq %rax, %rcx
; SCALAR-NEXT: andq %rdx, %rcx
; SCALAR-NEXT: shrq %rax
; SCALAR-NEXT: andq %rdx, %rax
; SCALAR-NEXT: leaq (%rax,%rcx,2), %rax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: retq
;
; SSE2-PCLMUL-LABEL: clmulr_i64:
; SSE2-PCLMUL: # %bb.0:
; SSE2-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE2-PCLMUL-NEXT: movq %rdi, %xmm1
; SSE2-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE2-PCLMUL-NEXT: movq %xmm1, %rcx
; SSE2-PCLMUL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE2-PCLMUL-NEXT: movq %xmm0, %rax
; SSE2-PCLMUL-NEXT: shldq $1, %rcx, %rax
; SSE2-PCLMUL-NEXT: retq
;
; SSE42-PCLMUL-LABEL: clmulr_i64:
; SSE42-PCLMUL: # %bb.0:
; SSE42-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE42-PCLMUL-NEXT: movq %rdi, %xmm1
; SSE42-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE42-PCLMUL-NEXT: movq %xmm1, %rcx
; SSE42-PCLMUL-NEXT: pextrq $1, %xmm1, %rax
; SSE42-PCLMUL-NEXT: shldq $1, %rcx, %rax
; SSE42-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulr_i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovq %rsi, %xmm0
; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: shldq $1, %rcx, %rax
; AVX-NEXT: retq
%a.ext = zext i64 %a to i128 ; widen so the full carry-less product fits in 128 bits
%b.ext = zext i64 %b to i128
%clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
%res.ext = lshr i128 %clmul, 63 ; keep bits [126:63] of the product
%res = trunc i128 %res.ext to i64
ret i64 %res
}
define i8 @clmulh_i8(i8 %a, i8 %b) nounwind {
; SCALAR-LABEL: clmulh_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movzbl %dil, %ecx
; SCALAR-NEXT: movl %ecx, %ebx
; SCALAR-NEXT: shll $8, %ebx
; SCALAR-NEXT: movl %ecx, %r11d
; SCALAR-NEXT: shll $9, %r11d
; SCALAR-NEXT: movl %ecx, %r10d
; SCALAR-NEXT: shll $10, %r10d
; SCALAR-NEXT: movl %ecx, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %ecx, %r9d
; SCALAR-NEXT: shll $12, %r9d
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: shll $13, %r8d
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: shll $14, %edx
; SCALAR-NEXT: xorl %ebp, %ebp
; SCALAR-NEXT: testw %bp, %bp
; SCALAR-NEXT: cmovel %ebp, %edx
; SCALAR-NEXT: cmovel %ebp, %r8d
; SCALAR-NEXT: cmovel %ebp, %r9d
; SCALAR-NEXT: cmovel %ebp, %eax
; SCALAR-NEXT: cmovel %ebp, %r10d
; SCALAR-NEXT: cmovel %ebp, %r11d
; SCALAR-NEXT: cmovel %ebp, %ebx
; SCALAR-NEXT: shll $15, %edi
; SCALAR-NEXT: testw %bp, %bp
; SCALAR-NEXT: cmovnel %edi, %ebp
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $1, %edi
; SCALAR-NEXT: cmovnel %ecx, %edi
; SCALAR-NEXT: leal (%rcx,%rcx), %r14d
; SCALAR-NEXT: movl %esi, %r15d
; SCALAR-NEXT: andl $2, %r15d
; SCALAR-NEXT: cmovnel %r14d, %r15d
; SCALAR-NEXT: xorl %edi, %r15d
; SCALAR-NEXT: leal (,%rcx,4), %edi
; SCALAR-NEXT: movl %esi, %r14d
; SCALAR-NEXT: andl $4, %r14d
; SCALAR-NEXT: cmovnel %edi, %r14d
; SCALAR-NEXT: leal (,%rcx,8), %r12d
; SCALAR-NEXT: movl %esi, %edi
; SCALAR-NEXT: andl $8, %edi
; SCALAR-NEXT: cmovnel %r12d, %edi
; SCALAR-NEXT: xorl %r14d, %edi
; SCALAR-NEXT: xorl %r15d, %edi
; SCALAR-NEXT: movl %ecx, %r14d
; SCALAR-NEXT: shll $4, %r14d
; SCALAR-NEXT: movl %esi, %r15d
; SCALAR-NEXT: andl $16, %r15d
; SCALAR-NEXT: cmovnel %r14d, %r15d
; SCALAR-NEXT: movl %ecx, %r14d
; SCALAR-NEXT: shll $5, %r14d
; SCALAR-NEXT: movl %esi, %r12d
; SCALAR-NEXT: andl $32, %r12d
; SCALAR-NEXT: cmovnel %r14d, %r12d
; SCALAR-NEXT: xorl %r15d, %r12d
; SCALAR-NEXT: movl %ecx, %r14d
; SCALAR-NEXT: shll $6, %r14d
; SCALAR-NEXT: movl %esi, %r15d
; SCALAR-NEXT: andl $64, %r15d
; SCALAR-NEXT: cmovnel %r14d, %r15d
; SCALAR-NEXT: xorl %r12d, %r15d
; SCALAR-NEXT: xorl %edi, %r15d
; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: andl $128, %esi
; SCALAR-NEXT: cmovel %esi, %ecx
; SCALAR-NEXT: xorl %ebx, %ecx
; SCALAR-NEXT: xorl %r11d, %ecx
; SCALAR-NEXT: xorl %r10d, %ecx
; SCALAR-NEXT: xorl %r15d, %ecx
; SCALAR-NEXT: xorl %r9d, %eax
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %edx, %eax
; SCALAR-NEXT: xorl %ebp, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: shrl $8, %eax
; SCALAR-NEXT: # kill: def $al killed $al killed $eax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r12
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulh_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzbl %sil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzbl %dil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $8, %eax
; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulh_i8:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %sil, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $8, %eax
; AVX-NEXT: # kill: def $al killed $al killed $rax
; AVX-NEXT: retq
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py -- do not hand-edit them; rerun the
; script after any codegen change.
; clmulh_i8: "carry-less multiply, high half" for i8. Both operands are
; zero-extended to i16, carry-lessly multiplied via llvm.clmul.i16, and the
; upper 8 bits of the 16-bit product are returned.
  %a.ext = zext i8 %a to i16
  %b.ext = zext i8 %b to i16
  %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext)
; Shift the 16-bit product right by 8 to isolate the high byte.
  %res.ext = lshr i16 %clmul, 8
  %res = trunc i16 %res.ext to i8
  ret i8 %res
}
define i16 @clmulh_i16(i16 %a, i16 %b) nounwind {
; SCALAR-LABEL: clmulh_i16:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %r13
; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: movl %edi, %r14d
; SCALAR-NEXT: movl %edi, %r13d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: movl %edi, %r15d
; SCALAR-NEXT: movl %edi, %ebp
; SCALAR-NEXT: movl %edi, %r10d
; SCALAR-NEXT: movl %edi, %ebx
; SCALAR-NEXT: movl %edi, %r11d
; SCALAR-NEXT: movl %edi, %esi
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: movl %edi, %edx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shll $16, %r8d
; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: shll $17, %r14d
; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: shll $18, %r13d
; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: shll $19, %r12d
; SCALAR-NEXT: movl %r12d, %r13d
; SCALAR-NEXT: shll $20, %r15d
; SCALAR-NEXT: movl %r15d, %r12d
; SCALAR-NEXT: shll $21, %ebp
; SCALAR-NEXT: movl %ebp, %r15d
; SCALAR-NEXT: shll $22, %r10d
; SCALAR-NEXT: shll $23, %ebx
; SCALAR-NEXT: movl %ebx, %r14d
; SCALAR-NEXT: shll $24, %r11d
; SCALAR-NEXT: movl %r11d, %ebp
; SCALAR-NEXT: shll $25, %esi
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: shll $26, %ecx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: shll $27, %eax
; SCALAR-NEXT: movl %eax, %esi
; SCALAR-NEXT: shll $28, %edx
; SCALAR-NEXT: movl %edi, %r11d
; SCALAR-NEXT: shll $29, %r11d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $30, %ecx
; SCALAR-NEXT: xorl %eax, %eax
; SCALAR-NEXT: testl $0, %eax
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r11d
; SCALAR-NEXT: cmovel %eax, %edx
; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %esi
; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %ebx
; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %ebp
; SCALAR-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r14d
; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r10d
; SCALAR-NEXT: cmovel %eax, %r15d
; SCALAR-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SCALAR-NEXT: cmovel %eax, %r12d
; SCALAR-NEXT: cmovel %eax, %r13d
; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
; SCALAR-NEXT: cmovel %eax, %r15d
; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Reload
; SCALAR-NEXT: cmovel %eax, %r14d
; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: movl %edi, %ebx
; SCALAR-NEXT: shll $31, %ebx
; SCALAR-NEXT: testl $0, %eax
; SCALAR-NEXT: cmovel %eax, %ebx
; SCALAR-NEXT: movzwl %di, %edi
; SCALAR-NEXT: movl %r9d, %eax
; SCALAR-NEXT: andl $1, %eax
; SCALAR-NEXT: cmovnel %edi, %eax
; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $2, %ecx
; SCALAR-NEXT: leal (%rdi,%rdi), %esi
; SCALAR-NEXT: cmovnel %esi, %ecx
; SCALAR-NEXT: xorl %eax, %ecx
; SCALAR-NEXT: movl %r9d, %eax
; SCALAR-NEXT: andl $4, %eax
; SCALAR-NEXT: leal (,%rdi,4), %esi
; SCALAR-NEXT: cmovnel %esi, %eax
; SCALAR-NEXT: movl %r9d, %esi
; SCALAR-NEXT: andl $8, %esi
; SCALAR-NEXT: leal (,%rdi,8), %ebp
; SCALAR-NEXT: cmovnel %ebp, %esi
; SCALAR-NEXT: xorl %eax, %esi
; SCALAR-NEXT: xorl %ecx, %esi
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $4, %eax
; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $16, %ecx
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $5, %eax
; SCALAR-NEXT: movl %r9d, %ebp
; SCALAR-NEXT: andl $32, %ebp
; SCALAR-NEXT: cmovnel %eax, %ebp
; SCALAR-NEXT: xorl %ecx, %ebp
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $6, %ecx
; SCALAR-NEXT: movl %r9d, %eax
; SCALAR-NEXT: andl $64, %eax
; SCALAR-NEXT: cmovnel %ecx, %eax
; SCALAR-NEXT: xorl %ebp, %eax
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $7, %ecx
; SCALAR-NEXT: movl %r9d, %esi
; SCALAR-NEXT: andl $128, %esi
; SCALAR-NEXT: cmovnel %ecx, %esi
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $8, %ecx
; SCALAR-NEXT: movl %r9d, %ebp
; SCALAR-NEXT: andl $256, %ebp # imm = 0x100
; SCALAR-NEXT: cmovnel %ecx, %ebp
; SCALAR-NEXT: xorl %esi, %ebp
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $9, %ecx
; SCALAR-NEXT: movl %r9d, %edx
; SCALAR-NEXT: andl $512, %edx # imm = 0x200
; SCALAR-NEXT: cmovnel %ecx, %edx
; SCALAR-NEXT: xorl %ebp, %edx
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shll $10, %ecx
; SCALAR-NEXT: movl %r9d, %esi
; SCALAR-NEXT: andl $1024, %esi # imm = 0x400
; SCALAR-NEXT: cmovnel %ecx, %esi
; SCALAR-NEXT: xorl %edx, %esi
; SCALAR-NEXT: xorl %eax, %esi
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $12, %eax
; SCALAR-NEXT: movl %r9d, %edx
; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %ecx, %edx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $13, %eax
; SCALAR-NEXT: movl %r9d, %ecx
; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000
; SCALAR-NEXT: cmovnel %eax, %ecx
; SCALAR-NEXT: xorl %edx, %ecx
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: shll $14, %eax
; SCALAR-NEXT: movl %r9d, %edx
; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000
; SCALAR-NEXT: cmovnel %eax, %edx
; SCALAR-NEXT: xorl %ecx, %edx
; SCALAR-NEXT: shll $15, %edi
; SCALAR-NEXT: andl $32768, %r9d # imm = 0x8000
; SCALAR-NEXT: cmovel %r9d, %edi
; SCALAR-NEXT: xorl %edx, %edi
; SCALAR-NEXT: xorl %esi, %edi
; SCALAR-NEXT: xorl %r14d, %r8d
; SCALAR-NEXT: xorl %r15d, %r8d
; SCALAR-NEXT: xorl %r13d, %r8d
; SCALAR-NEXT: xorl %r12d, %r8d
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
; SCALAR-NEXT: xorl %edi, %r8d
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
; SCALAR-NEXT: xorl %r8d, %r10d
; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
; SCALAR-NEXT: xorl %ebx, %r11d
; SCALAR-NEXT: xorl %r10d, %r11d
; SCALAR-NEXT: shrl $16, %r11d
; SCALAR-NEXT: movl %r11d, %eax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r12
; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulh_i16:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzwl %si, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzwl %di, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $16, %eax
; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulh_i16:
; AVX: # %bb.0:
; AVX-NEXT: movzwl %si, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $16, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $rax
; AVX-NEXT: retq
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py -- do not hand-edit them; rerun the
; script after any codegen change.
; clmulh_i16: carry-less multiply high for i16 -- zero-extend both operands
; to i32, clmul via llvm.clmul.i32, and return the upper 16 bits.
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext)
; Shift the 32-bit product right by 16 to isolate the high half.
  %res.ext = lshr i32 %clmul, 16
  %res = trunc i32 %res.ext to i16
  ret i16 %res
}
define i32 @clmulh_i32(i32 %a, i32 %b) nounwind {
; SCALAR-LABEL: clmulh_i32:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl %edi, %eax
; SCALAR-NEXT: leaq (%rax,%rax), %rdx
; SCALAR-NEXT: movl %esi, %ecx
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $2, %esi
; SCALAR-NEXT: cmovneq %rdx, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $1, %edi
; SCALAR-NEXT: cmovneq %rax, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: leaq (,%rax,4), %rdx
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $4, %esi
; SCALAR-NEXT: cmovneq %rdx, %rsi
; SCALAR-NEXT: leaq (,%rax,8), %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $8, %edx
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $4, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $16, %edi
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $5, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $32, %r8d
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $6, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $64, %esi
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $7, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $128, %edi
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $8, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $256, %r8d # imm = 0x100
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $9, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $512, %edi # imm = 0x200
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $10, %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $1024, %edx # imm = 0x400
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $11, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $2048, %edi # imm = 0x800
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $12, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $13, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $14, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $15, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $16, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $17, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $18, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $19, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $20, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $21, %r8
; SCALAR-NEXT: movl %ecx, %edx
; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000
; SCALAR-NEXT: cmovneq %r8, %rdx
; SCALAR-NEXT: xorq %rdi, %rdx
; SCALAR-NEXT: xorq %rsi, %rdx
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $22, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $23, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $24, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $25, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $26, %rsi
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000
; SCALAR-NEXT: cmovneq %rsi, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $27, %rsi
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000
; SCALAR-NEXT: cmovneq %rsi, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $28, %rdi
; SCALAR-NEXT: movl %ecx, %esi
; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000
; SCALAR-NEXT: cmovneq %rdi, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: xorq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $29, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $30, %rdx
; SCALAR-NEXT: movl %ecx, %r8d
; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000
; SCALAR-NEXT: cmovneq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdx
; SCALAR-NEXT: shlq $31, %rdx
; SCALAR-NEXT: movl %ecx, %edi
; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000
; SCALAR-NEXT: cmovneq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $32, %r8
; SCALAR-NEXT: xorl %edx, %edx
; SCALAR-NEXT: btq $32, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $33, %rdi
; SCALAR-NEXT: btq $33, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $34, %r8
; SCALAR-NEXT: btq $34, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %r9
; SCALAR-NEXT: shlq $35, %r9
; SCALAR-NEXT: btq $35, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $36, %rdi
; SCALAR-NEXT: btq $36, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r9, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $37, %rsi
; SCALAR-NEXT: btq $37, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $38, %r8
; SCALAR-NEXT: btq $38, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $39, %rsi
; SCALAR-NEXT: btq $39, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $40, %r8
; SCALAR-NEXT: btq $40, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $41, %rsi
; SCALAR-NEXT: btq $41, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $42, %r8
; SCALAR-NEXT: btq $42, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $43, %rsi
; SCALAR-NEXT: btq $43, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $44, %r8
; SCALAR-NEXT: btq $44, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %r9
; SCALAR-NEXT: shlq $45, %r9
; SCALAR-NEXT: btq $45, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r9
; SCALAR-NEXT: xorq %r8, %r9
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $46, %rsi
; SCALAR-NEXT: btq $46, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r9, %rsi
; SCALAR-NEXT: xorq %rdi, %rsi
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $47, %rdi
; SCALAR-NEXT: btq $47, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $48, %r8
; SCALAR-NEXT: btq $48, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $49, %rdi
; SCALAR-NEXT: btq $49, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $50, %r8
; SCALAR-NEXT: btq $50, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $51, %rdi
; SCALAR-NEXT: btq $51, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $52, %r8
; SCALAR-NEXT: btq $52, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $53, %rdi
; SCALAR-NEXT: btq $53, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $54, %r8
; SCALAR-NEXT: btq $54, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $55, %rdi
; SCALAR-NEXT: btq $55, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $56, %r8
; SCALAR-NEXT: btq $56, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rdi, %r8
; SCALAR-NEXT: movq %rax, %rdi
; SCALAR-NEXT: shlq $57, %rdi
; SCALAR-NEXT: btq $57, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %rsi, %rdi
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $58, %rsi
; SCALAR-NEXT: btq $58, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $59, %r8
; SCALAR-NEXT: btq $59, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $60, %rsi
; SCALAR-NEXT: btq $60, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: movq %rax, %r8
; SCALAR-NEXT: shlq $61, %r8
; SCALAR-NEXT: btq $61, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %r8
; SCALAR-NEXT: xorq %rsi, %r8
; SCALAR-NEXT: movq %rax, %rsi
; SCALAR-NEXT: shlq $62, %rsi
; SCALAR-NEXT: btq $62, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rsi
; SCALAR-NEXT: xorq %r8, %rsi
; SCALAR-NEXT: shlq $63, %rax
; SCALAR-NEXT: btq $63, %rcx
; SCALAR-NEXT: cmovaeq %rdx, %rax
; SCALAR-NEXT: xorq %rsi, %rax
; SCALAR-NEXT: xorq %rdi, %rax
; SCALAR-NEXT: shrq $32, %rax
; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: clmulh_i32:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrq $32, %rax
; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulh_i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrq $32, %rax
; AVX-NEXT: # kill: def $eax killed $eax killed $rax
; AVX-NEXT: retq
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py -- do not hand-edit them; rerun the
; script after any codegen change.
; clmulh_i32: carry-less multiply high for i32 -- zero-extend both operands
; to i64, clmul via llvm.clmul.i64, and return the upper 32 bits.
  %a.ext = zext i32 %a to i64
  %b.ext = zext i32 %b to i64
  %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
; Shift the 64-bit product right by 32 to isolate the high half.
  %res.ext = lshr i64 %clmul, 32
  %res = trunc i64 %res.ext to i32
  ret i32 %res
}
define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
; SCALAR-LABEL: clmulh_i64:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: shrq $4, %rax
; SCALAR-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
; SCALAR-NEXT: andq %rdx, %rax
; SCALAR-NEXT: andq %rdx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rax, %rdi
; SCALAR-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; SCALAR-NEXT: movq %rdi, %rax
; SCALAR-NEXT: andq %rcx, %rax
; SCALAR-NEXT: shrq $2, %rdi
; SCALAR-NEXT: andq %rcx, %rdi
; SCALAR-NEXT: leaq (%rdi,%rax,4), %rdi
; SCALAR-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; SCALAR-NEXT: movq %rdi, %r8
; SCALAR-NEXT: andq %rax, %r8
; SCALAR-NEXT: shrq %rdi
; SCALAR-NEXT: movq %rdi, %r9
; SCALAR-NEXT: andq %rax, %r9
; SCALAR-NEXT: leaq (%r9,%r8,2), %r8
; SCALAR-NEXT: leaq (%r8,%r8), %r9
; SCALAR-NEXT: bswapq %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: shrq $4, %r10
; SCALAR-NEXT: andq %rdx, %r10
; SCALAR-NEXT: andq %rdx, %rsi
; SCALAR-NEXT: shlq $4, %rsi
; SCALAR-NEXT: orq %r10, %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: andq %rcx, %r10
; SCALAR-NEXT: shrq $2, %rsi
; SCALAR-NEXT: andq %rcx, %rsi
; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi
; SCALAR-NEXT: movq %rsi, %r10
; SCALAR-NEXT: andq %rax, %r10
; SCALAR-NEXT: shrq %rsi
; SCALAR-NEXT: andq %rax, %rsi
; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $2, %r10d
; SCALAR-NEXT: cmovneq %r9, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $1, %r11d
; SCALAR-NEXT: cmovneq %r8, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: leaq (,%r8,4), %r9
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $4, %r10d
; SCALAR-NEXT: cmovneq %r9, %r10
; SCALAR-NEXT: leaq (,%r8,8), %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $8, %r9d
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $4, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $16, %r11d
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $5, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $32, %ebx
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $6, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $64, %r10d
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $7, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $128, %r11d
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $8, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $256, %ebx # imm = 0x100
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $9, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $512, %r11d # imm = 0x200
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $10, %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $11, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $12, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $13, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $14, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $15, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $16, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $17, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $18, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $19, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $20, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $21, %rbx
; SCALAR-NEXT: movl %esi, %r9d
; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000
; SCALAR-NEXT: cmovneq %rbx, %r9
; SCALAR-NEXT: xorq %r11, %r9
; SCALAR-NEXT: xorq %r10, %r9
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $22, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $23, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $24, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $25, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $26, %r10
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000
; SCALAR-NEXT: cmovneq %r10, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $27, %r10
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000
; SCALAR-NEXT: cmovneq %r10, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $28, %r11
; SCALAR-NEXT: movl %esi, %r10d
; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000
; SCALAR-NEXT: cmovneq %r11, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: xorq %r9, %r10
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $29, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $30, %r9
; SCALAR-NEXT: movl %esi, %ebx
; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000
; SCALAR-NEXT: cmovneq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r9
; SCALAR-NEXT: shlq $31, %r9
; SCALAR-NEXT: movl %esi, %r11d
; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000
; SCALAR-NEXT: cmovneq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $32, %rbx
; SCALAR-NEXT: xorl %r9d, %r9d
; SCALAR-NEXT: btq $32, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $33, %r11
; SCALAR-NEXT: btq $33, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $34, %rbx
; SCALAR-NEXT: btq $34, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r14
; SCALAR-NEXT: shlq $35, %r14
; SCALAR-NEXT: btq $35, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r14
; SCALAR-NEXT: xorq %rbx, %r14
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $36, %r11
; SCALAR-NEXT: btq $36, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %r14, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $37, %r10
; SCALAR-NEXT: btq $37, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $38, %rbx
; SCALAR-NEXT: btq $38, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $39, %r10
; SCALAR-NEXT: btq $39, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $40, %rbx
; SCALAR-NEXT: btq $40, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $41, %r10
; SCALAR-NEXT: btq $41, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $42, %rbx
; SCALAR-NEXT: btq $42, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $43, %r10
; SCALAR-NEXT: btq $43, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $44, %rbx
; SCALAR-NEXT: btq $44, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r14
; SCALAR-NEXT: shlq $45, %r14
; SCALAR-NEXT: btq $45, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r14
; SCALAR-NEXT: xorq %rbx, %r14
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $46, %r10
; SCALAR-NEXT: btq $46, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %r14, %r10
; SCALAR-NEXT: xorq %r11, %r10
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $47, %r11
; SCALAR-NEXT: btq $47, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $48, %rbx
; SCALAR-NEXT: btq $48, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $49, %r11
; SCALAR-NEXT: btq $49, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $50, %rbx
; SCALAR-NEXT: btq $50, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $51, %r11
; SCALAR-NEXT: btq $51, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $52, %rbx
; SCALAR-NEXT: btq $52, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $53, %r11
; SCALAR-NEXT: btq $53, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $54, %rbx
; SCALAR-NEXT: btq $54, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $55, %r11
; SCALAR-NEXT: btq $55, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $56, %rbx
; SCALAR-NEXT: btq $56, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r11, %rbx
; SCALAR-NEXT: movq %r8, %r11
; SCALAR-NEXT: shlq $57, %r11
; SCALAR-NEXT: btq $57, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r11
; SCALAR-NEXT: xorq %rbx, %r11
; SCALAR-NEXT: xorq %r10, %r11
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $58, %r10
; SCALAR-NEXT: btq $58, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $59, %rbx
; SCALAR-NEXT: btq $59, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: movq %r8, %r10
; SCALAR-NEXT: shlq $60, %r10
; SCALAR-NEXT: btq $60, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r10
; SCALAR-NEXT: xorq %rbx, %r10
; SCALAR-NEXT: movq %r8, %rbx
; SCALAR-NEXT: shlq $61, %rbx
; SCALAR-NEXT: btq $61, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rbx
; SCALAR-NEXT: xorq %r10, %rbx
; SCALAR-NEXT: shlq $62, %r8
; SCALAR-NEXT: btq $62, %rsi
; SCALAR-NEXT: cmovaeq %r9, %r8
; SCALAR-NEXT: xorq %rbx, %r8
; SCALAR-NEXT: shlq $63, %rdi
; SCALAR-NEXT: btq $63, %rsi
; SCALAR-NEXT: cmovaeq %r9, %rdi
; SCALAR-NEXT: xorq %r8, %rdi
; SCALAR-NEXT: xorq %r11, %rdi
; SCALAR-NEXT: bswapq %rdi
; SCALAR-NEXT: movq %rdi, %rsi
; SCALAR-NEXT: shrq $4, %rsi
; SCALAR-NEXT: andq %rdx, %rsi
; SCALAR-NEXT: andq %rdx, %rdi
; SCALAR-NEXT: shlq $4, %rdi
; SCALAR-NEXT: orq %rsi, %rdi
; SCALAR-NEXT: movq %rdi, %rdx
; SCALAR-NEXT: andq %rcx, %rdx
; SCALAR-NEXT: shrq $2, %rdi
; SCALAR-NEXT: andq %rcx, %rdi
; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rcx
; SCALAR-NEXT: andq %rcx, %rax
; SCALAR-NEXT: shrq %rcx
; SCALAR-NEXT: movabsq $6148914691236517204, %rdx # imm = 0x5555555555555554
; SCALAR-NEXT: andq %rcx, %rdx
; SCALAR-NEXT: leaq (%rdx,%rax,2), %rax
; SCALAR-NEXT: shrq %rax
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: retq
;
; SSE2-PCLMUL-LABEL: clmulh_i64:
; SSE2-PCLMUL: # %bb.0:
; SSE2-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE2-PCLMUL-NEXT: movq %rdi, %xmm1
; SSE2-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE2-PCLMUL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE2-PCLMUL-NEXT: movq %xmm0, %rax
; SSE2-PCLMUL-NEXT: retq
;
; SSE42-PCLMUL-LABEL: clmulh_i64:
; SSE42-PCLMUL: # %bb.0:
; SSE42-PCLMUL-NEXT: movq %rsi, %xmm0
; SSE42-PCLMUL-NEXT: movq %rdi, %xmm1
; SSE42-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE42-PCLMUL-NEXT: pextrq $1, %xmm1, %rax
; SSE42-PCLMUL-NEXT: retq
;
; AVX-LABEL: clmulh_i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovq %rsi, %xmm0
; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: retq
%a.ext = zext i64 %a to i128
%b.ext = zext i64 %b to i128
%clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
%res.ext = lshr i128 %clmul, 64
%res = trunc i128 %res.ext to i64
ret i64 %res
}
; Carry-less multiply of two i8 values under the noimplicitfloat attribute.
; noimplicitfloat forbids implicit use of vector/FP registers, so even the
; PCLMUL/AVX RUN configurations must use the scalar bit-by-bit expansion —
; hence the single shared CHECK prefix instead of per-feature prefixes.
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py;
; regenerate rather than editing them by hand.
define i8 @clmul_i8_noimplicitfloat(i8 %a, i8 %b) nounwind noimplicitfloat {
; CHECK-LABEL: clmul_i8_noimplicitfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: leal (%rdi,%rdi), %edx
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: testb $2, %sil
; CHECK-NEXT: cmovel %ecx, %edx
; CHECK-NEXT: xorl %eax, %edx
; CHECK-NEXT: leal (,%rdi,4), %eax
; CHECK-NEXT: movzbl %al, %r8d
; CHECK-NEXT: testb $4, %sil
; CHECK-NEXT: cmovel %ecx, %r8d
; CHECK-NEXT: leal (,%rdi,8), %eax
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: testb $8, %sil
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: xorl %r8d, %eax
; CHECK-NEXT: xorl %edx, %eax
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: shlb $4, %dl
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: testb $16, %sil
; CHECK-NEXT: cmovel %ecx, %edx
; CHECK-NEXT: movl %edi, %r8d
; CHECK-NEXT: shlb $5, %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: testb $32, %sil
; CHECK-NEXT: cmovel %ecx, %r8d
; CHECK-NEXT: xorl %edx, %r8d
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: shlb $6, %dl
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: testb $64, %sil
; CHECK-NEXT: cmovel %ecx, %edx
; CHECK-NEXT: xorl %r8d, %edx
; CHECK-NEXT: xorl %eax, %edx
; CHECK-NEXT: shlb $7, %dil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: testb $-128, %sil
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: xorl %edx, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
  %res = call i8 @llvm.clmul.i8(i8 %a, i8 %b)
  ret i8 %res
}
; Opaque external sink: calling it forces a value to stay live across the
; call (exercised by @mul_use_commutative_clmul_i8 below).
declare void @use(i8)
; Checks that clmul(x, y) and clmul(y, x) are recognized as the same value:
; in every configuration the product is computed once and the single result
; register is stored to both %p0 and %p1 (the commuted call is CSE'd).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmul_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
; SCALAR-NEXT: xorl %eax, %eax
; SCALAR-NEXT: testb $1, %sil
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: leal (%rdi,%rdi), %r9d
; SCALAR-NEXT: movzbl %r9b, %r9d
; SCALAR-NEXT: testb $2, %sil
; SCALAR-NEXT: cmovel %eax, %r9d
; SCALAR-NEXT: xorl %r8d, %r9d
; SCALAR-NEXT: leal (,%rdi,4), %r8d
; SCALAR-NEXT: movzbl %r8b, %r10d
; SCALAR-NEXT: testb $4, %sil
; SCALAR-NEXT: cmovel %eax, %r10d
; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movzbl %r8b, %r8d
; SCALAR-NEXT: testb $8, %sil
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: xorl %r10d, %r8d
; SCALAR-NEXT: xorl %r9d, %r8d
; SCALAR-NEXT: movl %edi, %r9d
; SCALAR-NEXT: shlb $4, %r9b
; SCALAR-NEXT: movzbl %r9b, %r9d
; SCALAR-NEXT: testb $16, %sil
; SCALAR-NEXT: cmovel %eax, %r9d
; SCALAR-NEXT: movl %edi, %r10d
; SCALAR-NEXT: shlb $5, %r10b
; SCALAR-NEXT: movzbl %r10b, %r10d
; SCALAR-NEXT: testb $32, %sil
; SCALAR-NEXT: cmovel %eax, %r10d
; SCALAR-NEXT: xorl %r9d, %r10d
; SCALAR-NEXT: movl %edi, %r9d
; SCALAR-NEXT: shlb $6, %r9b
; SCALAR-NEXT: movzbl %r9b, %r9d
; SCALAR-NEXT: testb $64, %sil
; SCALAR-NEXT: cmovel %eax, %r9d
; SCALAR-NEXT: xorl %r10d, %r9d
; SCALAR-NEXT: xorl %r8d, %r9d
; SCALAR-NEXT: shlb $7, %dil
; SCALAR-NEXT: movzbl %dil, %edi
; SCALAR-NEXT: testb $-128, %sil
; SCALAR-NEXT: cmovel %eax, %edi
; SCALAR-NEXT: xorl %r9d, %edi
; SCALAR-NEXT: movb %dil, (%rdx)
; SCALAR-NEXT: movb %dil, (%rcx)
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmul_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: movb %al, (%rdx)
; SSE-PCLMUL-NEXT: movb %al, (%rcx)
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: commutative_clmul_i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: movb %al, (%rdx)
; AVX-NEXT: movb %al, (%rcx)
; AVX-NEXT: retq
  %xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
  %yx = call i8 @llvm.clmul.i8(i8 %y, i8 %x)
  store i8 %xy, ptr %p0
  store i8 %yx, ptr %p1
  ret void
}
; "clmulh" pattern: widen i8 operands to i16, clmul, then lshr by 8 and
; truncate to get the high half of the carry-less product. Both commuted
; calls must be folded to one computation — each configuration stores the
; same %al result to both pointers.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmulh_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %r13
; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SCALAR-NEXT: movzbl %sil, %r14d
; SCALAR-NEXT: movl %r14d, %ebp
; SCALAR-NEXT: shll $8, %ebp
; SCALAR-NEXT: movl %r14d, %ebx
; SCALAR-NEXT: shll $9, %ebx
; SCALAR-NEXT: movl %r14d, %r11d
; SCALAR-NEXT: shll $10, %r11d
; SCALAR-NEXT: movl %r14d, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %r14d, %r10d
; SCALAR-NEXT: shll $12, %r10d
; SCALAR-NEXT: movl %r14d, %ecx
; SCALAR-NEXT: shll $13, %ecx
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: shll $14, %r8d
; SCALAR-NEXT: xorl %r15d, %r15d
; SCALAR-NEXT: testw %r15w, %r15w
; SCALAR-NEXT: cmovel %r15d, %r8d
; SCALAR-NEXT: cmovel %r15d, %ecx
; SCALAR-NEXT: cmovel %r15d, %r10d
; SCALAR-NEXT: cmovel %r15d, %eax
; SCALAR-NEXT: cmovel %r15d, %r11d
; SCALAR-NEXT: cmovel %r15d, %ebx
; SCALAR-NEXT: cmovel %r15d, %ebp
; SCALAR-NEXT: shll $15, %esi
; SCALAR-NEXT: testw %r15w, %r15w
; SCALAR-NEXT: cmovel %r15d, %esi
; SCALAR-NEXT: movl %edi, %r15d
; SCALAR-NEXT: andl $1, %r15d
; SCALAR-NEXT: cmovnel %r14d, %r15d
; SCALAR-NEXT: leal (%r14,%r14), %r12d
; SCALAR-NEXT: movl %edi, %r13d
; SCALAR-NEXT: andl $2, %r13d
; SCALAR-NEXT: cmovnel %r12d, %r13d
; SCALAR-NEXT: xorl %r15d, %r13d
; SCALAR-NEXT: leal (,%r14,4), %r15d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $4, %r12d
; SCALAR-NEXT: cmovnel %r15d, %r12d
; SCALAR-NEXT: movl %edi, %r15d
; SCALAR-NEXT: andl $8, %r15d
; SCALAR-NEXT: leal (,%r14,8), %r9d
; SCALAR-NEXT: cmovnel %r9d, %r15d
; SCALAR-NEXT: xorl %r12d, %r15d
; SCALAR-NEXT: xorl %r13d, %r15d
; SCALAR-NEXT: movl %r14d, %r9d
; SCALAR-NEXT: shll $4, %r9d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $16, %r12d
; SCALAR-NEXT: cmovnel %r9d, %r12d
; SCALAR-NEXT: movl %r14d, %r9d
; SCALAR-NEXT: shll $5, %r9d
; SCALAR-NEXT: movl %edi, %r13d
; SCALAR-NEXT: andl $32, %r13d
; SCALAR-NEXT: cmovnel %r9d, %r13d
; SCALAR-NEXT: xorl %r12d, %r13d
; SCALAR-NEXT: movl %r14d, %r9d
; SCALAR-NEXT: shll $6, %r9d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $64, %r12d
; SCALAR-NEXT: cmovnel %r9d, %r12d
; SCALAR-NEXT: xorl %r13d, %r12d
; SCALAR-NEXT: xorl %r15d, %r12d
; SCALAR-NEXT: shll $7, %r14d
; SCALAR-NEXT: andl $128, %edi
; SCALAR-NEXT: cmovnel %r14d, %edi
; SCALAR-NEXT: xorl %ebp, %edi
; SCALAR-NEXT: xorl %ebx, %edi
; SCALAR-NEXT: xorl %r11d, %edi
; SCALAR-NEXT: xorl %r12d, %edi
; SCALAR-NEXT: xorl %r10d, %eax
; SCALAR-NEXT: xorl %ecx, %eax
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %edi, %eax
; SCALAR-NEXT: shrl $8, %eax
; SCALAR-NEXT: movb %al, (%rdx)
; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; SCALAR-NEXT: movb %al, (%rcx)
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r12
; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmulh_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzbl %dil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzbl %sil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $8, %eax
; SSE-PCLMUL-NEXT: movb %al, (%rdx)
; SSE-PCLMUL-NEXT: movb %al, (%rcx)
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: commutative_clmulh_i8:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzbl %sil, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $8, %eax
; AVX-NEXT: movb %al, (%rdx)
; AVX-NEXT: movb %al, (%rcx)
; AVX-NEXT: retq
  %x.ext = zext i8 %x to i16
  %y.ext = zext i8 %y to i16
  %clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext)
  %clmul_yx = call i16 @llvm.clmul.i16(i16 %y.ext, i16 %x.ext)
  %clmul_xy_lshr = lshr i16 %clmul_xy, 8
  %clmul_yx_lshr = lshr i16 %clmul_yx, 8
  %clmulh_xy = trunc i16 %clmul_xy_lshr to i8
  %clmulh_yx = trunc i16 %clmul_yx_lshr to i8
  store i8 %clmulh_xy, ptr %p0
  store i8 %clmulh_yx, ptr %p1
  ret void
}
; "clmulr" pattern: widen i8 operands to i16, clmul, then lshr by 7 and
; truncate (cf. the clmulh test above, which shifts by 8). Both commuted
; calls must be folded to one computation — each configuration stores the
; same %al result to both pointers.
; Fix: the truncated results were named %clmulh_* (copy-pasted from the
; clmulh test) even though this is the clmulr variant; the value names are
; now %clmulr_*. SSA value names do not affect llc output, so the
; autogenerated CHECK lines are unchanged.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @commutative_clmulr_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: commutative_clmulr_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %r15
; SCALAR-NEXT: pushq %r14
; SCALAR-NEXT: pushq %r13
; SCALAR-NEXT: pushq %r12
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movzbl %sil, %r14d
; SCALAR-NEXT: movl %r14d, %ebx
; SCALAR-NEXT: shll $8, %ebx
; SCALAR-NEXT: movl %r14d, %r11d
; SCALAR-NEXT: shll $9, %r11d
; SCALAR-NEXT: movl %r14d, %r10d
; SCALAR-NEXT: shll $10, %r10d
; SCALAR-NEXT: movl %r14d, %eax
; SCALAR-NEXT: shll $11, %eax
; SCALAR-NEXT: movl %r14d, %r9d
; SCALAR-NEXT: shll $12, %r9d
; SCALAR-NEXT: movl %esi, %r8d
; SCALAR-NEXT: shll $13, %r8d
; SCALAR-NEXT: xorl %ebp, %ebp
; SCALAR-NEXT: testw %bp, %bp
; SCALAR-NEXT: cmovel %ebp, %r8d
; SCALAR-NEXT: cmovel %ebp, %r9d
; SCALAR-NEXT: cmovel %ebp, %eax
; SCALAR-NEXT: cmovel %ebp, %r10d
; SCALAR-NEXT: cmovel %ebp, %r11d
; SCALAR-NEXT: cmovel %ebp, %ebx
; SCALAR-NEXT: shll $14, %esi
; SCALAR-NEXT: testw %bp, %bp
; SCALAR-NEXT: cmovel %ebp, %esi
; SCALAR-NEXT: movl %edi, %ebp
; SCALAR-NEXT: andl $1, %ebp
; SCALAR-NEXT: cmovnel %r14d, %ebp
; SCALAR-NEXT: leal (%r14,%r14), %r15d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $2, %r12d
; SCALAR-NEXT: cmovnel %r15d, %r12d
; SCALAR-NEXT: xorl %ebp, %r12d
; SCALAR-NEXT: leal (,%r14,4), %ebp
; SCALAR-NEXT: movl %edi, %r15d
; SCALAR-NEXT: andl $4, %r15d
; SCALAR-NEXT: cmovnel %ebp, %r15d
; SCALAR-NEXT: leal (,%r14,8), %r13d
; SCALAR-NEXT: movl %edi, %ebp
; SCALAR-NEXT: andl $8, %ebp
; SCALAR-NEXT: cmovnel %r13d, %ebp
; SCALAR-NEXT: xorl %r15d, %ebp
; SCALAR-NEXT: xorl %r12d, %ebp
; SCALAR-NEXT: movl %r14d, %r15d
; SCALAR-NEXT: shll $4, %r15d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $16, %r12d
; SCALAR-NEXT: cmovnel %r15d, %r12d
; SCALAR-NEXT: movl %r14d, %r15d
; SCALAR-NEXT: shll $5, %r15d
; SCALAR-NEXT: movl %edi, %r13d
; SCALAR-NEXT: andl $32, %r13d
; SCALAR-NEXT: cmovnel %r15d, %r13d
; SCALAR-NEXT: xorl %r12d, %r13d
; SCALAR-NEXT: movl %r14d, %r15d
; SCALAR-NEXT: shll $6, %r15d
; SCALAR-NEXT: movl %edi, %r12d
; SCALAR-NEXT: andl $64, %r12d
; SCALAR-NEXT: cmovnel %r15d, %r12d
; SCALAR-NEXT: xorl %r13d, %r12d
; SCALAR-NEXT: xorl %ebp, %r12d
; SCALAR-NEXT: shll $7, %r14d
; SCALAR-NEXT: andl $128, %edi
; SCALAR-NEXT: cmovnel %r14d, %edi
; SCALAR-NEXT: xorl %ebx, %edi
; SCALAR-NEXT: xorl %r11d, %edi
; SCALAR-NEXT: xorl %r10d, %edi
; SCALAR-NEXT: xorl %r12d, %edi
; SCALAR-NEXT: xorl %r9d, %eax
; SCALAR-NEXT: xorl %r8d, %eax
; SCALAR-NEXT: xorl %esi, %eax
; SCALAR-NEXT: xorl %edi, %eax
; SCALAR-NEXT: shrl $7, %eax
; SCALAR-NEXT: movb %al, (%rdx)
; SCALAR-NEXT: movb %al, (%rcx)
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %r12
; SCALAR-NEXT: popq %r13
; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: popq %r15
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: commutative_clmulr_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: movzbl %dil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm0
; SSE-PCLMUL-NEXT: movzbl %sil, %eax
; SSE-PCLMUL-NEXT: movd %eax, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %rax
; SSE-PCLMUL-NEXT: shrl $7, %eax
; SSE-PCLMUL-NEXT: movb %al, (%rdx)
; SSE-PCLMUL-NEXT: movb %al, (%rcx)
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: commutative_clmulr_i8:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzbl %sil, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: shrl $7, %eax
; AVX-NEXT: movb %al, (%rdx)
; AVX-NEXT: movb %al, (%rcx)
; AVX-NEXT: retq
  %x.ext = zext i8 %x to i16
  %y.ext = zext i8 %y to i16
  %clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext)
  %clmul_yx = call i16 @llvm.clmul.i16(i16 %y.ext, i16 %x.ext)
  %clmul_xy_lshr = lshr i16 %clmul_xy, 7
  %clmul_yx_lshr = lshr i16 %clmul_yx, 7
  %clmulr_xy = trunc i16 %clmul_xy_lshr to i8
  %clmulr_yx = trunc i16 %clmul_yx_lshr to i8
  store i8 %clmulr_xy, ptr %p0
  store i8 %clmulr_yx, ptr %p1
  ret void
}
; Same commutativity fold as @commutative_clmul_i8, but with an extra use of
; %xy via call @use between the two stores: the single computed result must
; be kept in a callee-saved register (%rbp / %r14) across the call and then
; stored to %p1, rather than being recomputed for the commuted operand order.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @mul_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind {
; SCALAR-LABEL: mul_use_commutative_clmul_i8:
; SCALAR: # %bb.0:
; SCALAR-NEXT: pushq %rbp
; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: pushq %rax
; SCALAR-NEXT: movq %rcx, %rbx
; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi
; SCALAR-NEXT: xorl %eax, %eax
; SCALAR-NEXT: testb $1, %sil
; SCALAR-NEXT: movl %edi, %ebp
; SCALAR-NEXT: cmovel %eax, %ebp
; SCALAR-NEXT: leal (%rdi,%rdi), %ecx
; SCALAR-NEXT: movzbl %cl, %ecx
; SCALAR-NEXT: testb $2, %sil
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: xorl %ecx, %ebp
; SCALAR-NEXT: leal (,%rdi,4), %ecx
; SCALAR-NEXT: movzbl %cl, %ecx
; SCALAR-NEXT: testb $4, %sil
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: leal (,%rdi,8), %r8d
; SCALAR-NEXT: movzbl %r8b, %r8d
; SCALAR-NEXT: testb $8, %sil
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: xorl %ecx, %r8d
; SCALAR-NEXT: xorl %r8d, %ebp
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shlb $4, %cl
; SCALAR-NEXT: movzbl %cl, %ecx
; SCALAR-NEXT: testb $16, %sil
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: movl %edi, %r8d
; SCALAR-NEXT: shlb $5, %r8b
; SCALAR-NEXT: movzbl %r8b, %r8d
; SCALAR-NEXT: testb $32, %sil
; SCALAR-NEXT: cmovel %eax, %r8d
; SCALAR-NEXT: xorl %ecx, %r8d
; SCALAR-NEXT: movl %edi, %ecx
; SCALAR-NEXT: shlb $6, %cl
; SCALAR-NEXT: movzbl %cl, %ecx
; SCALAR-NEXT: testb $64, %sil
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: xorl %r8d, %ecx
; SCALAR-NEXT: xorl %ecx, %ebp
; SCALAR-NEXT: shlb $7, %dil
; SCALAR-NEXT: movzbl %dil, %ecx
; SCALAR-NEXT: testb $-128, %sil
; SCALAR-NEXT: cmovel %eax, %ecx
; SCALAR-NEXT: xorl %ecx, %ebp
; SCALAR-NEXT: movb %bpl, (%rdx)
; SCALAR-NEXT: movl %ebp, %edi
; SCALAR-NEXT: callq use@PLT
; SCALAR-NEXT: movb %bpl, (%rbx)
; SCALAR-NEXT: addq $8, %rsp
; SCALAR-NEXT: popq %rbx
; SCALAR-NEXT: popq %rbp
; SCALAR-NEXT: retq
;
; SSE-PCLMUL-LABEL: mul_use_commutative_clmul_i8:
; SSE-PCLMUL: # %bb.0:
; SSE-PCLMUL-NEXT: pushq %r14
; SSE-PCLMUL-NEXT: pushq %rbx
; SSE-PCLMUL-NEXT: pushq %rax
; SSE-PCLMUL-NEXT: movq %rcx, %rbx
; SSE-PCLMUL-NEXT: movd %esi, %xmm0
; SSE-PCLMUL-NEXT: movd %edi, %xmm1
; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
; SSE-PCLMUL-NEXT: movq %xmm1, %r14
; SSE-PCLMUL-NEXT: movb %r14b, (%rdx)
; SSE-PCLMUL-NEXT: movl %r14d, %edi
; SSE-PCLMUL-NEXT: callq use@PLT
; SSE-PCLMUL-NEXT: movb %r14b, (%rbx)
; SSE-PCLMUL-NEXT: addq $8, %rsp
; SSE-PCLMUL-NEXT: popq %rbx
; SSE-PCLMUL-NEXT: popq %r14
; SSE-PCLMUL-NEXT: retq
;
; AVX-LABEL: mul_use_commutative_clmul_i8:
; AVX: # %bb.0:
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: pushq %rax
; AVX-NEXT: movq %rcx, %rbx
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %r14
; AVX-NEXT: movb %r14b, (%rdx)
; AVX-NEXT: movl %r14d, %edi
; AVX-NEXT: callq use@PLT
; AVX-NEXT: movb %r14b, (%rbx)
; AVX-NEXT: addq $8, %rsp
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r14
; AVX-NEXT: retq
  %xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
  %yx = call i8 @llvm.clmul.i8(i8 %y, i8 %x)
  store i8 %xy, ptr %p0
  call void @use(i8 %xy)
  store i8 %yx, ptr %p1
  ret void
}