| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SCALAR |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,+pclmul | FileCheck %s --check-prefixes=CHECK,SSE-PCLMUL,SSE2-PCLMUL |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,SSE-PCLMUL,SSE42-PCLMUL |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+pclmul | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+vpclmulqdq | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+vpclmulqdq | FileCheck %s --check-prefixes=CHECK,AVX |
| |
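; Carry-less multiply: every set bit i of %b XORs (%a << i) into the result.
; Without PCLMUL the i8 case expands to a per-bit testb/cmove/xor chain; with
; PCLMUL or VPCLMULQDQ the operands are moved into XMM registers and a single
; (v)pclmulqdq performs the whole multiply.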
| define i8 @clmul_i8(i8 %a, i8 %b) nounwind { |
| ; SCALAR-LABEL: clmul_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi |
| ; SCALAR-NEXT: xorl %ecx, %ecx |
| ; SCALAR-NEXT: testb $1, %sil |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: cmovel %ecx, %eax |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %edx |
| ; SCALAR-NEXT: movzbl %dl, %edx |
| ; SCALAR-NEXT: testb $2, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %edx |
| ; SCALAR-NEXT: xorl %eax, %edx |
| ; SCALAR-NEXT: leal (,%rdi,4), %eax |
| ; SCALAR-NEXT: movzbl %al, %r8d |
| ; SCALAR-NEXT: testb $4, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %r8d |
| ; SCALAR-NEXT: leal (,%rdi,8), %eax |
| ; SCALAR-NEXT: movzbl %al, %eax |
| ; SCALAR-NEXT: testb $8, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %eax |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shlb $4, %dl |
| ; SCALAR-NEXT: movzbl %dl, %edx |
| ; SCALAR-NEXT: testb $16, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shlb $5, %r8b |
| ; SCALAR-NEXT: movzbl %r8b, %r8d |
| ; SCALAR-NEXT: testb $32, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shlb $6, %dl |
| ; SCALAR-NEXT: movzbl %dl, %edx |
| ; SCALAR-NEXT: testb $64, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: xorl %eax, %edx |
| ; SCALAR-NEXT: shlb $7, %dil |
| ; SCALAR-NEXT: movzbl %dil, %eax |
| ; SCALAR-NEXT: testb $-128, %sil |
| ; SCALAR-NEXT: cmovel %ecx, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: # kill: def $al killed $al killed $eax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmul_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmul_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: # kill: def $al killed $al killed $rax |
| ; AVX-NEXT: retq |
| %res = call i8 @llvm.clmul.i8(i8 %a, i8 %b) |
| ret i8 %res |
| } |
| |
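; At i16 the scalar expansion switches from testb/cmove to an andl/cmovne test
; of each bit of %b; the PCLMUL paths are unchanged.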
| define i16 @clmul_i16(i16 %a, i16 %b) nounwind { |
| ; SCALAR-LABEL: clmul_i16: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %eax |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $2, %ecx |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $1, %eax |
| ; SCALAR-NEXT: cmovnel %edi, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: leal (,%rdi,4), %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $4, %edx |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: leal (,%rdi,8), %r8d |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $8, %ecx |
| ; SCALAR-NEXT: cmovnel %r8d, %ecx |
| ; SCALAR-NEXT: xorl %edx, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $4, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $16, %edx |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $5, %eax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $32, %r8d |
| ; SCALAR-NEXT: cmovnel %eax, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $6, %edx |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $64, %eax |
| ; SCALAR-NEXT: cmovnel %edx, %eax |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $7, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $128, %edx |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $8, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $256, %r8d # imm = 0x100 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $9, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $512, %edx # imm = 0x200 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $10, %r8d |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $1024, %ecx # imm = 0x400 |
| ; SCALAR-NEXT: cmovnel %r8d, %ecx |
| ; SCALAR-NEXT: xorl %edx, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $2048, %edx # imm = 0x800 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $12, %eax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000 |
| ; SCALAR-NEXT: cmovnel %eax, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $13, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $14, %r8d |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $16384, %eax # imm = 0x4000 |
| ; SCALAR-NEXT: cmovnel %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: shll $15, %edi |
| ; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000 |
| ; SCALAR-NEXT: cmovnel %edi, %esi |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmul_i16: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmul_i16: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: # kill: def $ax killed $ax killed $rax |
| ; AVX-NEXT: retq |
| %res = call i16 @llvm.clmul.i16(i16 %a, i16 %b) |
| ret i16 %res |
| } |
| |
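; i32 is the same and/cmovne/xor ladder extended to all 32 bits of %b, with
; the larger masks carried as 32-bit immediates.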
| define i32 @clmul_i32(i32 %a, i32 %b) nounwind { |
| ; SCALAR-LABEL: clmul_i32: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %eax |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $2, %ecx |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $1, %edx |
| ; SCALAR-NEXT: cmovnel %edi, %edx |
| ; SCALAR-NEXT: xorl %ecx, %edx |
| ; SCALAR-NEXT: leal (,%rdi,4), %eax |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $4, %ecx |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: leal (,%rdi,8), %r8d |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $8, %eax |
| ; SCALAR-NEXT: cmovnel %r8d, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $4, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $16, %edx |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $5, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $32, %r8d |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $6, %edx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $64, %ecx |
| ; SCALAR-NEXT: cmovnel %edx, %ecx |
| ; SCALAR-NEXT: xorl %r8d, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $7, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $128, %edx |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $8, %eax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $256, %r8d # imm = 0x100 |
| ; SCALAR-NEXT: cmovnel %eax, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $9, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $512, %edx # imm = 0x200 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $10, %r8d |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $1024, %eax # imm = 0x400 |
| ; SCALAR-NEXT: cmovnel %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $11, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $2048, %edx # imm = 0x800 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $12, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $13, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $14, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $15, %edx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000 |
| ; SCALAR-NEXT: cmovnel %edx, %ecx |
| ; SCALAR-NEXT: xorl %r8d, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $16, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $17, %eax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000 |
| ; SCALAR-NEXT: cmovnel %eax, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $18, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $19, %eax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000 |
| ; SCALAR-NEXT: cmovnel %eax, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $20, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $21, %r8d |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000 |
| ; SCALAR-NEXT: cmovnel %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $22, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $23, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $24, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $25, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $26, %ecx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %r8d, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $27, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovnel %ecx, %r8d |
| ; SCALAR-NEXT: xorl %edx, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $28, %edx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovnel %edx, %ecx |
| ; SCALAR-NEXT: xorl %r8d, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $29, %eax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $30, %r8d |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $1073741824, %eax # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovnel %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: shll $31, %edi |
| ; SCALAR-NEXT: andl $-2147483648, %esi # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovnel %edi, %esi |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmul_i32: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmul_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: # kill: def $eax killed $eax killed $rax |
| ; AVX-NEXT: retq |
| %res = call i32 @llvm.clmul.i32(i32 %a, i32 %b) |
| ret i32 %res |
| } |
| |
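; For i64 the masks for bits 32..63 no longer fit a 32-bit immediate, so the
; scalar expansion switches from andl/cmovne to btq/cmovae against a zeroed
; register for the upper half.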
| define i64 @clmul_i64(i64 %a, i64 %b) nounwind { |
| ; SCALAR-LABEL: clmul_i64: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: leaq (%rdi,%rdi), %rax |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $2, %ecx |
| ; SCALAR-NEXT: cmovneq %rax, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $1, %edx |
| ; SCALAR-NEXT: cmovneq %rdi, %rdx |
| ; SCALAR-NEXT: xorq %rcx, %rdx |
| ; SCALAR-NEXT: leaq (,%rdi,4), %rax |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $4, %ecx |
| ; SCALAR-NEXT: cmovneq %rax, %rcx |
| ; SCALAR-NEXT: leaq (,%rdi,8), %r8 |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $8, %eax |
| ; SCALAR-NEXT: cmovneq %r8, %rax |
| ; SCALAR-NEXT: xorq %rcx, %rax |
| ; SCALAR-NEXT: xorq %rdx, %rax |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $4, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $16, %edx |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $5, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $32, %r8d |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $6, %rdx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $64, %ecx |
| ; SCALAR-NEXT: cmovneq %rdx, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: xorq %rax, %rcx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $7, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $128, %edx |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $8, %rax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $256, %r8d # imm = 0x100 |
| ; SCALAR-NEXT: cmovneq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $9, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $512, %edx # imm = 0x200 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $10, %r8 |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $1024, %eax # imm = 0x400 |
| ; SCALAR-NEXT: cmovneq %r8, %rax |
| ; SCALAR-NEXT: xorq %rdx, %rax |
| ; SCALAR-NEXT: xorq %rcx, %rax |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $11, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $2048, %edx # imm = 0x800 |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $12, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000 |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $13, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $8192, %edx # imm = 0x2000 |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $14, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000 |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $15, %rdx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $32768, %ecx # imm = 0x8000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: xorq %rax, %rcx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $16, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $65536, %edx # imm = 0x10000 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $17, %rax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000 |
| ; SCALAR-NEXT: cmovneq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $18, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $262144, %edx # imm = 0x40000 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $19, %rax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000 |
| ; SCALAR-NEXT: cmovneq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $20, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $1048576, %edx # imm = 0x100000 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $21, %r8 |
| ; SCALAR-NEXT: movl %esi, %eax |
| ; SCALAR-NEXT: andl $2097152, %eax # imm = 0x200000 |
| ; SCALAR-NEXT: cmovneq %r8, %rax |
| ; SCALAR-NEXT: xorq %rdx, %rax |
| ; SCALAR-NEXT: xorq %rcx, %rax |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $22, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $4194304, %edx # imm = 0x400000 |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $23, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000 |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $24, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $16777216, %edx # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $25, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $26, %rcx |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $67108864, %edx # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovneq %rcx, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $27, %rcx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovneq %rcx, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $28, %rdx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: andl $268435456, %ecx # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: xorq %rax, %rcx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $29, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $536870912, %edx # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $30, %rax |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovneq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shlq $31, %rax |
| ; SCALAR-NEXT: movl %esi, %edx |
| ; SCALAR-NEXT: andl $-2147483648, %edx # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovneq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $32, %r8 |
| ; SCALAR-NEXT: xorl %eax, %eax |
| ; SCALAR-NEXT: btq $32, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $33, %rdx |
| ; SCALAR-NEXT: btq $33, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $34, %r8 |
| ; SCALAR-NEXT: btq $34, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %r9 |
| ; SCALAR-NEXT: shlq $35, %r9 |
| ; SCALAR-NEXT: btq $35, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $36, %rdx |
| ; SCALAR-NEXT: btq $36, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r9, %rdx |
| ; SCALAR-NEXT: xorq %rcx, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $37, %rcx |
| ; SCALAR-NEXT: btq $37, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $38, %r8 |
| ; SCALAR-NEXT: btq $38, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $39, %rcx |
| ; SCALAR-NEXT: btq $39, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $40, %r8 |
| ; SCALAR-NEXT: btq $40, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $41, %rcx |
| ; SCALAR-NEXT: btq $41, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $42, %r8 |
| ; SCALAR-NEXT: btq $42, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $43, %rcx |
| ; SCALAR-NEXT: btq $43, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $44, %r8 |
| ; SCALAR-NEXT: btq $44, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %r9 |
| ; SCALAR-NEXT: shlq $45, %r9 |
| ; SCALAR-NEXT: btq $45, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $46, %rcx |
| ; SCALAR-NEXT: btq $46, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r9, %rcx |
| ; SCALAR-NEXT: xorq %rdx, %rcx |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $47, %rdx |
| ; SCALAR-NEXT: btq $47, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $48, %r8 |
| ; SCALAR-NEXT: btq $48, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $49, %rdx |
| ; SCALAR-NEXT: btq $49, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $50, %r8 |
| ; SCALAR-NEXT: btq $50, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $51, %rdx |
| ; SCALAR-NEXT: btq $51, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $52, %r8 |
| ; SCALAR-NEXT: btq $52, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $53, %rdx |
| ; SCALAR-NEXT: btq $53, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $54, %r8 |
| ; SCALAR-NEXT: btq $54, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $55, %rdx |
| ; SCALAR-NEXT: btq $55, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $56, %r8 |
| ; SCALAR-NEXT: btq $56, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rdx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: shlq $57, %rdx |
| ; SCALAR-NEXT: btq $57, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rdx |
| ; SCALAR-NEXT: xorq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rcx, %rdx |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $58, %rcx |
| ; SCALAR-NEXT: btq $58, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $59, %r8 |
| ; SCALAR-NEXT: btq $59, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $60, %rcx |
| ; SCALAR-NEXT: btq $60, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: shlq $61, %r8 |
| ; SCALAR-NEXT: btq $61, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %r8 |
| ; SCALAR-NEXT: xorq %rcx, %r8 |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: shlq $62, %rcx |
| ; SCALAR-NEXT: btq $62, %rsi |
| ; SCALAR-NEXT: cmovaeq %rax, %rcx |
| ; SCALAR-NEXT: xorq %r8, %rcx |
| ; SCALAR-NEXT: shlq $63, %rdi |
| ; SCALAR-NEXT: btq $63, %rsi |
| ; SCALAR-NEXT: cmovbq %rdi, %rax |
| ; SCALAR-NEXT: xorq %rcx, %rax |
| ; SCALAR-NEXT: xorq %rdx, %rax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmul_i64: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movq %rsi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movq %rdi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmul_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovq %rsi, %xmm0 |
| ; AVX-NEXT: vmovq %rdi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: retq |
| %res = call i64 @llvm.clmul.i64(i64 %a, i64 %b) |
| ret i64 %res |
| } |
| |
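; clmulr (reversed carry-less multiply) is expressed as a widened clmul whose
; product is shifted right by bits-1: for i8, zext to i16, clmul, lshr 7,
; trunc. In the scalar code the shifted copies for the known-zero high bits of
; %b are folded to zero through cmovs keyed on an always-zero test.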
| define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { |
| ; SCALAR-LABEL: clmulr_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movzbl %dil, %ecx |
| ; SCALAR-NEXT: movl %ecx, %r11d |
| ; SCALAR-NEXT: shll $8, %r11d |
| ; SCALAR-NEXT: movl %ecx, %r10d |
| ; SCALAR-NEXT: shll $9, %r10d |
| ; SCALAR-NEXT: movl %ecx, %r9d |
| ; SCALAR-NEXT: shll $10, %r9d |
| ; SCALAR-NEXT: movl %ecx, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: shll $12, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $13, %edx |
| ; SCALAR-NEXT: xorl %ebx, %ebx |
| ; SCALAR-NEXT: testw %bx, %bx |
| ; SCALAR-NEXT: cmovel %ebx, %edx |
| ; SCALAR-NEXT: cmovel %ebx, %r8d |
| ; SCALAR-NEXT: cmovel %ebx, %eax |
| ; SCALAR-NEXT: cmovel %ebx, %r9d |
| ; SCALAR-NEXT: cmovel %ebx, %r10d |
| ; SCALAR-NEXT: cmovel %ebx, %r11d |
| ; SCALAR-NEXT: shll $14, %edi |
| ; SCALAR-NEXT: testw %bx, %bx |
| ; SCALAR-NEXT: cmovnel %edi, %ebx |
| ; SCALAR-NEXT: movl %esi, %edi |
| ; SCALAR-NEXT: andl $1, %edi |
| ; SCALAR-NEXT: cmovnel %ecx, %edi |
| ; SCALAR-NEXT: leal (%rcx,%rcx), %ebp |
| ; SCALAR-NEXT: movl %esi, %r14d |
| ; SCALAR-NEXT: andl $2, %r14d |
| ; SCALAR-NEXT: cmovnel %ebp, %r14d |
| ; SCALAR-NEXT: xorl %edi, %r14d |
| ; SCALAR-NEXT: leal (,%rcx,4), %edi |
| ; SCALAR-NEXT: movl %esi, %ebp |
| ; SCALAR-NEXT: andl $4, %ebp |
| ; SCALAR-NEXT: cmovnel %edi, %ebp |
| ; SCALAR-NEXT: leal (,%rcx,8), %r15d |
| ; SCALAR-NEXT: movl %esi, %edi |
| ; SCALAR-NEXT: andl $8, %edi |
| ; SCALAR-NEXT: cmovnel %r15d, %edi |
| ; SCALAR-NEXT: xorl %ebp, %edi |
| ; SCALAR-NEXT: xorl %r14d, %edi |
| ; SCALAR-NEXT: movl %ecx, %ebp |
| ; SCALAR-NEXT: shll $4, %ebp |
| ; SCALAR-NEXT: movl %esi, %r14d |
| ; SCALAR-NEXT: andl $16, %r14d |
| ; SCALAR-NEXT: cmovnel %ebp, %r14d |
| ; SCALAR-NEXT: movl %ecx, %ebp |
| ; SCALAR-NEXT: shll $5, %ebp |
| ; SCALAR-NEXT: movl %esi, %r15d |
| ; SCALAR-NEXT: andl $32, %r15d |
| ; SCALAR-NEXT: cmovnel %ebp, %r15d |
| ; SCALAR-NEXT: xorl %r14d, %r15d |
| ; SCALAR-NEXT: movl %ecx, %ebp |
| ; SCALAR-NEXT: shll $6, %ebp |
| ; SCALAR-NEXT: movl %esi, %r14d |
| ; SCALAR-NEXT: andl $64, %r14d |
| ; SCALAR-NEXT: cmovnel %ebp, %r14d |
| ; SCALAR-NEXT: xorl %r15d, %r14d |
| ; SCALAR-NEXT: xorl %edi, %r14d |
| ; SCALAR-NEXT: shll $7, %ecx |
| ; SCALAR-NEXT: andl $128, %esi |
| ; SCALAR-NEXT: cmovel %esi, %ecx |
| ; SCALAR-NEXT: xorl %r11d, %ecx |
| ; SCALAR-NEXT: xorl %r10d, %ecx |
| ; SCALAR-NEXT: xorl %r9d, %ecx |
| ; SCALAR-NEXT: xorl %r14d, %ecx |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: xorl %ebx, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: shrl $7, %eax |
| ; SCALAR-NEXT: # kill: def $al killed $al killed $eax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulr_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzbl %sil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzbl %dil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $7, %eax |
| ; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulr_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzbl %sil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzbl %dil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $7, %eax |
| ; AVX-NEXT: # kill: def $al killed $al killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i8 %a to i16 |
| %b.ext = zext i8 %b to i16 |
| %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext) |
| %res.ext = lshr i16 %clmul, 7 |
| %res = trunc i16 %res.ext to i8 |
| ret i8 %res |
| } |
| |
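; The widened i32 expansion for i16 clmulr runs short of registers: the
; contributions for bits 16..30 (zeroed via cmovs on testl $0) are spilled to
; the stack and XORed back in at the end, ahead of the final shrl $15.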
| define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { |
| ; SCALAR-LABEL: clmulr_i16: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %r13 |
| ; SCALAR-NEXT: pushq %r12 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: movl %edi, %r13d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: movl %edi, %r15d |
| ; SCALAR-NEXT: movl %edi, %r9d |
| ; SCALAR-NEXT: movl %edi, %ebp |
| ; SCALAR-NEXT: movl %edi, %ebx |
| ; SCALAR-NEXT: movl %edi, %r11d |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: movl %edi, %esi |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: movl %edi, %r14d |
| ; SCALAR-NEXT: shll $16, %r14d |
| ; SCALAR-NEXT: shll $17, %ecx |
| ; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: shll $18, %r13d |
| ; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: shll $19, %r12d |
| ; SCALAR-NEXT: movl %r12d, %r13d |
| ; SCALAR-NEXT: shll $20, %r15d |
| ; SCALAR-NEXT: movl %r15d, %r12d |
| ; SCALAR-NEXT: shll $21, %r9d |
| ; SCALAR-NEXT: movl %r9d, %r15d |
| ; SCALAR-NEXT: shll $22, %ebp |
| ; SCALAR-NEXT: shll $23, %ebx |
| ; SCALAR-NEXT: movl %ebx, %r9d |
| ; SCALAR-NEXT: shll $24, %r11d |
| ; SCALAR-NEXT: movl %r11d, %ebx |
| ; SCALAR-NEXT: shll $25, %r8d |
| ; SCALAR-NEXT: movl %r8d, %r11d |
| ; SCALAR-NEXT: shll $26, %eax |
| ; SCALAR-NEXT: movl %eax, %r8d |
| ; SCALAR-NEXT: shll $27, %esi |
| ; SCALAR-NEXT: shll $28, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $29, %ecx |
| ; SCALAR-NEXT: xorl %eax, %eax |
| ; SCALAR-NEXT: testl $0, %eax |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %edx |
| ; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %esi |
| ; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r11d |
| ; SCALAR-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %ebx |
| ; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r9d |
| ; SCALAR-NEXT: cmovel %eax, %ebp |
| ; SCALAR-NEXT: cmovel %eax, %r15d |
| ; SCALAR-NEXT: movl %r15d, %r8d |
| ; SCALAR-NEXT: cmovel %eax, %r12d |
| ; SCALAR-NEXT: movl %r12d, %r15d |
| ; SCALAR-NEXT: cmovel %eax, %r13d |
| ; SCALAR-NEXT: movl %r13d, %r12d |
| ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Reload |
| ; SCALAR-NEXT: cmovel %eax, %r13d |
| ; SCALAR-NEXT: cmovel %eax, %r14d |
| ; SCALAR-NEXT: movl %edi, %r11d |
| ; SCALAR-NEXT: shll $30, %r11d |
| ; SCALAR-NEXT: testl $0, %eax |
| ; SCALAR-NEXT: cmovel %eax, %r11d |
| ; SCALAR-NEXT: movzwl %di, %edi |
| ; SCALAR-NEXT: movl %r10d, %eax |
| ; SCALAR-NEXT: andl $1, %eax |
| ; SCALAR-NEXT: cmovnel %edi, %eax |
| ; SCALAR-NEXT: movl %r10d, %ecx |
| ; SCALAR-NEXT: andl $2, %ecx |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %esi |
| ; SCALAR-NEXT: cmovnel %esi, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %r10d, %eax |
| ; SCALAR-NEXT: andl $4, %eax |
| ; SCALAR-NEXT: leal (,%rdi,4), %esi |
| ; SCALAR-NEXT: cmovnel %esi, %eax |
| ; SCALAR-NEXT: movl %r10d, %esi |
| ; SCALAR-NEXT: andl $8, %esi |
| ; SCALAR-NEXT: leal (,%rdi,8), %ebx |
| ; SCALAR-NEXT: cmovnel %ebx, %esi |
| ; SCALAR-NEXT: xorl %eax, %esi |
| ; SCALAR-NEXT: xorl %ecx, %esi |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $4, %eax |
| ; SCALAR-NEXT: movl %r10d, %ecx |
| ; SCALAR-NEXT: andl $16, %ecx |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $5, %eax |
| ; SCALAR-NEXT: movl %r10d, %ebx |
| ; SCALAR-NEXT: andl $32, %ebx |
| ; SCALAR-NEXT: cmovnel %eax, %ebx |
| ; SCALAR-NEXT: xorl %ecx, %ebx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $6, %ecx |
| ; SCALAR-NEXT: movl %r10d, %eax |
| ; SCALAR-NEXT: andl $64, %eax |
| ; SCALAR-NEXT: cmovnel %ecx, %eax |
| ; SCALAR-NEXT: xorl %ebx, %eax |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $7, %ecx |
| ; SCALAR-NEXT: movl %r10d, %esi |
| ; SCALAR-NEXT: andl $128, %esi |
| ; SCALAR-NEXT: cmovnel %ecx, %esi |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $8, %ecx |
| ; SCALAR-NEXT: movl %r10d, %ebx |
| ; SCALAR-NEXT: andl $256, %ebx # imm = 0x100 |
| ; SCALAR-NEXT: cmovnel %ecx, %ebx |
| ; SCALAR-NEXT: xorl %esi, %ebx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $9, %ecx |
| ; SCALAR-NEXT: movl %r10d, %edx |
| ; SCALAR-NEXT: andl $512, %edx # imm = 0x200 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %ebx, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $10, %ecx |
| ; SCALAR-NEXT: movl %r10d, %esi |
| ; SCALAR-NEXT: andl $1024, %esi # imm = 0x400 |
| ; SCALAR-NEXT: cmovnel %ecx, %esi |
| ; SCALAR-NEXT: xorl %edx, %esi |
| ; SCALAR-NEXT: xorl %eax, %esi |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %r10d, %ecx |
| ; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800 |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $12, %eax |
| ; SCALAR-NEXT: movl %r10d, %edx |
| ; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $13, %eax |
| ; SCALAR-NEXT: movl %r10d, %ecx |
| ; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000 |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: xorl %edx, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $14, %eax |
| ; SCALAR-NEXT: movl %r10d, %edx |
| ; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %ecx, %edx |
| ; SCALAR-NEXT: shll $15, %edi |
| ; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000 |
| ; SCALAR-NEXT: cmovel %r10d, %edi |
| ; SCALAR-NEXT: xorl %edx, %edi |
| ; SCALAR-NEXT: xorl %esi, %edi |
| ; SCALAR-NEXT: xorl %r13d, %r14d |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %r12d, %r14d |
| ; SCALAR-NEXT: xorl %r15d, %r14d |
| ; SCALAR-NEXT: xorl %r8d, %r14d |
| ; SCALAR-NEXT: xorl %edi, %r14d |
| ; SCALAR-NEXT: xorl %r9d, %ebp |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %r14d, %ebp |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %ebp, %r11d |
| ; SCALAR-NEXT: shrl $15, %r11d |
| ; SCALAR-NEXT: movl %r11d, %eax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r12 |
| ; SCALAR-NEXT: popq %r13 |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulr_i16: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzwl %si, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzwl %di, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $15, %eax |
| ; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulr_i16: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzwl %si, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzwl %di, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $15, %eax |
| ; AVX-NEXT: # kill: def $ax killed $ax killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i16 %a to i32 |
| %b.ext = zext i16 %b to i32 |
| %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext) |
| %res.ext = lshr i32 %clmul, 15 |
| %res = trunc i32 %res.ext to i16 |
| ret i16 %res |
| } |
| |
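; i32 clmulr widens to a 64-bit clmul: bits below 32 use the and/cmovne form,
; bits 32 and up use btq/cmovae, and the product is shifted down with
; shrq $31 before truncation.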
| define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { |
| ; SCALAR-LABEL: clmulr_i32: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: leaq (%rax,%rax), %rdx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $2, %esi |
| ; SCALAR-NEXT: cmovneq %rdx, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $1, %edi |
| ; SCALAR-NEXT: cmovneq %rax, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: leaq (,%rax,4), %rdx |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $4, %esi |
| ; SCALAR-NEXT: cmovneq %rdx, %rsi |
| ; SCALAR-NEXT: leaq (,%rax,8), %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $8, %edx |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $4, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $16, %edi |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $5, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $32, %r8d |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $6, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $64, %esi |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $7, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $128, %edi |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $8, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $256, %r8d # imm = 0x100 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $9, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $512, %edi # imm = 0x200 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $10, %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $1024, %edx # imm = 0x400 |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $11, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $2048, %edi # imm = 0x800 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $12, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $13, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $14, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $15, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000 |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $16, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $17, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $18, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $19, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $20, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $21, %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000 |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $22, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $23, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $24, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $25, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $26, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $27, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $28, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $29, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $30, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $31, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $32, %r8 |
| ; SCALAR-NEXT: xorl %edx, %edx |
| ; SCALAR-NEXT: btq $32, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $33, %rdi |
| ; SCALAR-NEXT: btq $33, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $34, %r8 |
| ; SCALAR-NEXT: btq $34, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %r9 |
| ; SCALAR-NEXT: shlq $35, %r9 |
| ; SCALAR-NEXT: btq $35, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $36, %rdi |
| ; SCALAR-NEXT: btq $36, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r9, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $37, %rsi |
| ; SCALAR-NEXT: btq $37, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $38, %r8 |
| ; SCALAR-NEXT: btq $38, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $39, %rsi |
| ; SCALAR-NEXT: btq $39, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $40, %r8 |
| ; SCALAR-NEXT: btq $40, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $41, %rsi |
| ; SCALAR-NEXT: btq $41, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $42, %r8 |
| ; SCALAR-NEXT: btq $42, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $43, %rsi |
| ; SCALAR-NEXT: btq $43, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $44, %r8 |
| ; SCALAR-NEXT: btq $44, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %r9 |
| ; SCALAR-NEXT: shlq $45, %r9 |
| ; SCALAR-NEXT: btq $45, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $46, %rsi |
| ; SCALAR-NEXT: btq $46, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r9, %rsi |
| ; SCALAR-NEXT: xorq %rdi, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $47, %rdi |
| ; SCALAR-NEXT: btq $47, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $48, %r8 |
| ; SCALAR-NEXT: btq $48, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $49, %rdi |
| ; SCALAR-NEXT: btq $49, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $50, %r8 |
| ; SCALAR-NEXT: btq $50, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $51, %rdi |
| ; SCALAR-NEXT: btq $51, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $52, %r8 |
| ; SCALAR-NEXT: btq $52, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $53, %rdi |
| ; SCALAR-NEXT: btq $53, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $54, %r8 |
| ; SCALAR-NEXT: btq $54, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $55, %rdi |
| ; SCALAR-NEXT: btq $55, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $56, %r8 |
| ; SCALAR-NEXT: btq $56, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $57, %rdi |
| ; SCALAR-NEXT: btq $57, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $58, %rsi |
| ; SCALAR-NEXT: btq $58, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $59, %r8 |
| ; SCALAR-NEXT: btq $59, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $60, %rsi |
| ; SCALAR-NEXT: btq $60, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $61, %r8 |
| ; SCALAR-NEXT: btq $61, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: shlq $62, %rax |
| ; SCALAR-NEXT: btq $62, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rax |
| ; SCALAR-NEXT: xorq %r8, %rax |
| ; SCALAR-NEXT: xorq %rdi, %rax |
| ; SCALAR-NEXT: shrq $31, %rax |
| ; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulr_i32: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrq $31, %rax |
| ; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulr_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrq $31, %rax |
| ; AVX-NEXT: # kill: def $eax killed $eax killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i32 %a to i64 |
| %b.ext = zext i32 %b to i64 |
| %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) |
| %res.ext = lshr i64 %clmul, 31 |
| %res = trunc i64 %res.ext to i32 |
| ret i32 %res |
| } |
| |
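; For i64 clmulr there is no wider legal type to widen into, so the scalar
; lowering bit-reverses both operands first (bswapq followed by nibble, 2-bit
; and 1-bit swaps with the 0x0F0F..., 0x3333... and 0x5555... masks) and then
; reuses the forward clmul expansion, consistent with the identity
; clmulr(a,b) = bitreverse(clmul(bitreverse(a), bitreverse(b))).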
| define i64 @clmulr_i64(i64 %a, i64 %b) nounwind { |
| ; SCALAR-LABEL: clmulr_i64: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: bswapq %rdi |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shrq $4, %rax |
| ; SCALAR-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F |
| ; SCALAR-NEXT: andq %rcx, %rax |
| ; SCALAR-NEXT: andq %rcx, %rdi |
| ; SCALAR-NEXT: shlq $4, %rdi |
| ; SCALAR-NEXT: orq %rax, %rdi |
| ; SCALAR-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: andq %rax, %rdx |
| ; SCALAR-NEXT: shrq $2, %rdi |
| ; SCALAR-NEXT: andq %rax, %rdi |
| ; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rdi |
| ; SCALAR-NEXT: movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555 |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: andq %rdx, %r8 |
| ; SCALAR-NEXT: shrq %rdi |
| ; SCALAR-NEXT: movq %rdi, %r9 |
| ; SCALAR-NEXT: andq %rdx, %r9 |
| ; SCALAR-NEXT: leaq (%r9,%r8,2), %r8 |
| ; SCALAR-NEXT: leaq (%r8,%r8), %r9 |
| ; SCALAR-NEXT: bswapq %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: shrq $4, %r10 |
| ; SCALAR-NEXT: andq %rcx, %r10 |
| ; SCALAR-NEXT: andq %rcx, %rsi |
| ; SCALAR-NEXT: shlq $4, %rsi |
| ; SCALAR-NEXT: orq %r10, %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: andq %rax, %r10 |
| ; SCALAR-NEXT: shrq $2, %rsi |
| ; SCALAR-NEXT: andq %rax, %rsi |
| ; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: andq %rdx, %r10 |
| ; SCALAR-NEXT: shrq %rsi |
| ; SCALAR-NEXT: andq %rdx, %rsi |
| ; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $2, %r10d |
| ; SCALAR-NEXT: cmovneq %r9, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $1, %r11d |
| ; SCALAR-NEXT: cmovneq %r8, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: leaq (,%r8,4), %r9 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $4, %r10d |
| ; SCALAR-NEXT: cmovneq %r9, %r10 |
| ; SCALAR-NEXT: leaq (,%r8,8), %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $8, %r9d |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $4, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $16, %r11d |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $5, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $32, %ebx |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $6, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $64, %r10d |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $7, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $128, %r11d |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $8, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $256, %ebx # imm = 0x100 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $9, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $512, %r11d # imm = 0x200 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $10, %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400 |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $11, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $12, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $13, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $14, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $15, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000 |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $16, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $17, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $18, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $19, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $20, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $21, %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000 |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $22, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $23, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $24, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $25, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $26, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $27, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $28, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $29, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $30, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $31, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $32, %rbx |
| ; SCALAR-NEXT: xorl %r9d, %r9d |
| ; SCALAR-NEXT: btq $32, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $33, %r11 |
| ; SCALAR-NEXT: btq $33, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $34, %rbx |
| ; SCALAR-NEXT: btq $34, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r14 |
| ; SCALAR-NEXT: shlq $35, %r14 |
| ; SCALAR-NEXT: btq $35, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r14 |
| ; SCALAR-NEXT: xorq %rbx, %r14 |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $36, %r11 |
| ; SCALAR-NEXT: btq $36, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %r14, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $37, %r10 |
| ; SCALAR-NEXT: btq $37, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $38, %rbx |
| ; SCALAR-NEXT: btq $38, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $39, %r10 |
| ; SCALAR-NEXT: btq $39, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $40, %rbx |
| ; SCALAR-NEXT: btq $40, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $41, %r10 |
| ; SCALAR-NEXT: btq $41, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $42, %rbx |
| ; SCALAR-NEXT: btq $42, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $43, %r10 |
| ; SCALAR-NEXT: btq $43, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $44, %rbx |
| ; SCALAR-NEXT: btq $44, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r14 |
| ; SCALAR-NEXT: shlq $45, %r14 |
| ; SCALAR-NEXT: btq $45, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r14 |
| ; SCALAR-NEXT: xorq %rbx, %r14 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $46, %r10 |
| ; SCALAR-NEXT: btq $46, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %r14, %r10 |
| ; SCALAR-NEXT: xorq %r11, %r10 |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $47, %r11 |
| ; SCALAR-NEXT: btq $47, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $48, %rbx |
| ; SCALAR-NEXT: btq $48, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $49, %r11 |
| ; SCALAR-NEXT: btq $49, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $50, %rbx |
| ; SCALAR-NEXT: btq $50, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $51, %r11 |
| ; SCALAR-NEXT: btq $51, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $52, %rbx |
| ; SCALAR-NEXT: btq $52, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $53, %r11 |
| ; SCALAR-NEXT: btq $53, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $54, %rbx |
| ; SCALAR-NEXT: btq $54, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $55, %r11 |
| ; SCALAR-NEXT: btq $55, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $56, %rbx |
| ; SCALAR-NEXT: btq $56, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $57, %r11 |
| ; SCALAR-NEXT: btq $57, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $58, %r10 |
| ; SCALAR-NEXT: btq $58, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $59, %rbx |
| ; SCALAR-NEXT: btq $59, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $60, %r10 |
| ; SCALAR-NEXT: btq $60, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $61, %rbx |
| ; SCALAR-NEXT: btq $61, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: shlq $62, %r8 |
| ; SCALAR-NEXT: btq $62, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r8 |
| ; SCALAR-NEXT: xorq %rbx, %r8 |
| ; SCALAR-NEXT: shlq $63, %rdi |
| ; SCALAR-NEXT: btq $63, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: xorq %r11, %rdi |
| ; SCALAR-NEXT: bswapq %rdi |
| ; SCALAR-NEXT: movq %rdi, %rsi |
| ; SCALAR-NEXT: shrq $4, %rsi |
| ; SCALAR-NEXT: andq %rcx, %rsi |
| ; SCALAR-NEXT: andq %rcx, %rdi |
| ; SCALAR-NEXT: shlq $4, %rdi |
| ; SCALAR-NEXT: orq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rdi, %rcx |
| ; SCALAR-NEXT: andq %rax, %rcx |
| ; SCALAR-NEXT: shrq $2, %rdi |
| ; SCALAR-NEXT: andq %rax, %rdi |
| ; SCALAR-NEXT: leaq (%rdi,%rcx,4), %rax |
| ; SCALAR-NEXT: movq %rax, %rcx |
| ; SCALAR-NEXT: andq %rdx, %rcx |
| ; SCALAR-NEXT: shrq %rax |
| ; SCALAR-NEXT: andq %rdx, %rax |
| ; SCALAR-NEXT: leaq (%rax,%rcx,2), %rax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE2-PCLMUL-LABEL: clmulr_i64: |
| ; SSE2-PCLMUL: # %bb.0: |
| ; SSE2-PCLMUL-NEXT: movq %rsi, %xmm0 |
| ; SSE2-PCLMUL-NEXT: movq %rdi, %xmm1 |
| ; SSE2-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE2-PCLMUL-NEXT: movq %xmm1, %rcx |
| ; SSE2-PCLMUL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] |
| ; SSE2-PCLMUL-NEXT: movq %xmm0, %rax |
| ; SSE2-PCLMUL-NEXT: shldq $1, %rcx, %rax |
| ; SSE2-PCLMUL-NEXT: retq |
| ; |
| ; SSE42-PCLMUL-LABEL: clmulr_i64: |
| ; SSE42-PCLMUL: # %bb.0: |
| ; SSE42-PCLMUL-NEXT: movq %rsi, %xmm0 |
| ; SSE42-PCLMUL-NEXT: movq %rdi, %xmm1 |
| ; SSE42-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE42-PCLMUL-NEXT: movq %xmm1, %rcx |
| ; SSE42-PCLMUL-NEXT: pextrq $1, %xmm1, %rax |
| ; SSE42-PCLMUL-NEXT: shldq $1, %rcx, %rax |
| ; SSE42-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulr_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovq %rsi, %xmm0 |
| ; AVX-NEXT: vmovq %rdi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rcx |
| ; AVX-NEXT: vpextrq $1, %xmm0, %rax |
| ; AVX-NEXT: shldq $1, %rcx, %rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i64 %a to i128 |
| %b.ext = zext i64 %b to i128 |
| %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext) |
| %res.ext = lshr i128 %clmul, 63 |
| %res = trunc i128 %res.ext to i64 |
| ret i64 %res |
| } |
| |
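| ; clmulh returns the high half of the widened product: zext to 2N bits, |
| ; clmul, then lshr N. With PCLMUL this is a single pclmulqdq plus a shift |
| ; of the scalar result; the i8 scalar expansion below instead materializes |
| ; shifted copies of the operand and selects them with test/cmov before |
| ; xor-reducing. |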
| define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { |
| ; SCALAR-LABEL: clmulh_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %r12 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movzbl %dil, %ecx |
| ; SCALAR-NEXT: movl %ecx, %ebx |
| ; SCALAR-NEXT: shll $8, %ebx |
| ; SCALAR-NEXT: movl %ecx, %r11d |
| ; SCALAR-NEXT: shll $9, %r11d |
| ; SCALAR-NEXT: movl %ecx, %r10d |
| ; SCALAR-NEXT: shll $10, %r10d |
| ; SCALAR-NEXT: movl %ecx, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %ecx, %r9d |
| ; SCALAR-NEXT: shll $12, %r9d |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: shll $13, %r8d |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: shll $14, %edx |
| ; SCALAR-NEXT: xorl %ebp, %ebp |
| ; SCALAR-NEXT: testw %bp, %bp |
| ; SCALAR-NEXT: cmovel %ebp, %edx |
| ; SCALAR-NEXT: cmovel %ebp, %r8d |
| ; SCALAR-NEXT: cmovel %ebp, %r9d |
| ; SCALAR-NEXT: cmovel %ebp, %eax |
| ; SCALAR-NEXT: cmovel %ebp, %r10d |
| ; SCALAR-NEXT: cmovel %ebp, %r11d |
| ; SCALAR-NEXT: cmovel %ebp, %ebx |
| ; SCALAR-NEXT: shll $15, %edi |
| ; SCALAR-NEXT: testw %bp, %bp |
| ; SCALAR-NEXT: cmovnel %edi, %ebp |
| ; SCALAR-NEXT: movl %esi, %edi |
| ; SCALAR-NEXT: andl $1, %edi |
| ; SCALAR-NEXT: cmovnel %ecx, %edi |
| ; SCALAR-NEXT: leal (%rcx,%rcx), %r14d |
| ; SCALAR-NEXT: movl %esi, %r15d |
| ; SCALAR-NEXT: andl $2, %r15d |
| ; SCALAR-NEXT: cmovnel %r14d, %r15d |
| ; SCALAR-NEXT: xorl %edi, %r15d |
| ; SCALAR-NEXT: leal (,%rcx,4), %edi |
| ; SCALAR-NEXT: movl %esi, %r14d |
| ; SCALAR-NEXT: andl $4, %r14d |
| ; SCALAR-NEXT: cmovnel %edi, %r14d |
| ; SCALAR-NEXT: leal (,%rcx,8), %r12d |
| ; SCALAR-NEXT: movl %esi, %edi |
| ; SCALAR-NEXT: andl $8, %edi |
| ; SCALAR-NEXT: cmovnel %r12d, %edi |
| ; SCALAR-NEXT: xorl %r14d, %edi |
| ; SCALAR-NEXT: xorl %r15d, %edi |
| ; SCALAR-NEXT: movl %ecx, %r14d |
| ; SCALAR-NEXT: shll $4, %r14d |
| ; SCALAR-NEXT: movl %esi, %r15d |
| ; SCALAR-NEXT: andl $16, %r15d |
| ; SCALAR-NEXT: cmovnel %r14d, %r15d |
| ; SCALAR-NEXT: movl %ecx, %r14d |
| ; SCALAR-NEXT: shll $5, %r14d |
| ; SCALAR-NEXT: movl %esi, %r12d |
| ; SCALAR-NEXT: andl $32, %r12d |
| ; SCALAR-NEXT: cmovnel %r14d, %r12d |
| ; SCALAR-NEXT: xorl %r15d, %r12d |
| ; SCALAR-NEXT: movl %ecx, %r14d |
| ; SCALAR-NEXT: shll $6, %r14d |
| ; SCALAR-NEXT: movl %esi, %r15d |
| ; SCALAR-NEXT: andl $64, %r15d |
| ; SCALAR-NEXT: cmovnel %r14d, %r15d |
| ; SCALAR-NEXT: xorl %r12d, %r15d |
| ; SCALAR-NEXT: xorl %edi, %r15d |
| ; SCALAR-NEXT: shll $7, %ecx |
| ; SCALAR-NEXT: andl $128, %esi |
| ; SCALAR-NEXT: cmovel %esi, %ecx |
| ; SCALAR-NEXT: xorl %ebx, %ecx |
| ; SCALAR-NEXT: xorl %r11d, %ecx |
| ; SCALAR-NEXT: xorl %r10d, %ecx |
| ; SCALAR-NEXT: xorl %r15d, %ecx |
| ; SCALAR-NEXT: xorl %r9d, %eax |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %edx, %eax |
| ; SCALAR-NEXT: xorl %ebp, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: shrl $8, %eax |
| ; SCALAR-NEXT: # kill: def $al killed $al killed $eax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r12 |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulh_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzbl %sil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzbl %dil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $8, %eax |
| ; SSE-PCLMUL-NEXT: # kill: def $al killed $al killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulh_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzbl %sil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzbl %dil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $8, %eax |
| ; AVX-NEXT: # kill: def $al killed $al killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i8 %a to i16 |
| %b.ext = zext i8 %b to i16 |
| %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext) |
| %res.ext = lshr i16 %clmul, 8 |
| %res = trunc i16 %res.ext to i8 |
| ret i8 %res |
| } |
| |
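| ; The i16 scalar expansion runs out of registers, so the autogenerated |
| ; checks match the resulting stack spills and reloads with the |
| ; {{[-0-9]+}}(%r{{[sb]}}p) offset patterns instead of fixed slots. |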
| define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { |
| ; SCALAR-LABEL: clmulh_i16: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %r13 |
| ; SCALAR-NEXT: pushq %r12 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: movl %edi, %r14d |
| ; SCALAR-NEXT: movl %edi, %r13d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: movl %edi, %r15d |
| ; SCALAR-NEXT: movl %edi, %ebp |
| ; SCALAR-NEXT: movl %edi, %r10d |
| ; SCALAR-NEXT: movl %edi, %ebx |
| ; SCALAR-NEXT: movl %edi, %r11d |
| ; SCALAR-NEXT: movl %edi, %esi |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: movl %edi, %edx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shll $16, %r8d |
| ; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: shll $17, %r14d |
| ; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: shll $18, %r13d |
| ; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: shll $19, %r12d |
| ; SCALAR-NEXT: movl %r12d, %r13d |
| ; SCALAR-NEXT: shll $20, %r15d |
| ; SCALAR-NEXT: movl %r15d, %r12d |
| ; SCALAR-NEXT: shll $21, %ebp |
| ; SCALAR-NEXT: movl %ebp, %r15d |
| ; SCALAR-NEXT: shll $22, %r10d |
| ; SCALAR-NEXT: shll $23, %ebx |
| ; SCALAR-NEXT: movl %ebx, %r14d |
| ; SCALAR-NEXT: shll $24, %r11d |
| ; SCALAR-NEXT: movl %r11d, %ebp |
| ; SCALAR-NEXT: shll $25, %esi |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: shll $26, %ecx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: shll $27, %eax |
| ; SCALAR-NEXT: movl %eax, %esi |
| ; SCALAR-NEXT: shll $28, %edx |
| ; SCALAR-NEXT: movl %edi, %r11d |
| ; SCALAR-NEXT: shll $29, %r11d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $30, %ecx |
| ; SCALAR-NEXT: xorl %eax, %eax |
| ; SCALAR-NEXT: testl $0, %eax |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r11d |
| ; SCALAR-NEXT: cmovel %eax, %edx |
| ; SCALAR-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %esi |
| ; SCALAR-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %ebx |
| ; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %ebp |
| ; SCALAR-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r14d |
| ; SCALAR-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r10d |
| ; SCALAR-NEXT: cmovel %eax, %r15d |
| ; SCALAR-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; SCALAR-NEXT: cmovel %eax, %r12d |
| ; SCALAR-NEXT: cmovel %eax, %r13d |
| ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload |
| ; SCALAR-NEXT: cmovel %eax, %r15d |
| ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Reload |
| ; SCALAR-NEXT: cmovel %eax, %r14d |
| ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: movl %edi, %ebx |
| ; SCALAR-NEXT: shll $31, %ebx |
| ; SCALAR-NEXT: testl $0, %eax |
| ; SCALAR-NEXT: cmovel %eax, %ebx |
| ; SCALAR-NEXT: movzwl %di, %edi |
| ; SCALAR-NEXT: movl %r9d, %eax |
| ; SCALAR-NEXT: andl $1, %eax |
| ; SCALAR-NEXT: cmovnel %edi, %eax |
| ; SCALAR-NEXT: movl %r9d, %ecx |
| ; SCALAR-NEXT: andl $2, %ecx |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %esi |
| ; SCALAR-NEXT: cmovnel %esi, %ecx |
| ; SCALAR-NEXT: xorl %eax, %ecx |
| ; SCALAR-NEXT: movl %r9d, %eax |
| ; SCALAR-NEXT: andl $4, %eax |
| ; SCALAR-NEXT: leal (,%rdi,4), %esi |
| ; SCALAR-NEXT: cmovnel %esi, %eax |
| ; SCALAR-NEXT: movl %r9d, %esi |
| ; SCALAR-NEXT: andl $8, %esi |
| ; SCALAR-NEXT: leal (,%rdi,8), %ebp |
| ; SCALAR-NEXT: cmovnel %ebp, %esi |
| ; SCALAR-NEXT: xorl %eax, %esi |
| ; SCALAR-NEXT: xorl %ecx, %esi |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $4, %eax |
| ; SCALAR-NEXT: movl %r9d, %ecx |
| ; SCALAR-NEXT: andl $16, %ecx |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $5, %eax |
| ; SCALAR-NEXT: movl %r9d, %ebp |
| ; SCALAR-NEXT: andl $32, %ebp |
| ; SCALAR-NEXT: cmovnel %eax, %ebp |
| ; SCALAR-NEXT: xorl %ecx, %ebp |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $6, %ecx |
| ; SCALAR-NEXT: movl %r9d, %eax |
| ; SCALAR-NEXT: andl $64, %eax |
| ; SCALAR-NEXT: cmovnel %ecx, %eax |
| ; SCALAR-NEXT: xorl %ebp, %eax |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $7, %ecx |
| ; SCALAR-NEXT: movl %r9d, %esi |
| ; SCALAR-NEXT: andl $128, %esi |
| ; SCALAR-NEXT: cmovnel %ecx, %esi |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $8, %ecx |
| ; SCALAR-NEXT: movl %r9d, %ebp |
| ; SCALAR-NEXT: andl $256, %ebp # imm = 0x100 |
| ; SCALAR-NEXT: cmovnel %ecx, %ebp |
| ; SCALAR-NEXT: xorl %esi, %ebp |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $9, %ecx |
| ; SCALAR-NEXT: movl %r9d, %edx |
| ; SCALAR-NEXT: andl $512, %edx # imm = 0x200 |
| ; SCALAR-NEXT: cmovnel %ecx, %edx |
| ; SCALAR-NEXT: xorl %ebp, %edx |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shll $10, %ecx |
| ; SCALAR-NEXT: movl %r9d, %esi |
| ; SCALAR-NEXT: andl $1024, %esi # imm = 0x400 |
| ; SCALAR-NEXT: cmovnel %ecx, %esi |
| ; SCALAR-NEXT: xorl %edx, %esi |
| ; SCALAR-NEXT: xorl %eax, %esi |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %r9d, %ecx |
| ; SCALAR-NEXT: andl $2048, %ecx # imm = 0x800 |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $12, %eax |
| ; SCALAR-NEXT: movl %r9d, %edx |
| ; SCALAR-NEXT: andl $4096, %edx # imm = 0x1000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %ecx, %edx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $13, %eax |
| ; SCALAR-NEXT: movl %r9d, %ecx |
| ; SCALAR-NEXT: andl $8192, %ecx # imm = 0x2000 |
| ; SCALAR-NEXT: cmovnel %eax, %ecx |
| ; SCALAR-NEXT: xorl %edx, %ecx |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: shll $14, %eax |
| ; SCALAR-NEXT: movl %r9d, %edx |
| ; SCALAR-NEXT: andl $16384, %edx # imm = 0x4000 |
| ; SCALAR-NEXT: cmovnel %eax, %edx |
| ; SCALAR-NEXT: xorl %ecx, %edx |
| ; SCALAR-NEXT: shll $15, %edi |
| ; SCALAR-NEXT: andl $32768, %r9d # imm = 0x8000 |
| ; SCALAR-NEXT: cmovel %r9d, %edi |
| ; SCALAR-NEXT: xorl %edx, %edi |
| ; SCALAR-NEXT: xorl %esi, %edi |
| ; SCALAR-NEXT: xorl %r14d, %r8d |
| ; SCALAR-NEXT: xorl %r15d, %r8d |
| ; SCALAR-NEXT: xorl %r13d, %r8d |
| ; SCALAR-NEXT: xorl %r12d, %r8d |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %edi, %r8d |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %r8d, %r10d |
| ; SCALAR-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload |
| ; SCALAR-NEXT: xorl %ebx, %r11d |
| ; SCALAR-NEXT: xorl %r10d, %r11d |
| ; SCALAR-NEXT: shrl $16, %r11d |
| ; SCALAR-NEXT: movl %r11d, %eax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r12 |
| ; SCALAR-NEXT: popq %r13 |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulh_i16: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzwl %si, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzwl %di, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $16, %eax |
| ; SSE-PCLMUL-NEXT: # kill: def $ax killed $ax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulh_i16: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzwl %si, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzwl %di, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $16, %eax |
| ; AVX-NEXT: # kill: def $ax killed $ax killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i16 %a to i32 |
| %b.ext = zext i16 %b to i32 |
| %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext) |
| %res.ext = lshr i32 %clmul, 16 |
| %res = trunc i32 %res.ext to i16 |
| ret i16 %res |
| } |
| |
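| ; For i32 the widened multiply is done in 64-bit registers: bits 0..31 of |
| ; the multiplier are tested with andl/cmovneq, bits 32..63 with btq/cmovaeq |
| ; (those masks no longer fit a 32-bit immediate), and the high half is then |
| ; extracted with a single shrq $32. |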
| define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { |
| ; SCALAR-LABEL: clmulh_i32: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: movl %edi, %eax |
| ; SCALAR-NEXT: leaq (%rax,%rax), %rdx |
| ; SCALAR-NEXT: movl %esi, %ecx |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $2, %esi |
| ; SCALAR-NEXT: cmovneq %rdx, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $1, %edi |
| ; SCALAR-NEXT: cmovneq %rax, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: leaq (,%rax,4), %rdx |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $4, %esi |
| ; SCALAR-NEXT: cmovneq %rdx, %rsi |
| ; SCALAR-NEXT: leaq (,%rax,8), %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $8, %edx |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $4, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $16, %edi |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $5, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $32, %r8d |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $6, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $64, %esi |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $7, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $128, %edi |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $8, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $256, %r8d # imm = 0x100 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $9, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $512, %edi # imm = 0x200 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $10, %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $1024, %edx # imm = 0x400 |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $11, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $2048, %edi # imm = 0x800 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $12, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $4096, %r8d # imm = 0x1000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $13, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $8192, %edi # imm = 0x2000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $14, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $16384, %r8d # imm = 0x4000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $15, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $32768, %esi # imm = 0x8000 |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $16, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $65536, %edi # imm = 0x10000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $17, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $131072, %r8d # imm = 0x20000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $18, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $262144, %edi # imm = 0x40000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $19, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $524288, %r8d # imm = 0x80000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $20, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $1048576, %edi # imm = 0x100000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $21, %r8 |
| ; SCALAR-NEXT: movl %ecx, %edx |
| ; SCALAR-NEXT: andl $2097152, %edx # imm = 0x200000 |
| ; SCALAR-NEXT: cmovneq %r8, %rdx |
| ; SCALAR-NEXT: xorq %rdi, %rdx |
| ; SCALAR-NEXT: xorq %rsi, %rdx |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $22, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $4194304, %edi # imm = 0x400000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $23, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $8388608, %r8d # imm = 0x800000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $24, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $16777216, %edi # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $25, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $33554432, %r8d # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $26, %rsi |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $67108864, %edi # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $27, %rsi |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $134217728, %r8d # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovneq %rsi, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $28, %rdi |
| ; SCALAR-NEXT: movl %ecx, %esi |
| ; SCALAR-NEXT: andl $268435456, %esi # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovneq %rdi, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: xorq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $29, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $536870912, %edi # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $30, %rdx |
| ; SCALAR-NEXT: movl %ecx, %r8d |
| ; SCALAR-NEXT: andl $1073741824, %r8d # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdx |
| ; SCALAR-NEXT: shlq $31, %rdx |
| ; SCALAR-NEXT: movl %ecx, %edi |
| ; SCALAR-NEXT: andl $-2147483648, %edi # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovneq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $32, %r8 |
| ; SCALAR-NEXT: xorl %edx, %edx |
| ; SCALAR-NEXT: btq $32, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $33, %rdi |
| ; SCALAR-NEXT: btq $33, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $34, %r8 |
| ; SCALAR-NEXT: btq $34, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %r9 |
| ; SCALAR-NEXT: shlq $35, %r9 |
| ; SCALAR-NEXT: btq $35, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $36, %rdi |
| ; SCALAR-NEXT: btq $36, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r9, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $37, %rsi |
| ; SCALAR-NEXT: btq $37, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $38, %r8 |
| ; SCALAR-NEXT: btq $38, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $39, %rsi |
| ; SCALAR-NEXT: btq $39, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $40, %r8 |
| ; SCALAR-NEXT: btq $40, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $41, %rsi |
| ; SCALAR-NEXT: btq $41, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $42, %r8 |
| ; SCALAR-NEXT: btq $42, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $43, %rsi |
| ; SCALAR-NEXT: btq $43, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $44, %r8 |
| ; SCALAR-NEXT: btq $44, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %r9 |
| ; SCALAR-NEXT: shlq $45, %r9 |
| ; SCALAR-NEXT: btq $45, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r9 |
| ; SCALAR-NEXT: xorq %r8, %r9 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $46, %rsi |
| ; SCALAR-NEXT: btq $46, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r9, %rsi |
| ; SCALAR-NEXT: xorq %rdi, %rsi |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $47, %rdi |
| ; SCALAR-NEXT: btq $47, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $48, %r8 |
| ; SCALAR-NEXT: btq $48, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $49, %rdi |
| ; SCALAR-NEXT: btq $49, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $50, %r8 |
| ; SCALAR-NEXT: btq $50, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $51, %rdi |
| ; SCALAR-NEXT: btq $51, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $52, %r8 |
| ; SCALAR-NEXT: btq $52, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $53, %rdi |
| ; SCALAR-NEXT: btq $53, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $54, %r8 |
| ; SCALAR-NEXT: btq $54, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $55, %rdi |
| ; SCALAR-NEXT: btq $55, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $56, %r8 |
| ; SCALAR-NEXT: btq $56, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rdi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rdi |
| ; SCALAR-NEXT: shlq $57, %rdi |
| ; SCALAR-NEXT: btq $57, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: xorq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $58, %rsi |
| ; SCALAR-NEXT: btq $58, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $59, %r8 |
| ; SCALAR-NEXT: btq $59, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $60, %rsi |
| ; SCALAR-NEXT: btq $60, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: movq %rax, %r8 |
| ; SCALAR-NEXT: shlq $61, %r8 |
| ; SCALAR-NEXT: btq $61, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %r8 |
| ; SCALAR-NEXT: xorq %rsi, %r8 |
| ; SCALAR-NEXT: movq %rax, %rsi |
| ; SCALAR-NEXT: shlq $62, %rsi |
| ; SCALAR-NEXT: btq $62, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rsi |
| ; SCALAR-NEXT: xorq %r8, %rsi |
| ; SCALAR-NEXT: shlq $63, %rax |
| ; SCALAR-NEXT: btq $63, %rcx |
| ; SCALAR-NEXT: cmovaeq %rdx, %rax |
| ; SCALAR-NEXT: xorq %rsi, %rax |
| ; SCALAR-NEXT: xorq %rdi, %rax |
| ; SCALAR-NEXT: shrq $32, %rax |
| ; SCALAR-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: clmulh_i32: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrq $32, %rax |
| ; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulh_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrq $32, %rax |
| ; AVX-NEXT: # kill: def $eax killed $eax killed $rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i32 %a to i64 |
| %b.ext = zext i32 %b to i64 |
| %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) |
| %res.ext = lshr i64 %clmul, 32 |
| %res = trunc i64 %res.ext to i32 |
| ret i32 %res |
| } |
| |
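| ; clmulh_i64 needs bits [127:64] of the 128-bit product, which is exactly |
| ; the high pclmulqdq quadword, so the PCLMUL lowerings extract it directly |
| ; (pshufd or pextrq $1) with no shift. The scalar path reuses the |
| ; bit-reverse expansion from clmulr_i64 with the final >> 1 folded into |
| ; the last swizzle step (note the 0x...5554 mask). |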
| define i64 @clmulh_i64(i64 %a, i64 %b) nounwind { |
| ; SCALAR-LABEL: clmulh_i64: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: bswapq %rdi |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: shrq $4, %rax |
| ; SCALAR-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F |
| ; SCALAR-NEXT: andq %rdx, %rax |
| ; SCALAR-NEXT: andq %rdx, %rdi |
| ; SCALAR-NEXT: shlq $4, %rdi |
| ; SCALAR-NEXT: orq %rax, %rdi |
| ; SCALAR-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 |
| ; SCALAR-NEXT: movq %rdi, %rax |
| ; SCALAR-NEXT: andq %rcx, %rax |
| ; SCALAR-NEXT: shrq $2, %rdi |
| ; SCALAR-NEXT: andq %rcx, %rdi |
| ; SCALAR-NEXT: leaq (%rdi,%rax,4), %rdi |
| ; SCALAR-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555 |
| ; SCALAR-NEXT: movq %rdi, %r8 |
| ; SCALAR-NEXT: andq %rax, %r8 |
| ; SCALAR-NEXT: shrq %rdi |
| ; SCALAR-NEXT: movq %rdi, %r9 |
| ; SCALAR-NEXT: andq %rax, %r9 |
| ; SCALAR-NEXT: leaq (%r9,%r8,2), %r8 |
| ; SCALAR-NEXT: leaq (%r8,%r8), %r9 |
| ; SCALAR-NEXT: bswapq %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: shrq $4, %r10 |
| ; SCALAR-NEXT: andq %rdx, %r10 |
| ; SCALAR-NEXT: andq %rdx, %rsi |
| ; SCALAR-NEXT: shlq $4, %rsi |
| ; SCALAR-NEXT: orq %r10, %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: andq %rcx, %r10 |
| ; SCALAR-NEXT: shrq $2, %rsi |
| ; SCALAR-NEXT: andq %rcx, %rsi |
| ; SCALAR-NEXT: leaq (%rsi,%r10,4), %rsi |
| ; SCALAR-NEXT: movq %rsi, %r10 |
| ; SCALAR-NEXT: andq %rax, %r10 |
| ; SCALAR-NEXT: shrq %rsi |
| ; SCALAR-NEXT: andq %rax, %rsi |
| ; SCALAR-NEXT: leaq (%rsi,%r10,2), %rsi |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $2, %r10d |
| ; SCALAR-NEXT: cmovneq %r9, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $1, %r11d |
| ; SCALAR-NEXT: cmovneq %r8, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: leaq (,%r8,4), %r9 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $4, %r10d |
| ; SCALAR-NEXT: cmovneq %r9, %r10 |
| ; SCALAR-NEXT: leaq (,%r8,8), %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $8, %r9d |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $4, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $16, %r11d |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $5, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $32, %ebx |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $6, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $64, %r10d |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $7, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $128, %r11d |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $8, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $256, %ebx # imm = 0x100 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $9, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $512, %r11d # imm = 0x200 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $10, %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $1024, %r9d # imm = 0x400 |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $11, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $2048, %r11d # imm = 0x800 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $12, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $4096, %ebx # imm = 0x1000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $13, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $8192, %r11d # imm = 0x2000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $14, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $16384, %ebx # imm = 0x4000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $15, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $32768, %r10d # imm = 0x8000 |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $16, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $65536, %r11d # imm = 0x10000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $17, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $131072, %ebx # imm = 0x20000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $18, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $262144, %r11d # imm = 0x40000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $19, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $524288, %ebx # imm = 0x80000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $20, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $1048576, %r11d # imm = 0x100000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $21, %rbx |
| ; SCALAR-NEXT: movl %esi, %r9d |
| ; SCALAR-NEXT: andl $2097152, %r9d # imm = 0x200000 |
| ; SCALAR-NEXT: cmovneq %rbx, %r9 |
| ; SCALAR-NEXT: xorq %r11, %r9 |
| ; SCALAR-NEXT: xorq %r10, %r9 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $22, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $4194304, %r11d # imm = 0x400000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $23, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $8388608, %ebx # imm = 0x800000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $24, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $16777216, %r11d # imm = 0x1000000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $25, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $33554432, %ebx # imm = 0x2000000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $26, %r10 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $67108864, %r11d # imm = 0x4000000 |
| ; SCALAR-NEXT: cmovneq %r10, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $27, %r10 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $134217728, %ebx # imm = 0x8000000 |
| ; SCALAR-NEXT: cmovneq %r10, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $28, %r11 |
| ; SCALAR-NEXT: movl %esi, %r10d |
| ; SCALAR-NEXT: andl $268435456, %r10d # imm = 0x10000000 |
| ; SCALAR-NEXT: cmovneq %r11, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: xorq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $29, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $536870912, %r11d # imm = 0x20000000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $30, %r9 |
| ; SCALAR-NEXT: movl %esi, %ebx |
| ; SCALAR-NEXT: andl $1073741824, %ebx # imm = 0x40000000 |
| ; SCALAR-NEXT: cmovneq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r9 |
| ; SCALAR-NEXT: shlq $31, %r9 |
| ; SCALAR-NEXT: movl %esi, %r11d |
| ; SCALAR-NEXT: andl $-2147483648, %r11d # imm = 0x80000000 |
| ; SCALAR-NEXT: cmovneq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $32, %rbx |
| ; SCALAR-NEXT: xorl %r9d, %r9d |
| ; SCALAR-NEXT: btq $32, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $33, %r11 |
| ; SCALAR-NEXT: btq $33, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $34, %rbx |
| ; SCALAR-NEXT: btq $34, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r14 |
| ; SCALAR-NEXT: shlq $35, %r14 |
| ; SCALAR-NEXT: btq $35, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r14 |
| ; SCALAR-NEXT: xorq %rbx, %r14 |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $36, %r11 |
| ; SCALAR-NEXT: btq $36, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %r14, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $37, %r10 |
| ; SCALAR-NEXT: btq $37, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $38, %rbx |
| ; SCALAR-NEXT: btq $38, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $39, %r10 |
| ; SCALAR-NEXT: btq $39, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $40, %rbx |
| ; SCALAR-NEXT: btq $40, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $41, %r10 |
| ; SCALAR-NEXT: btq $41, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $42, %rbx |
| ; SCALAR-NEXT: btq $42, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $43, %r10 |
| ; SCALAR-NEXT: btq $43, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $44, %rbx |
| ; SCALAR-NEXT: btq $44, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r14 |
| ; SCALAR-NEXT: shlq $45, %r14 |
| ; SCALAR-NEXT: btq $45, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r14 |
| ; SCALAR-NEXT: xorq %rbx, %r14 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $46, %r10 |
| ; SCALAR-NEXT: btq $46, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %r14, %r10 |
| ; SCALAR-NEXT: xorq %r11, %r10 |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $47, %r11 |
| ; SCALAR-NEXT: btq $47, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $48, %rbx |
| ; SCALAR-NEXT: btq $48, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $49, %r11 |
| ; SCALAR-NEXT: btq $49, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $50, %rbx |
| ; SCALAR-NEXT: btq $50, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $51, %r11 |
| ; SCALAR-NEXT: btq $51, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $52, %rbx |
| ; SCALAR-NEXT: btq $52, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $53, %r11 |
| ; SCALAR-NEXT: btq $53, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $54, %rbx |
| ; SCALAR-NEXT: btq $54, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $55, %r11 |
| ; SCALAR-NEXT: btq $55, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $56, %rbx |
| ; SCALAR-NEXT: btq $56, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r11, %rbx |
| ; SCALAR-NEXT: movq %r8, %r11 |
| ; SCALAR-NEXT: shlq $57, %r11 |
| ; SCALAR-NEXT: btq $57, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r11 |
| ; SCALAR-NEXT: xorq %rbx, %r11 |
| ; SCALAR-NEXT: xorq %r10, %r11 |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $58, %r10 |
| ; SCALAR-NEXT: btq $58, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $59, %rbx |
| ; SCALAR-NEXT: btq $59, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: movq %r8, %r10 |
| ; SCALAR-NEXT: shlq $60, %r10 |
| ; SCALAR-NEXT: btq $60, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r10 |
| ; SCALAR-NEXT: xorq %rbx, %r10 |
| ; SCALAR-NEXT: movq %r8, %rbx |
| ; SCALAR-NEXT: shlq $61, %rbx |
| ; SCALAR-NEXT: btq $61, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rbx |
| ; SCALAR-NEXT: xorq %r10, %rbx |
| ; SCALAR-NEXT: shlq $62, %r8 |
| ; SCALAR-NEXT: btq $62, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %r8 |
| ; SCALAR-NEXT: xorq %rbx, %r8 |
| ; SCALAR-NEXT: shlq $63, %rdi |
| ; SCALAR-NEXT: btq $63, %rsi |
| ; SCALAR-NEXT: cmovaeq %r9, %rdi |
| ; SCALAR-NEXT: xorq %r8, %rdi |
| ; SCALAR-NEXT: xorq %r11, %rdi |
| ; SCALAR-NEXT: bswapq %rdi |
| ; SCALAR-NEXT: movq %rdi, %rsi |
| ; SCALAR-NEXT: shrq $4, %rsi |
| ; SCALAR-NEXT: andq %rdx, %rsi |
| ; SCALAR-NEXT: andq %rdx, %rdi |
| ; SCALAR-NEXT: shlq $4, %rdi |
| ; SCALAR-NEXT: orq %rsi, %rdi |
| ; SCALAR-NEXT: movq %rdi, %rdx |
| ; SCALAR-NEXT: andq %rcx, %rdx |
| ; SCALAR-NEXT: shrq $2, %rdi |
| ; SCALAR-NEXT: andq %rcx, %rdi |
| ; SCALAR-NEXT: leaq (%rdi,%rdx,4), %rcx |
| ; SCALAR-NEXT: andq %rcx, %rax |
| ; SCALAR-NEXT: shrq %rcx |
| ; SCALAR-NEXT: movabsq $6148914691236517204, %rdx # imm = 0x5555555555555554 |
| ; SCALAR-NEXT: andq %rcx, %rdx |
| ; SCALAR-NEXT: leaq (%rdx,%rax,2), %rax |
| ; SCALAR-NEXT: shrq %rax |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE2-PCLMUL-LABEL: clmulh_i64: |
| ; SSE2-PCLMUL: # %bb.0: |
| ; SSE2-PCLMUL-NEXT: movq %rsi, %xmm0 |
| ; SSE2-PCLMUL-NEXT: movq %rdi, %xmm1 |
| ; SSE2-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE2-PCLMUL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] |
| ; SSE2-PCLMUL-NEXT: movq %xmm0, %rax |
| ; SSE2-PCLMUL-NEXT: retq |
| ; |
| ; SSE42-PCLMUL-LABEL: clmulh_i64: |
| ; SSE42-PCLMUL: # %bb.0: |
| ; SSE42-PCLMUL-NEXT: movq %rsi, %xmm0 |
| ; SSE42-PCLMUL-NEXT: movq %rdi, %xmm1 |
| ; SSE42-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE42-PCLMUL-NEXT: pextrq $1, %xmm1, %rax |
| ; SSE42-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: clmulh_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovq %rsi, %xmm0 |
| ; AVX-NEXT: vmovq %rdi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vpextrq $1, %xmm0, %rax |
| ; AVX-NEXT: retq |
| %a.ext = zext i64 %a to i128 |
| %b.ext = zext i64 %b to i128 |
| %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext) |
| %res.ext = lshr i128 %clmul, 64 |
| %res = trunc i128 %res.ext to i64 |
| ret i64 %res |
| } |
| |
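| ; noimplicitfloat forbids using XMM registers, so every RUN configuration |
| ; shares the common CHECK prefix and falls back to the scalar test/cmov |
| ; expansion even when PCLMUL is available. |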
| define i8 @clmul_i8_noimplicitfloat(i8 %a, i8 %b) nounwind noimplicitfloat { |
| ; CHECK-LABEL: clmul_i8_noimplicitfloat: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi |
| ; CHECK-NEXT: xorl %ecx, %ecx |
| ; CHECK-NEXT: testb $1, %sil |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: cmovel %ecx, %eax |
| ; CHECK-NEXT: leal (%rdi,%rdi), %edx |
| ; CHECK-NEXT: movzbl %dl, %edx |
| ; CHECK-NEXT: testb $2, %sil |
| ; CHECK-NEXT: cmovel %ecx, %edx |
| ; CHECK-NEXT: xorl %eax, %edx |
| ; CHECK-NEXT: leal (,%rdi,4), %eax |
| ; CHECK-NEXT: movzbl %al, %r8d |
| ; CHECK-NEXT: testb $4, %sil |
| ; CHECK-NEXT: cmovel %ecx, %r8d |
| ; CHECK-NEXT: leal (,%rdi,8), %eax |
| ; CHECK-NEXT: movzbl %al, %eax |
| ; CHECK-NEXT: testb $8, %sil |
| ; CHECK-NEXT: cmovel %ecx, %eax |
| ; CHECK-NEXT: xorl %r8d, %eax |
| ; CHECK-NEXT: xorl %edx, %eax |
| ; CHECK-NEXT: movl %edi, %edx |
| ; CHECK-NEXT: shlb $4, %dl |
| ; CHECK-NEXT: movzbl %dl, %edx |
| ; CHECK-NEXT: testb $16, %sil |
| ; CHECK-NEXT: cmovel %ecx, %edx |
| ; CHECK-NEXT: movl %edi, %r8d |
| ; CHECK-NEXT: shlb $5, %r8b |
| ; CHECK-NEXT: movzbl %r8b, %r8d |
| ; CHECK-NEXT: testb $32, %sil |
| ; CHECK-NEXT: cmovel %ecx, %r8d |
| ; CHECK-NEXT: xorl %edx, %r8d |
| ; CHECK-NEXT: movl %edi, %edx |
| ; CHECK-NEXT: shlb $6, %dl |
| ; CHECK-NEXT: movzbl %dl, %edx |
| ; CHECK-NEXT: testb $64, %sil |
| ; CHECK-NEXT: cmovel %ecx, %edx |
| ; CHECK-NEXT: xorl %r8d, %edx |
| ; CHECK-NEXT: xorl %eax, %edx |
| ; CHECK-NEXT: shlb $7, %dil |
| ; CHECK-NEXT: movzbl %dil, %eax |
| ; CHECK-NEXT: testb $-128, %sil |
| ; CHECK-NEXT: cmovel %ecx, %eax |
| ; CHECK-NEXT: xorl %edx, %eax |
| ; CHECK-NEXT: # kill: def $al killed $al killed $eax |
| ; CHECK-NEXT: retq |
| %res = call i8 @llvm.clmul.i8(i8 %a, i8 %b) |
| ret i8 %res |
| } |
| |
| declare void @use(i8) |
| |
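| ; clmul is commutative: clmul(x, y) and clmul(y, x) CSE into a single computation, so one expansion (or one pclmulqdq) feeds both stores. |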
| define void @commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind { |
| ; SCALAR-LABEL: commutative_clmul_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi |
| ; SCALAR-NEXT: xorl %eax, %eax |
| ; SCALAR-NEXT: testb $1, %sil |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %r9d |
| ; SCALAR-NEXT: movzbl %r9b, %r9d |
| ; SCALAR-NEXT: testb $2, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r9d |
| ; SCALAR-NEXT: xorl %r8d, %r9d |
| ; SCALAR-NEXT: leal (,%rdi,4), %r8d |
| ; SCALAR-NEXT: movzbl %r8b, %r10d |
| ; SCALAR-NEXT: testb $4, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r10d |
| ; SCALAR-NEXT: leal (,%rdi,8), %r8d |
| ; SCALAR-NEXT: movzbl %r8b, %r8d |
| ; SCALAR-NEXT: testb $8, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: xorl %r10d, %r8d |
| ; SCALAR-NEXT: xorl %r9d, %r8d |
| ; SCALAR-NEXT: movl %edi, %r9d |
| ; SCALAR-NEXT: shlb $4, %r9b |
| ; SCALAR-NEXT: movzbl %r9b, %r9d |
| ; SCALAR-NEXT: testb $16, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r9d |
| ; SCALAR-NEXT: movl %edi, %r10d |
| ; SCALAR-NEXT: shlb $5, %r10b |
| ; SCALAR-NEXT: movzbl %r10b, %r10d |
| ; SCALAR-NEXT: testb $32, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r10d |
| ; SCALAR-NEXT: xorl %r9d, %r10d |
| ; SCALAR-NEXT: movl %edi, %r9d |
| ; SCALAR-NEXT: shlb $6, %r9b |
| ; SCALAR-NEXT: movzbl %r9b, %r9d |
| ; SCALAR-NEXT: testb $64, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r9d |
| ; SCALAR-NEXT: xorl %r10d, %r9d |
| ; SCALAR-NEXT: xorl %r8d, %r9d |
| ; SCALAR-NEXT: shlb $7, %dil |
| ; SCALAR-NEXT: movzbl %dil, %edi |
| ; SCALAR-NEXT: testb $-128, %sil |
| ; SCALAR-NEXT: cmovel %eax, %edi |
| ; SCALAR-NEXT: xorl %r9d, %edi |
| ; SCALAR-NEXT: movb %dil, (%rdx) |
| ; SCALAR-NEXT: movb %dil, (%rcx) |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: commutative_clmul_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: movb %al, (%rdx) |
| ; SSE-PCLMUL-NEXT: movb %al, (%rcx) |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: commutative_clmul_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: movb %al, (%rdx) |
| ; AVX-NEXT: movb %al, (%rcx) |
| ; AVX-NEXT: retq |
| %xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y) |
| %yx = call i8 @llvm.clmul.i8(i8 %y, i8 %x) |
| store i8 %xy, ptr %p0 |
| store i8 %yx, ptr %p1 |
| ret void |
| } |
| |
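| ; The clmulh idiom (zext to i16, clmul, lshr 8, trunc) is commutative as well; both operand orders fold into one computation whose high byte is stored to both pointers. |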
| define void @commutative_clmulh_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind { |
| ; SCALAR-LABEL: commutative_clmulh_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %r13 |
| ; SCALAR-NEXT: pushq %r12 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill |
| ; SCALAR-NEXT: movzbl %sil, %r14d |
| ; SCALAR-NEXT: movl %r14d, %ebp |
| ; SCALAR-NEXT: shll $8, %ebp |
| ; SCALAR-NEXT: movl %r14d, %ebx |
| ; SCALAR-NEXT: shll $9, %ebx |
| ; SCALAR-NEXT: movl %r14d, %r11d |
| ; SCALAR-NEXT: shll $10, %r11d |
| ; SCALAR-NEXT: movl %r14d, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %r14d, %r10d |
| ; SCALAR-NEXT: shll $12, %r10d |
| ; SCALAR-NEXT: movl %r14d, %ecx |
| ; SCALAR-NEXT: shll $13, %ecx |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: shll $14, %r8d |
| ; SCALAR-NEXT: xorl %r15d, %r15d |
| ; SCALAR-NEXT: testw %r15w, %r15w |
| ; SCALAR-NEXT: cmovel %r15d, %r8d |
| ; SCALAR-NEXT: cmovel %r15d, %ecx |
| ; SCALAR-NEXT: cmovel %r15d, %r10d |
| ; SCALAR-NEXT: cmovel %r15d, %eax |
| ; SCALAR-NEXT: cmovel %r15d, %r11d |
| ; SCALAR-NEXT: cmovel %r15d, %ebx |
| ; SCALAR-NEXT: cmovel %r15d, %ebp |
| ; SCALAR-NEXT: shll $15, %esi |
| ; SCALAR-NEXT: testw %r15w, %r15w |
| ; SCALAR-NEXT: cmovel %r15d, %esi |
| ; SCALAR-NEXT: movl %edi, %r15d |
| ; SCALAR-NEXT: andl $1, %r15d |
| ; SCALAR-NEXT: cmovnel %r14d, %r15d |
| ; SCALAR-NEXT: leal (%r14,%r14), %r12d |
| ; SCALAR-NEXT: movl %edi, %r13d |
| ; SCALAR-NEXT: andl $2, %r13d |
| ; SCALAR-NEXT: cmovnel %r12d, %r13d |
| ; SCALAR-NEXT: xorl %r15d, %r13d |
| ; SCALAR-NEXT: leal (,%r14,4), %r15d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $4, %r12d |
| ; SCALAR-NEXT: cmovnel %r15d, %r12d |
| ; SCALAR-NEXT: movl %edi, %r15d |
| ; SCALAR-NEXT: andl $8, %r15d |
| ; SCALAR-NEXT: leal (,%r14,8), %r9d |
| ; SCALAR-NEXT: cmovnel %r9d, %r15d |
| ; SCALAR-NEXT: xorl %r12d, %r15d |
| ; SCALAR-NEXT: xorl %r13d, %r15d |
| ; SCALAR-NEXT: movl %r14d, %r9d |
| ; SCALAR-NEXT: shll $4, %r9d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $16, %r12d |
| ; SCALAR-NEXT: cmovnel %r9d, %r12d |
| ; SCALAR-NEXT: movl %r14d, %r9d |
| ; SCALAR-NEXT: shll $5, %r9d |
| ; SCALAR-NEXT: movl %edi, %r13d |
| ; SCALAR-NEXT: andl $32, %r13d |
| ; SCALAR-NEXT: cmovnel %r9d, %r13d |
| ; SCALAR-NEXT: xorl %r12d, %r13d |
| ; SCALAR-NEXT: movl %r14d, %r9d |
| ; SCALAR-NEXT: shll $6, %r9d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $64, %r12d |
| ; SCALAR-NEXT: cmovnel %r9d, %r12d |
| ; SCALAR-NEXT: xorl %r13d, %r12d |
| ; SCALAR-NEXT: xorl %r15d, %r12d |
| ; SCALAR-NEXT: shll $7, %r14d |
| ; SCALAR-NEXT: andl $128, %edi |
| ; SCALAR-NEXT: cmovnel %r14d, %edi |
| ; SCALAR-NEXT: xorl %ebp, %edi |
| ; SCALAR-NEXT: xorl %ebx, %edi |
| ; SCALAR-NEXT: xorl %r11d, %edi |
| ; SCALAR-NEXT: xorl %r12d, %edi |
| ; SCALAR-NEXT: xorl %r10d, %eax |
| ; SCALAR-NEXT: xorl %ecx, %eax |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: xorl %edi, %eax |
| ; SCALAR-NEXT: shrl $8, %eax |
| ; SCALAR-NEXT: movb %al, (%rdx) |
| ; SCALAR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload |
| ; SCALAR-NEXT: movb %al, (%rcx) |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r12 |
| ; SCALAR-NEXT: popq %r13 |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: commutative_clmulh_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzbl %dil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzbl %sil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $8, %eax |
| ; SSE-PCLMUL-NEXT: movb %al, (%rdx) |
| ; SSE-PCLMUL-NEXT: movb %al, (%rcx) |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: commutative_clmulh_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzbl %dil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzbl %sil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $8, %eax |
| ; AVX-NEXT: movb %al, (%rdx) |
| ; AVX-NEXT: movb %al, (%rcx) |
| ; AVX-NEXT: retq |
| %x.ext = zext i8 %x to i16 |
| %y.ext = zext i8 %y to i16 |
| %clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext) |
| %clmul_yx = call i16 @llvm.clmul.i16(i16 %y.ext, i16 %x.ext) |
| %clmul_xy_lshr = lshr i16 %clmul_xy, 8 |
| %clmul_yx_lshr = lshr i16 %clmul_yx, 8 |
| %clmulh_xy = trunc i16 %clmul_xy_lshr to i8 |
| %clmulh_yx = trunc i16 %clmul_yx_lshr to i8 |
| store i8 %clmulh_xy, ptr %p0 |
| store i8 %clmulh_yx, ptr %p1 |
| ret void |
| } |
| |
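| ; The clmulr idiom shifts the widened product right by bitwidth-1 (7 for i8); as with clmul and clmulh, the commuted calls fold into a single computation. |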
| define void @commutative_clmulr_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind { |
| ; SCALAR-LABEL: commutative_clmulr_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %r15 |
| ; SCALAR-NEXT: pushq %r14 |
| ; SCALAR-NEXT: pushq %r13 |
| ; SCALAR-NEXT: pushq %r12 |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: movzbl %sil, %r14d |
| ; SCALAR-NEXT: movl %r14d, %ebx |
| ; SCALAR-NEXT: shll $8, %ebx |
| ; SCALAR-NEXT: movl %r14d, %r11d |
| ; SCALAR-NEXT: shll $9, %r11d |
| ; SCALAR-NEXT: movl %r14d, %r10d |
| ; SCALAR-NEXT: shll $10, %r10d |
| ; SCALAR-NEXT: movl %r14d, %eax |
| ; SCALAR-NEXT: shll $11, %eax |
| ; SCALAR-NEXT: movl %r14d, %r9d |
| ; SCALAR-NEXT: shll $12, %r9d |
| ; SCALAR-NEXT: movl %esi, %r8d |
| ; SCALAR-NEXT: shll $13, %r8d |
| ; SCALAR-NEXT: xorl %ebp, %ebp |
| ; SCALAR-NEXT: testw %bp, %bp |
| ; SCALAR-NEXT: cmovel %ebp, %r8d |
| ; SCALAR-NEXT: cmovel %ebp, %r9d |
| ; SCALAR-NEXT: cmovel %ebp, %eax |
| ; SCALAR-NEXT: cmovel %ebp, %r10d |
| ; SCALAR-NEXT: cmovel %ebp, %r11d |
| ; SCALAR-NEXT: cmovel %ebp, %ebx |
| ; SCALAR-NEXT: shll $14, %esi |
| ; SCALAR-NEXT: testw %bp, %bp |
| ; SCALAR-NEXT: cmovel %ebp, %esi |
| ; SCALAR-NEXT: movl %edi, %ebp |
| ; SCALAR-NEXT: andl $1, %ebp |
| ; SCALAR-NEXT: cmovnel %r14d, %ebp |
| ; SCALAR-NEXT: leal (%r14,%r14), %r15d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $2, %r12d |
| ; SCALAR-NEXT: cmovnel %r15d, %r12d |
| ; SCALAR-NEXT: xorl %ebp, %r12d |
| ; SCALAR-NEXT: leal (,%r14,4), %ebp |
| ; SCALAR-NEXT: movl %edi, %r15d |
| ; SCALAR-NEXT: andl $4, %r15d |
| ; SCALAR-NEXT: cmovnel %ebp, %r15d |
| ; SCALAR-NEXT: leal (,%r14,8), %r13d |
| ; SCALAR-NEXT: movl %edi, %ebp |
| ; SCALAR-NEXT: andl $8, %ebp |
| ; SCALAR-NEXT: cmovnel %r13d, %ebp |
| ; SCALAR-NEXT: xorl %r15d, %ebp |
| ; SCALAR-NEXT: xorl %r12d, %ebp |
| ; SCALAR-NEXT: movl %r14d, %r15d |
| ; SCALAR-NEXT: shll $4, %r15d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $16, %r12d |
| ; SCALAR-NEXT: cmovnel %r15d, %r12d |
| ; SCALAR-NEXT: movl %r14d, %r15d |
| ; SCALAR-NEXT: shll $5, %r15d |
| ; SCALAR-NEXT: movl %edi, %r13d |
| ; SCALAR-NEXT: andl $32, %r13d |
| ; SCALAR-NEXT: cmovnel %r15d, %r13d |
| ; SCALAR-NEXT: xorl %r12d, %r13d |
| ; SCALAR-NEXT: movl %r14d, %r15d |
| ; SCALAR-NEXT: shll $6, %r15d |
| ; SCALAR-NEXT: movl %edi, %r12d |
| ; SCALAR-NEXT: andl $64, %r12d |
| ; SCALAR-NEXT: cmovnel %r15d, %r12d |
| ; SCALAR-NEXT: xorl %r13d, %r12d |
| ; SCALAR-NEXT: xorl %ebp, %r12d |
| ; SCALAR-NEXT: shll $7, %r14d |
| ; SCALAR-NEXT: andl $128, %edi |
| ; SCALAR-NEXT: cmovnel %r14d, %edi |
| ; SCALAR-NEXT: xorl %ebx, %edi |
| ; SCALAR-NEXT: xorl %r11d, %edi |
| ; SCALAR-NEXT: xorl %r10d, %edi |
| ; SCALAR-NEXT: xorl %r12d, %edi |
| ; SCALAR-NEXT: xorl %r9d, %eax |
| ; SCALAR-NEXT: xorl %r8d, %eax |
| ; SCALAR-NEXT: xorl %esi, %eax |
| ; SCALAR-NEXT: xorl %edi, %eax |
| ; SCALAR-NEXT: shrl $7, %eax |
| ; SCALAR-NEXT: movb %al, (%rdx) |
| ; SCALAR-NEXT: movb %al, (%rcx) |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %r12 |
| ; SCALAR-NEXT: popq %r13 |
| ; SCALAR-NEXT: popq %r14 |
| ; SCALAR-NEXT: popq %r15 |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: commutative_clmulr_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: movzbl %dil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm0 |
| ; SSE-PCLMUL-NEXT: movzbl %sil, %eax |
| ; SSE-PCLMUL-NEXT: movd %eax, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %rax |
| ; SSE-PCLMUL-NEXT: shrl $7, %eax |
| ; SSE-PCLMUL-NEXT: movb %al, (%rdx) |
| ; SSE-PCLMUL-NEXT: movb %al, (%rcx) |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: commutative_clmulr_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movzbl %dil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm0 |
| ; AVX-NEXT: movzbl %sil, %eax |
| ; AVX-NEXT: vmovd %eax, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %rax |
| ; AVX-NEXT: shrl $7, %eax |
| ; AVX-NEXT: movb %al, (%rdx) |
| ; AVX-NEXT: movb %al, (%rcx) |
| ; AVX-NEXT: retq |
| %x.ext = zext i8 %x to i16 |
| %y.ext = zext i8 %y to i16 |
| %clmul_xy = call i16 @llvm.clmul.i16(i16 %x.ext, i16 %y.ext) |
| %clmul_yx = call i16 @llvm.clmul.i16(i16 %y.ext, i16 %x.ext) |
| %clmul_xy_lshr = lshr i16 %clmul_xy, 7 |
| %clmul_yx_lshr = lshr i16 %clmul_yx, 7 |
| %clmulr_xy = trunc i16 %clmul_xy_lshr to i8 |
| %clmulr_yx = trunc i16 %clmul_yx_lshr to i8 |
| store i8 %clmulr_xy, ptr %p0 |
| store i8 %clmulr_yx, ptr %p1 |
| ret void |
| } |
| |
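| ; Commuted clmul calls still fold to one computation when the result has an additional use via @use: the value is kept in a callee-saved register across the call and stored twice. |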
| define void @multi_use_commutative_clmul_i8(i8 %x, i8 %y, ptr %p0, ptr %p1) nounwind { |
| ; SCALAR-LABEL: multi_use_commutative_clmul_i8: |
| ; SCALAR: # %bb.0: |
| ; SCALAR-NEXT: pushq %rbp |
| ; SCALAR-NEXT: pushq %rbx |
| ; SCALAR-NEXT: pushq %rax |
| ; SCALAR-NEXT: movq %rcx, %rbx |
| ; SCALAR-NEXT: # kill: def $edi killed $edi def $rdi |
| ; SCALAR-NEXT: xorl %eax, %eax |
| ; SCALAR-NEXT: testb $1, %sil |
| ; SCALAR-NEXT: movl %edi, %ebp |
| ; SCALAR-NEXT: cmovel %eax, %ebp |
| ; SCALAR-NEXT: leal (%rdi,%rdi), %ecx |
| ; SCALAR-NEXT: movzbl %cl, %ecx |
| ; SCALAR-NEXT: testb $2, %sil |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: xorl %ecx, %ebp |
| ; SCALAR-NEXT: leal (,%rdi,4), %ecx |
| ; SCALAR-NEXT: movzbl %cl, %ecx |
| ; SCALAR-NEXT: testb $4, %sil |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: leal (,%rdi,8), %r8d |
| ; SCALAR-NEXT: movzbl %r8b, %r8d |
| ; SCALAR-NEXT: testb $8, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: xorl %ecx, %r8d |
| ; SCALAR-NEXT: xorl %r8d, %ebp |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shlb $4, %cl |
| ; SCALAR-NEXT: movzbl %cl, %ecx |
| ; SCALAR-NEXT: testb $16, %sil |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: movl %edi, %r8d |
| ; SCALAR-NEXT: shlb $5, %r8b |
| ; SCALAR-NEXT: movzbl %r8b, %r8d |
| ; SCALAR-NEXT: testb $32, %sil |
| ; SCALAR-NEXT: cmovel %eax, %r8d |
| ; SCALAR-NEXT: xorl %ecx, %r8d |
| ; SCALAR-NEXT: movl %edi, %ecx |
| ; SCALAR-NEXT: shlb $6, %cl |
| ; SCALAR-NEXT: movzbl %cl, %ecx |
| ; SCALAR-NEXT: testb $64, %sil |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: xorl %r8d, %ecx |
| ; SCALAR-NEXT: xorl %ecx, %ebp |
| ; SCALAR-NEXT: shlb $7, %dil |
| ; SCALAR-NEXT: movzbl %dil, %ecx |
| ; SCALAR-NEXT: testb $-128, %sil |
| ; SCALAR-NEXT: cmovel %eax, %ecx |
| ; SCALAR-NEXT: xorl %ecx, %ebp |
| ; SCALAR-NEXT: movb %bpl, (%rdx) |
| ; SCALAR-NEXT: movl %ebp, %edi |
| ; SCALAR-NEXT: callq use@PLT |
| ; SCALAR-NEXT: movb %bpl, (%rbx) |
| ; SCALAR-NEXT: addq $8, %rsp |
| ; SCALAR-NEXT: popq %rbx |
| ; SCALAR-NEXT: popq %rbp |
| ; SCALAR-NEXT: retq |
| ; |
| ; SSE-PCLMUL-LABEL: multi_use_commutative_clmul_i8: |
| ; SSE-PCLMUL: # %bb.0: |
| ; SSE-PCLMUL-NEXT: pushq %r14 |
| ; SSE-PCLMUL-NEXT: pushq %rbx |
| ; SSE-PCLMUL-NEXT: pushq %rax |
| ; SSE-PCLMUL-NEXT: movq %rcx, %rbx |
| ; SSE-PCLMUL-NEXT: movd %esi, %xmm0 |
| ; SSE-PCLMUL-NEXT: movd %edi, %xmm1 |
| ; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1 |
| ; SSE-PCLMUL-NEXT: movq %xmm1, %r14 |
| ; SSE-PCLMUL-NEXT: movb %r14b, (%rdx) |
| ; SSE-PCLMUL-NEXT: movl %r14d, %edi |
| ; SSE-PCLMUL-NEXT: callq use@PLT |
| ; SSE-PCLMUL-NEXT: movb %r14b, (%rbx) |
| ; SSE-PCLMUL-NEXT: addq $8, %rsp |
| ; SSE-PCLMUL-NEXT: popq %rbx |
| ; SSE-PCLMUL-NEXT: popq %r14 |
| ; SSE-PCLMUL-NEXT: retq |
| ; |
| ; AVX-LABEL: multi_use_commutative_clmul_i8: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: pushq %r14 |
| ; AVX-NEXT: pushq %rbx |
| ; AVX-NEXT: pushq %rax |
| ; AVX-NEXT: movq %rcx, %rbx |
| ; AVX-NEXT: vmovd %esi, %xmm0 |
| ; AVX-NEXT: vmovd %edi, %xmm1 |
| ; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: vmovq %xmm0, %r14 |
| ; AVX-NEXT: movb %r14b, (%rdx) |
| ; AVX-NEXT: movl %r14d, %edi |
| ; AVX-NEXT: callq use@PLT |
| ; AVX-NEXT: movb %r14b, (%rbx) |
| ; AVX-NEXT: addq $8, %rsp |
| ; AVX-NEXT: popq %rbx |
| ; AVX-NEXT: popq %r14 |
| ; AVX-NEXT: retq |
| %xy = call i8 @llvm.clmul.i8(i8 %x, i8 %y) |
| %yx = call i8 @llvm.clmul.i8(i8 %y, i8 %x) |
| store i8 %xy, ptr %p0 |
| call void @use(i8 %xy) |
| store i8 %yx, ptr %p1 |
| ret void |
| } |