| /* ===-------- ia32intrin.h ---------------------------------------------------=== |
| * |
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| * See https://llvm.org/LICENSE.txt for license information. |
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| * |
| *===-----------------------------------------------------------------------=== |
| */ |
| |
| #ifndef __X86INTRIN_H |
| #error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead." |
| #endif |
| |
| #ifndef __IA32INTRIN_H |
| #define __IA32INTRIN_H |
| |
/** Returns the index of the least significant set bit of the operand. The
 *  result is undefined if the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A)
{
  /* The builtin takes an unsigned operand; make the conversion explicit. */
  const unsigned int __V = (unsigned int)__A;
  return __builtin_ctz(__V);
}
| |
/** Returns the index of the most significant set bit of the operand. The
 *  result is undefined if the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A)
{
  /* Number of leading zero bits; subtracting from 31 turns it into an index. */
  const int __Z = __builtin_clz((unsigned int)__A);
  return 31 - __Z;
}
| |
/** Reverses the order of the four bytes of the operand, converting a
 *  little-endian value to big-endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A)
{
  const unsigned int __S = __builtin_bswap32((unsigned int)__A);
  return (int)__S;
}
| |
/** Reverses the order of the four bytes of the operand. This performs the
 *  same operation as __bswapd under a different name.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A)
{
  const unsigned int __S = __builtin_bswap32((unsigned int)__A);
  return (int)__S;
}
| |
/* Alternative spellings of the 32-bit bit-scan intrinsics: each macro passes
 * its argument straight through to __bsfd / __bsrd. */
#define _bit_scan_forward(__X) __bsfd((__X))
#define _bit_scan_reverse(__X) __bsrd((__X))
| |
| #ifdef __x86_64__ |
/** Returns the index of the least significant set bit of the operand. The
 *  result is undefined if the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A)
{
  /* The builtin takes an unsigned operand; make the conversion explicit. */
  const unsigned long long __V = (unsigned long long)__A;
  return __builtin_ctzll(__V);
}
| |
/** Returns the index of the most significant set bit of the operand. The
 *  result is undefined if the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A)
{
  /* Number of leading zero bits; subtracting from 63 turns it into an index. */
  const int __Z = __builtin_clzll((unsigned long long)__A);
  return 63 - __Z;
}
| |
/** Reverses the order of the eight bytes of the operand, converting a
 *  little-endian value to big-endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A)
{
  const unsigned long long __S = __builtin_bswap64((unsigned long long)__A);
  return (long long)__S;
}

/* Alternative spelling of __bswapq; the argument is passed through as-is. */
#define _bswap64(A) __bswapq((A))
| #endif |
| |
/** Counts the bits of the source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A) {
  const int __N = __builtin_popcount(__A);
  return __N;
}

/* Alternative spelling of __popcntd; the argument is passed through as-is. */
#define _popcnt32(A) __popcntd((A))
| |
| #ifdef __x86_64__ |
/** Counts the bits of the source operand that are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A) {
  /* The builtin yields an int; widen it to the declared return type. */
  const long long __N = __builtin_popcountll(__A);
  return __N;
}

/* Alternative spelling of __popcntq; the argument is passed through as-is. */
#define _popcnt64(A) __popcntq((A))
| #endif /* __x86_64__ */ |
| |
#ifndef __x86_64__
/** Reads the processor flags register through the
 *  <c> __builtin_ia32_readeflags_u32 </c> builtin.
 *
 *  \returns The 32-bit value produced by the builtin.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  const unsigned int __F = __builtin_ia32_readeflags_u32();
  return __F;
}

/** Writes the processor flags register through the
 *  <c> __builtin_ia32_writeeflags_u32 </c> builtin.
 *
 *  \param __f
 *     The 32-bit value handed to the builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f) {
  __builtin_ia32_writeeflags_u32(__f);
}

#else /* __x86_64__ */
/** Reads the processor flags register through the
 *  <c> __builtin_ia32_readeflags_u64 </c> builtin.
 *
 *  \returns The 64-bit value produced by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void) {
  const unsigned long long __F = __builtin_ia32_readeflags_u64();
  return __F;
}

/** Writes the processor flags register through the
 *  <c> __builtin_ia32_writeeflags_u64 </c> builtin.
 *
 *  \param __f
 *     The 64-bit value handed to the builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f) {
  __builtin_ia32_writeeflags_u64(__f);
}
#endif /* __x86_64__ */
| |
/** Reinterprets a 32-bit float value as a 32-bit unsigned integer value. The
 *  bit pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in
 *  x86_64, and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* The temporary has a reserved (double-underscore) name so that a user
   * macro with a plain name such as D cannot rewrite this header. */
  unsigned int __D;
  __builtin_memcpy(&__D, &__A, sizeof(__D));
  return __D;
}
| |
/** Reinterprets a 64-bit float value as a 64-bit unsigned integer value. The
 *  bit pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in
 *  x86_64, and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* The temporary has a reserved (double-underscore) name so that a user
   * macro with a plain name such as D cannot rewrite this header. */
  unsigned long long __D;
  __builtin_memcpy(&__D, &__A, sizeof(__D));
  return __D;
}
| |
/** Reinterprets a 32-bit unsigned integer value as a 32-bit float value. The
 *  bit pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in
 *  x86_64, and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value containing the bit pattern of \a __A.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* The temporary has a reserved (double-underscore) name so that a user
   * macro with a plain name such as D cannot rewrite this header. */
  float __D;
  __builtin_memcpy(&__D, &__A, sizeof(__D));
  return __D;
}
| |
/** Reinterprets a 64-bit unsigned integer value as a 64-bit float value. The
 *  bit pattern is copied unchanged; no numeric conversion takes place.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in
 *  x86_64, and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value containing the bit pattern of \a __A.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* The temporary has a reserved (double-underscore) name so that a user
   * macro with a plain name such as D cannot rewrite this header. */
  double __D;
  __builtin_memcpy(&__D, &__A, sizeof(__D));
  return __D;
}
| |
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned char operand, so a checksum can be carried from one call to the
 *  next.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D) {
  const unsigned int __R = __builtin_ia32_crc32qi(__C, __D);
  return __R;
}
| |
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned short operand, so a checksum can be carried from one call to the
 *  next.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D) {
  const unsigned int __R = __builtin_ia32_crc32hi(__C, __D);
  return __R;
}
| |
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  second unsigned integer operand, so a checksum can be carried from one
 *  call to the next.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D) {
  const unsigned int __R = __builtin_ia32_crc32si(__C, __D);
  return __R;
}
| |
| #ifdef __x86_64__ |
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned 64-bit integer operand, so a checksum can be carried from one
 *  call to the next.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D) {
  const unsigned long long __R = __builtin_ia32_crc32di(__C, __D);
  return __R;
}
| #endif /* __x86_64__ */ |
| |
/** Reads a performance-monitoring counter through the
 *  <c> __builtin_ia32_rdpmc </c> builtin.
 *
 *  \param __A
 *     The counter selector handed to the builtin.
 *  \returns The 64-bit value produced by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A)
{
  const unsigned long long __V = __builtin_ia32_rdpmc(__A);
  return __V;
}
| |
/** Reads the time-stamp counter through the <c> __builtin_ia32_rdtscp </c>
 *  builtin.
 *
 *  \param __A
 *     Pointer handed to the builtin, which uses it for its auxiliary 32-bit
 *     output.
 *  \returns The 64-bit value produced by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A)
{
  const unsigned long long __T = __builtin_ia32_rdtscp(__A);
  return __T;
}
| |
/* Single-underscore spellings of the counter-reading intrinsics. __rdtsc is
 * not defined in this header and must be supplied elsewhere (presumably as a
 * compiler builtin). */
#define _rdtsc() __rdtsc()
#define _rdpmc(__X) __rdpmc((__X))
| |
/** Invokes the <c> __builtin_ia32_wbinvd </c> builtin. Takes no arguments and
 *  returns nothing.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  __builtin_ia32_wbinvd();
}
| |
| static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) |
| __rolb(unsigned char __X, int __C) { |
| return __builtin_rotateleft8(__X, __C); |
| } |
| |
| static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) |
| __rorb(unsigned char __X, int __C) { |
| return __builtin_rotateright8(__X, __C); |
| } |
| |
| static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) |
| __rolw(unsigned short __X, int __C) { |
| return __builtin_rotateleft16(__X, __C); |
| } |
| |
| static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) |
| __rorw(unsigned short __X, int __C) { |
| return __builtin_rotateright16(__X, __C); |
| } |
| |
| static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) |
| __rold(unsigned int __X, int __C) { |
| return __builtin_rotateleft32(__X, __C); |
| } |
| |
| static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) |
| __rord(unsigned int __X, int __C) { |
| return __builtin_rotateright32(__X, __C); |
| } |
| |
| #ifdef __x86_64__ |
| static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) |
| __rolq(unsigned long long __X, int __C) { |
| return __builtin_rotateleft64(__X, __C); |
| } |
| |
| static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) |
| __rorq(unsigned long long __X, int __C) { |
| return __builtin_rotateright64(__X, __C); |
| } |
| #endif /* __x86_64__ */ |
| |
/* MSVC already provides _rotl, _rotr, _lrotl and _lrotr as builtins, so the
 * macro forms below are defined only when _MSC_VER is not set. */
#if !defined(_MSC_VER)
/* 32-bit rotates. */
#define _rotl(__x, __n) __rold((__x), (__n))
#define _rotr(__x, __n) __rord((__x), (__n))
/* Rotates of a `long`: pick the helper that matches the size of long, i.e.
 * the 64-bit helpers under LP64 and the 32-bit helpers otherwise. */
#if defined(__LP64__)
#define _lrotl(__x, __n) __rolq((__x), (__n))
#define _lrotr(__x, __n) __rorq((__x), (__n))
#else
#define _lrotl(__x, __n) __rold((__x), (__n))
#define _lrotr(__x, __n) __rord((__x), (__n))
#endif /* __LP64__ */
#endif /* !_MSC_VER */
| |
/* The 16-bit rotate names are not builtins, so they are defined in every
 * mode; each macro forwards both arguments to __rolw / __rorw. */
#define _rotwl(__x, __n) __rolw((__x), (__n))
#define _rotwr(__x, __n) __rorw((__x), (__n))
| |
| #endif /* __IA32INTRIN_H */ |