blob: 2dd7a36e857f72ccfa672c1c22a5deebba89c9e8 [file] [edit]
//===-- dcmp.h - shared code for double-precision FP comparison functions -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This code is the skeleton of a double-precision FP compare, with two details
// left out: which input value is in which register, and how to make the return
// value. It allows the main comparison logic to be shared between (for
// example) __ledf2 and __gedf2, varying only those details.
//
//===----------------------------------------------------------------------===//
// How to use this header file:
//
// This header file is expected to be #included from inside a function
// definition in a .S file. The source file including this header should
// provide the following:
//
// op0h, op0l, op1h, op1l: register aliases (via .req) for the registers
// containing the input operands.
// - For most comparisons, op0h,op0l will correspond to xh,xl, and op1h,op1l
// to yh,yl (as defined in turn in crt_endian.h).
// - But a function with the reversed semantics of __aeabi_cdrcmple wil define
// them the other way round.
//
// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
// an appropriate return value in r0, for the cases that do *not* involve NaN.
// - On entry to this macro, the condition codes LO, EQ and HI indicate that
// op0 < op1, op0 == op1 or op0 > op1 respectively.
// - For functions that return a result in the flags, this macro can be empty,
// because those are the correct flags to return anyway.
// - Functions that return a boolean in r0 should set it up by checking the
// flags.
//
// SetReturnRegisterNE: a macro that does the same thing as SetReturnRegister,
// except that if the Z flag is set, it instead does nothing at all. (This
// macro must not assume that the flags were set by a single CMP: in
// particular, C=0 but Z=1 is possible on entry to this macro, so you must not
// use the LO condition code and assume it is mutually exclusive with EQ.)
//
// LOCAL_LABEL(NaN): a label defined within the compare function, after the
// #include of this header. Called when at least one input is a NaN, and sets
// up the appropriate return value for that case.
// --------------------------------------------------
// The actual entry point of the compare function.
//
// The basic plan is to start by ORing together the two inputs. This tells us
// two things:
// - the top bit of the output tells us whether both inputs are positive, or
// whether at least one is negative
// - if the 11 exponent bits of the output are not all 1, then there are
// definitely no NaNs, so a fast path can handle most non-NaN cases.
// clang-format off
// First diverge control for the negative-numbers case.
orrs r12, op0h, op1h
bmi LOCAL_LABEL(negative) // high bit set => at least one negative input
// Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
// r12. This will carry all the way into the top bit, setting the N flag, if
// all 11 exponent bits were set.
cmn r12, #1 << 20
bmi LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
// The fastest fast path: both inputs positive and we could easily tell there
// were no NaNs. So we just compare op0 and op1 as unsigned integers.
cmp op0h, op1h
SetReturnRegisterNE
bxne lr
cmp op0l, op1l
SetReturnRegister
bx lr
LOCAL_LABEL(NaNInf_check_positive):
// Second tier for positive numbers. We come here if both inputs are
// positive, but our fast initial check didn't manage to rule out a NaN. But
// it's not guaranteed that there _is_ a NaN, for two reasons:
//
// 1. An input with exponent 0x7FF might be an infinity instead. Those
// behave normally under comparison.
//
// 2. There might not even _be_ an input with exponent 0x7FF. All we know so
// far is that the two inputs ORed together had all the exponent bits
// set. So each of those bits is set in _at least one_ of the inputs, but
// not necessarily all in the _same_ input.
//
// Test each exponent individually for 0x7FF, using the same CMN idiom as
// above. If neither one carries into the sign bit then we have no NaNs _or_
// infinities and can compare the registers and return again.
cmn op0h, #1 << 20
cmnpl op1h, #1 << 20
bmi LOCAL_LABEL(NaN_check_positive)
// Second-tier return path, now we've ruled out anything difficult. By this
// time we know that the two operands have different exponents (because the
// exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
// must have a set bit not present in the other). So we only need to compare
// the high words.
cmp op0h, op1h
SetReturnRegister
bx lr
LOCAL_LABEL(NaN_check_positive):
// Third tier for positive numbers. Here we know that at least one of the
// inputs has exponent 0x7FF. But they might still be infinities rather than
// NaNs. So now we must check whether there's an actual NaN.
//
// We do this by shifting the high word of each input left to get rid of the
// sign bit, shifting a bit in at the bottom which is 1 if any bit is set in
// the low word. Then we check if the result is _greater_ than 0xFFE00000
// (but not equal), via adding 0x00200000 to it and testing for the HI
// condition (carry flag set, but Z clear).
//
// We could have skipped the second-tier check and done this more rigorous
// test immediately. But that would cost an extra instruction in the case
// where there are no infinities or NaNs, and we assume that that is so much
// more common that it's worth optimizing for.
cmp op0l, #1 // set C if op0l is nonzero
adc op0h, op0h, op0h // shift op0h left, bringing in the C bit
cmp op1l, #1 // set C if op1l is nonzero
adc op1h, op1h, op1h // shift op1h left, bringing in the C bit
cmn op0h, #1 << 21 // if HI, then op0 is a NaN
cmnls op1h, #1 << 21 // if not HI, then do the same check for op1
bhi LOCAL_LABEL(NaN) // now, if HI, there's definitely a NaN
// Now we've finally ruled out NaNs! And we still know both inputs are
// positive. So the third-tier return path can just compare the top words
// again. (The fact that we've just shifted them left doesn't make a
// difference.)
cmp op0h, op1h
SetReturnRegister
bx lr
LOCAL_LABEL(negative):
// We come here if at least one operand is negative. We haven't checked for
// NaNs at all yet (the sign check came first), so repeat the first-tier
// check strategy of seeing if all exponent bits are set in r12.
//
// On this path, the sign bit in r12 is set, so if adding 1 to the low
// exponent bit carries all the way through into the sign bit, it will
// _clear_ the sign bit rather than setting it. So we expect MI to be the
// "definitely no NaNs" result, where it was PL on the positive branch.
cmn r12, #1 << 20
bpl LOCAL_LABEL(NaNInf_check_negative)
// Now we have no NaNs, but at least one negative number. This gives us two
// complications:
//
// 1. Floating-point numbers are sign/magnitude, not two's complement, so we
// have to consider separately the cases of "both negative" and "one of
// each sign".
//
// 2. -0 and +0 are required to compare equal.
//
// But problem #1 is not as hard as it sounds! If both operands are negative,
// then we can get the result we want by comparing them as unsigned integers
// the opposite way round, because the input with the smaller value (as an
// integer) is the larger number in an FP ordering sense. And if one operand
// is negative and the other is positive, the _same_ reversed comparison
// works, because the positive number (with zero sign bit) will always
// compare less than the negative one in an unsigned-integers sense.
//
// So we only have to worry about problem #2, signed zeroes. This only
// affects the answer if _both_ operands are zero. So we check that by
// testing all bits of both operands apart from the sign bit.
orrs r12, op0l, op0h, LSL #1 // EQ if op0 is zero
orrseq r12, op1l, op1h, LSL #1 // now only EQ if both are zero
cmpne op1h, op0h // otherwise, compare them backwards
SetReturnRegisterNE
bxne lr
cmp op1l, op0l
SetReturnRegister
bx lr
LOCAL_LABEL(NaNInf_check_negative):
// Second tier for negative numbers: we know the OR of the exponents is 0xFF,
// but again, we might not have either _actual_ exponent 0xFF, and also, an
// exponent 0xFF might be an infinity instead of a NaN.
//
// On this path we've already branched twice (once for negative numbers and
// once for the first-tier NaN check), so we'll just go straight to the
// precise check for NaNs.
//
// Like the NaNInf_check_positive case, we do each NaN check by making a
// word consisting of (high word << 1) OR (1 if low word is nonzero). But
// unlike the positive case, we can't make those words _in place_,
// overwriting op0h and op1h themselves, because that would shift the sign
// bits off the top, and we still need the sign bits to get the comparison
// right. (In the positive case, we knew both sign bits were 0, enabling a
// shortcut.)
cmp op0l, #1 // set C if op0l is nonzero
adc r12, op0h, op0h // shift op0h left, bringing in the C bit
cmn r12, #1 << 21 // if HI, then op0 is a NaN
bhi LOCAL_LABEL(NaN)
cmp op1l, #1 // set C if op1l is nonzero
adc r12, op1h, op1h // shift op1h left, bringing in the C bit
cmn r12, #1 << 21 // if HI, then op1 is a NaN
bhi LOCAL_LABEL(NaN)
// Now we've ruled out NaNs, so we can just compare the two input registers
// and return. On this path we _don't_ need to check for the special case of
// comparing two zeroes, because we only came here if the bitwise OR of the
// exponent fields was 0x7FF, which means the exponents can't both have been
// zero! So we can _just_ do the reversed CMP and finish.
cmp op1h, op0h
SetReturnRegister
bx lr