lib/builtins/arm/dcmp.h - llvm-project/compiler-rt - Git at Google

 //===-- dcmp.h - shared code for double-precision FP comparison functions -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This code is the skeleton of a double-precision FP compare, with two details
 // left out: which input value is in which register, and how to make the return
 // value. It allows the main comparison logic to be shared between (for
 // example) __ledf2 and __gedf2, varying only those details.
 //
 //===----------------------------------------------------------------------===//

 // How to use this header file:
 //
 // This header file is expected to be #included from inside a function
 // definition in a .S file. The source file including this header should
 // provide the following:
 //
 // op0h, op0l, op1h, op1l: register aliases (via .req) for the registers
 // containing the input operands.
 //  - For most comparisons, op0h,op0l will correspond to xh,xl, and op1h,op1l
 //    to yh,yl (as defined in turn in crt_endian.h).
 //  - But a function with the reversed semantics of __aeabi_cdrcmple wil define
 //    them the other way round.
 //
 // SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
 // an appropriate return value in r0, for the cases that do *not* involve NaN.
 //  - On entry to this macro, the condition codes LO, EQ and HI indicate that
 //    op0 < op1, op0 == op1 or op0 > op1 respectively.
 //  - For functions that return a result in the flags, this macro can be empty,
 //    because those are the correct flags to return anyway.
 //  - Functions that return a boolean in r0 should set it up by checking the
 //    flags.
 //
 // SetReturnRegisterNE: a macro that does the same thing as SetReturnRegister,
 // except that if the Z flag is set, it instead does nothing at all. (This
 // macro must not assume that the flags were set by a single CMP: in
 // particular, C=0 but Z=1 is possible on entry to this macro, so you must not
 // use the LO condition code and assume it is mutually exclusive with EQ.)
 //
 // LOCAL_LABEL(NaN): a label defined within the compare function, after the
 // #include of this header. Called when at least one input is a NaN, and sets
 // up the appropriate return value for that case.

 // --------------------------------------------------
 // The actual entry point of the compare function.
 //
 // The basic plan is to start by ORing together the two inputs. This tells us
 // two things:
 //  - the top bit of the output tells us whether both inputs are positive, or
 //    whether at least one is negative
 //  - if the 11 exponent bits of the output are not all 1, then there are
 //    definitely no NaNs, so a fast path can handle most non-NaN cases.

 // clang-format off

   // First diverge control for the negative-numbers case.
   orrs    r12, op0h, op1h
   bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input

   // Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
   // r12. This will carry all the way into the top bit, setting the N flag, if
   // all 11 exponent bits were set.
   cmn     r12, #1 << 20
   bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs

   // The fastest fast path: both inputs positive and we could easily tell there
   // were no NaNs. So we just compare op0 and op1 as unsigned integers.
   cmp     op0h, op1h
   SetReturnRegisterNE
   bxne    lr
   cmp     op0l, op1l
   SetReturnRegister
   bx      lr

 LOCAL_LABEL(NaNInf_check_positive):
   // Second tier for positive numbers. We come here if both inputs are
   // positive, but our fast initial check didn't manage to rule out a NaN. But
   // it's not guaranteed that there _is_ a NaN, for two reasons:
   //
   //  1. An input with exponent 0x7FF might be an infinity instead. Those
   //     behave normally under comparison.
   //
   //  2. There might not even _be_ an input with exponent 0x7FF. All we know so
   //     far is that the two inputs ORed together had all the exponent bits
   //     set. So each of those bits is set in _at least one_ of the inputs, but
   //     not necessarily all in the _same_ input.
   //
   // Test each exponent individually for 0x7FF, using the same CMN idiom as
   // above. If neither one carries into the sign bit then we have no NaNs _or_
   // infinities and can compare the registers and return again.
   cmn     op0h, #1 << 20
   cmnpl   op1h, #1 << 20
   bmi     LOCAL_LABEL(NaN_check_positive)

   // Second-tier return path, now we've ruled out anything difficult. By this
   // time we know that the two operands have different exponents (because the
   // exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
   // must have a set bit not present in the other). So we only need to compare
   // the high words.
   cmp     op0h, op1h
   SetReturnRegister
   bx      lr

 LOCAL_LABEL(NaN_check_positive):
   // Third tier for positive numbers. Here we know that at least one of the
   // inputs has exponent 0x7FF. But they might still be infinities rather than
   // NaNs. So now we must check whether there's an actual NaN.
   //
   // We do this by shifting the high word of each input left to get rid of the
   // sign bit, shifting a bit in at the bottom which is 1 if any bit is set in
   // the low word. Then we check if the result is _greater_ than 0xFFE00000
   // (but not equal), via adding 0x00200000 to it and testing for the HI
   // condition (carry flag set, but Z clear).
   //
   // We could have skipped the second-tier check and done this more rigorous
   // test immediately. But that would cost an extra instruction in the case
   // where there are no infinities or NaNs, and we assume that that is so much
   // more common that it's worth optimizing for.
   cmp     op0l, #1           // set C if op0l is nonzero
   adc     op0h, op0h, op0h   // shift op0h left, bringing in the C bit
   cmp     op1l, #1           // set C if op1l is nonzero
   adc     op1h, op1h, op1h   // shift op1h left, bringing in the C bit
   cmn     op0h, #1 << 21     // if HI, then op0 is a NaN
   cmnls   op1h, #1 << 21     // if not HI, then do the same check for op1
   bhi     LOCAL_LABEL(NaN)           // now, if HI, there's definitely a NaN

   // Now we've finally ruled out NaNs! And we still know both inputs are
   // positive. So the third-tier return path can just compare the top words
   // again. (The fact that we've just shifted them left doesn't make a
   // difference.)
   cmp     op0h, op1h
   SetReturnRegister
   bx      lr

 LOCAL_LABEL(negative):
   // We come here if at least one operand is negative. We haven't checked for
   // NaNs at all yet (the sign check came first), so repeat the first-tier
   // check strategy of seeing if all exponent bits are set in r12.
   //
   // On this path, the sign bit in r12 is set, so if adding 1 to the low
   // exponent bit carries all the way through into the sign bit, it will
   // _clear_ the sign bit rather than setting it. So we expect MI to be the
   // "definitely no NaNs" result, where it was PL on the positive branch.
   cmn     r12, #1 << 20
   bpl     LOCAL_LABEL(NaNInf_check_negative)

   // Now we have no NaNs, but at least one negative number. This gives us two
   // complications:
   //
   //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
   //     have to consider separately the cases of "both negative" and "one of
   //     each sign".
   //
   //  2. -0 and +0 are required to compare equal.
   //
   // But problem #1 is not as hard as it sounds! If both operands are negative,
   // then we can get the result we want by comparing them as unsigned integers
   // the opposite way round, because the input with the smaller value (as an
   // integer) is the larger number in an FP ordering sense. And if one operand
   // is negative and the other is positive, the _same_ reversed comparison
   // works, because the positive number (with zero sign bit) will always
   // compare less than the negative one in an unsigned-integers sense.
   //
   // So we only have to worry about problem #2, signed zeroes. This only
   // affects the answer if _both_ operands are zero. So we check that by
   // testing all bits of both operands apart from the sign bit.
   orrs    r12, op0l, op0h, LSL #1 // EQ if op0 is zero
   orrseq  r12, op1l, op1h, LSL #1 // now only EQ if both are zero
   cmpne   op1h, op0h              // otherwise, compare them backwards
   SetReturnRegisterNE
   bxne    lr
   cmp     op1l, op0l
   SetReturnRegister
   bx      lr

 LOCAL_LABEL(NaNInf_check_negative):
   // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
   // but again, we might not have either _actual_ exponent 0xFF, and also, an
   // exponent 0xFF might be an infinity instead of a NaN.
   //
   // On this path we've already branched twice (once for negative numbers and
   // once for the first-tier NaN check), so we'll just go straight to the
   // precise check for NaNs.
   //
   // Like the NaNInf_check_positive case, we do each NaN check by making a
   // word consisting of (high word << 1) OR (1 if low word is nonzero). But
   // unlike the positive case, we can't make those words _in place_,
   // overwriting op0h and op1h themselves, because that would shift the sign
   // bits off the top, and we still need the sign bits to get the comparison
   // right. (In the positive case, we knew both sign bits were 0, enabling a
   // shortcut.)
   cmp     op0l, #1           // set C if op0l is nonzero
   adc     r12, op0h, op0h    // shift op0h left, bringing in the C bit
   cmn     r12, #1 << 21      // if HI, then op0 is a NaN
   bhi     LOCAL_LABEL(NaN)
   cmp     op1l, #1           // set C if op1l is nonzero
   adc     r12, op1h, op1h    // shift op1h left, bringing in the C bit
   cmn     r12, #1 << 21      // if HI, then op1 is a NaN
   bhi     LOCAL_LABEL(NaN)

   // Now we've ruled out NaNs, so we can just compare the two input registers
   // and return. On this path we _don't_ need to check for the special case of
   // comparing two zeroes, because we only came here if the bitwise OR of the
   // exponent fields was 0x7FF, which means the exponents can't both have been
   // zero! So we can _just_ do the reversed CMP and finish.
   cmp     op1h, op0h
   SetReturnRegister
   bx      lr
	//===-- dcmp.h - shared code for double-precision FP comparison functions -===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This code is the skeleton of a double-precision FP compare, with two details
	// left out: which input value is in which register, and how to make the return
	// value. It allows the main comparison logic to be shared between (for
	// example) __ledf2 and __gedf2, varying only those details.
	//
	//===----------------------------------------------------------------------===//

	// How to use this header file:
	//
	// This header file is expected to be #included from inside a function
	// definition in a .S file. The source file including this header should
	// provide the following:
	//
	// op0h, op0l, op1h, op1l: register aliases (via .req) for the registers
	// containing the input operands.
	// - For most comparisons, op0h,op0l will correspond to xh,xl, and op1h,op1l
	// to yh,yl (as defined in turn in crt_endian.h).
	// - But a function with the reversed semantics of __aeabi_cdrcmple wil define
	// them the other way round.
	//
	// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
	// an appropriate return value in r0, for the cases that do not involve NaN.
	// - On entry to this macro, the condition codes LO, EQ and HI indicate that
	// op0 < op1, op0 == op1 or op0 > op1 respectively.
	// - For functions that return a result in the flags, this macro can be empty,
	// because those are the correct flags to return anyway.
	// - Functions that return a boolean in r0 should set it up by checking the
	// flags.
	//
	// SetReturnRegisterNE: a macro that does the same thing as SetReturnRegister,
	// except that if the Z flag is set, it instead does nothing at all. (This
	// macro must not assume that the flags were set by a single CMP: in
	// particular, C=0 but Z=1 is possible on entry to this macro, so you must not
	// use the LO condition code and assume it is mutually exclusive with EQ.)
	//
	// LOCAL_LABEL(NaN): a label defined within the compare function, after the
	// #include of this header. Called when at least one input is a NaN, and sets
	// up the appropriate return value for that case.

	// --------------------------------------------------
	// The actual entry point of the compare function.
	//
	// The basic plan is to start by ORing together the two inputs. This tells us
	// two things:
	// - the top bit of the output tells us whether both inputs are positive, or
	// whether at least one is negative
	// - if the 11 exponent bits of the output are not all 1, then there are
	// definitely no NaNs, so a fast path can handle most non-NaN cases.

	// clang-format off

	// First diverge control for the negative-numbers case.
	orrs r12, op0h, op1h
	bmi LOCAL_LABEL(negative) // high bit set => at least one negative input

	// Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
	// r12. This will carry all the way into the top bit, setting the N flag, if
	// all 11 exponent bits were set.
	cmn r12, #1 << 20
	bmi LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs

	// The fastest fast path: both inputs positive and we could easily tell there
	// were no NaNs. So we just compare op0 and op1 as unsigned integers.
	cmp op0h, op1h
	SetReturnRegisterNE
	bxne lr
	cmp op0l, op1l
	SetReturnRegister
	bx lr

	LOCAL_LABEL(NaNInf_check_positive):
	// Second tier for positive numbers. We come here if both inputs are
	// positive, but our fast initial check didn't manage to rule out a NaN. But
	// it's not guaranteed that there _is_ a NaN, for two reasons:
	//
	// 1. An input with exponent 0x7FF might be an infinity instead. Those
	// behave normally under comparison.
	//
	// 2. There might not even _be_ an input with exponent 0x7FF. All we know so
	// far is that the two inputs ORed together had all the exponent bits
	// set. So each of those bits is set in _at least one_ of the inputs, but
	// not necessarily all in the _same_ input.
	//
	// Test each exponent individually for 0x7FF, using the same CMN idiom as
	// above. If neither one carries into the sign bit then we have no NaNs _or_
	// infinities and can compare the registers and return again.
	cmn op0h, #1 << 20
	cmnpl op1h, #1 << 20
	bmi LOCAL_LABEL(NaN_check_positive)

	// Second-tier return path, now we've ruled out anything difficult. By this
	// time we know that the two operands have different exponents (because the
	// exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
	// must have a set bit not present in the other). So we only need to compare
	// the high words.
	cmp op0h, op1h
	SetReturnRegister
	bx lr

	LOCAL_LABEL(NaN_check_positive):
	// Third tier for positive numbers. Here we know that at least one of the
	// inputs has exponent 0x7FF. But they might still be infinities rather than
	// NaNs. So now we must check whether there's an actual NaN.
	//
	// We do this by shifting the high word of each input left to get rid of the
	// sign bit, shifting a bit in at the bottom which is 1 if any bit is set in
	// the low word. Then we check if the result is _greater_ than 0xFFE00000
	// (but not equal), via adding 0x00200000 to it and testing for the HI
	// condition (carry flag set, but Z clear).
	//
	// We could have skipped the second-tier check and done this more rigorous
	// test immediately. But that would cost an extra instruction in the case
	// where there are no infinities or NaNs, and we assume that that is so much
	// more common that it's worth optimizing for.
	cmp op0l, #1 // set C if op0l is nonzero
	adc op0h, op0h, op0h // shift op0h left, bringing in the C bit
	cmp op1l, #1 // set C if op1l is nonzero
	adc op1h, op1h, op1h // shift op1h left, bringing in the C bit
	cmn op0h, #1 << 21 // if HI, then op0 is a NaN
	cmnls op1h, #1 << 21 // if not HI, then do the same check for op1
	bhi LOCAL_LABEL(NaN) // now, if HI, there's definitely a NaN

	// Now we've finally ruled out NaNs! And we still know both inputs are
	// positive. So the third-tier return path can just compare the top words
	// again. (The fact that we've just shifted them left doesn't make a
	// difference.)
	cmp op0h, op1h
	SetReturnRegister
	bx lr

	LOCAL_LABEL(negative):
	// We come here if at least one operand is negative. We haven't checked for
	// NaNs at all yet (the sign check came first), so repeat the first-tier
	// check strategy of seeing if all exponent bits are set in r12.
	//
	// On this path, the sign bit in r12 is set, so if adding 1 to the low
	// exponent bit carries all the way through into the sign bit, it will
	// _clear_ the sign bit rather than setting it. So we expect MI to be the
	// "definitely no NaNs" result, where it was PL on the positive branch.
	cmn r12, #1 << 20
	bpl LOCAL_LABEL(NaNInf_check_negative)

	// Now we have no NaNs, but at least one negative number. This gives us two
	// complications:
	//
	// 1. Floating-point numbers are sign/magnitude, not two's complement, so we
	// have to consider separately the cases of "both negative" and "one of
	// each sign".
	//
	// 2. -0 and +0 are required to compare equal.
	//
	// But problem #1 is not as hard as it sounds! If both operands are negative,
	// then we can get the result we want by comparing them as unsigned integers
	// the opposite way round, because the input with the smaller value (as an
	// integer) is the larger number in an FP ordering sense. And if one operand
	// is negative and the other is positive, the _same_ reversed comparison
	// works, because the positive number (with zero sign bit) will always
	// compare less than the negative one in an unsigned-integers sense.
	//
	// So we only have to worry about problem #2, signed zeroes. This only
	// affects the answer if _both_ operands are zero. So we check that by
	// testing all bits of both operands apart from the sign bit.
	orrs r12, op0l, op0h, LSL #1 // EQ if op0 is zero
	orrseq r12, op1l, op1h, LSL #1 // now only EQ if both are zero
	cmpne op1h, op0h // otherwise, compare them backwards
	SetReturnRegisterNE
	bxne lr
	cmp op1l, op0l
	SetReturnRegister
	bx lr

	LOCAL_LABEL(NaNInf_check_negative):
	// Second tier for negative numbers: we know the OR of the exponents is 0xFF,
	// but again, we might not have either _actual_ exponent 0xFF, and also, an
	// exponent 0xFF might be an infinity instead of a NaN.
	//
	// On this path we've already branched twice (once for negative numbers and
	// once for the first-tier NaN check), so we'll just go straight to the
	// precise check for NaNs.
	//
	// Like the NaNInf_check_positive case, we do each NaN check by making a
	// word consisting of (high word << 1) OR (1 if low word is nonzero). But
	// unlike the positive case, we can't make those words _in place_,
	// overwriting op0h and op1h themselves, because that would shift the sign
	// bits off the top, and we still need the sign bits to get the comparison
	// right. (In the positive case, we knew both sign bits were 0, enabling a
	// shortcut.)
	cmp op0l, #1 // set C if op0l is nonzero
	adc r12, op0h, op0h // shift op0h left, bringing in the C bit
	cmn r12, #1 << 21 // if HI, then op0 is a NaN
	bhi LOCAL_LABEL(NaN)
	cmp op1l, #1 // set C if op1l is nonzero
	adc r12, op1h, op1h // shift op1h left, bringing in the C bit
	cmn r12, #1 << 21 // if HI, then op1 is a NaN
	bhi LOCAL_LABEL(NaN)

	// Now we've ruled out NaNs, so we can just compare the two input registers
	// and return. On this path we _don't_ need to check for the special case of
	// comparing two zeroes, because we only came here if the bitwise OR of the
	// exponent fields was 0x7FF, which means the exponents can't both have been
	// zero! So we can _just_ do the reversed CMP and finish.
	cmp op1h, op0h
	SetReturnRegister
	bx lr