amd-builtins/math64/expm1D.cl - libclc - Git at Google

 /*
  * Copyright (c) 2014 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  * copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
 #include "math64.h"

 __attribute__((overloadable)) double
 expm1(double x)
 {
     USE_TABLE(double2, p_tbl, TWO_TO_JBY64_EP);

     const double max_expm1_arg = 709.8;
     const double min_expm1_arg = -37.42994775023704;
     const double log_OnePlus_OneByFour = 0.22314355131420976;   //0x3FCC8FF7C79A9A22 = log(1+1/4)
     const double log_OneMinus_OneByFour = -0.28768207245178096; //0xBFD269621134DB93 = log(1-1/4)
     const double sixtyfour_by_lnof2 = 92.33248261689366;        //0x40571547652b82fe
     const double lnof2_by_64_head = 0.010830424696223417;       //0x3f862e42fefa0000
     const double lnof2_by_64_tail = 2.5728046223276688e-14;     //0x3d1cf79abc9e3b39


     // First, assume log(1-1/4) < x < log(1+1/4) i.e  -0.28768 < x < 0.22314
     double u = as_double(as_ulong(x) & 0xffffffffff000000UL);
     double v = x - u;
     double y = u * u * 0.5;
     double z = v * (x + u) * 0.5;

     double q = fma(x,
 	           fma(x,
 		       fma(x,
 			   fma(x,
 			       fma(x,
 				   fma(x,
 				       fma(x,
 					   fma(x,2.4360682937111612e-8, 2.7582184028154370e-7),
 					   2.7558212415361945e-6),
 				       2.4801576918453420e-5),
 				   1.9841269447671544e-4),
 			       1.3888888890687830e-3),
 			   8.3333333334012270e-3),
 		       4.1666666666665560e-2),
 		   1.6666666666666632e-1);
     q *= x * x * x;

     double z1g = (u + y) + (q + (v + z));
     double z1 = x + (y + (q + z));
     z1 = y >= 0x1.0p-7 ? z1g : z1;

     // Now assume outside interval around 0
     int n = (int)(x * sixtyfour_by_lnof2);
     int j = n & 0x3f;
     int m = n >> 6;

     double2 tv = p_tbl[j];
     double f1 = tv.s0;
     double f2 = tv.s1;
     double f = f1 + f2;

     double dn = -n;
     double r = fma(dn, lnof2_by_64_tail, fma(dn, lnof2_by_64_head, x));

     q = fma(r,
 	    fma(r,
 		fma(r,
 		    fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
 		    4.16666666662260795726e-02),
 		1.66666666665260878863e-01),
 	     5.00000000000000008883e-01);
     q = fma(r*r, q, r);

     double twopm = as_double((long)(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64);
     double twopmm = as_double((long)(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64);

     // Computations for m > 52, including where result is close to Inf
     ulong uval = as_ulong(0x1.0p+1023 * (f1 + (f * q + (f2))));
     int e = (int)(uval >> EXPSHIFTBITS_DP64) + 1;

     double zme1024 = as_double(((long)e << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64));
     zme1024 = e == 2047 ? as_double(PINFBITPATT_DP64) : zme1024;

     double zmg52 = twopm * (f1 + fma(f, q, f2 - twopmm));
     zmg52 = m == 1024 ? zme1024 : zmg52;

     // For m < 53
     double zml53 = twopm * ((f1 - twopmm) + fma(f1, q, f2*(1.0 + q)));

     // For m < -7
     double zmln7 = fma(twopm,  f1 + fma(f, q, f2), -1.0);

     z = m < 53 ? zml53 : zmg52;
     z = m < -7 ? zmln7 : z;
     z = x > log_OneMinus_OneByFour & x < log_OnePlus_OneByFour ? z1 : z;
     z = x > max_expm1_arg ? as_double(PINFBITPATT_DP64) : z;
     z = x < min_expm1_arg ? -1.0 : z;

     return z;
 }
	/*
	* Copyright (c) 2014 Advanced Micro Devices, Inc.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to deal
	* in the Software without restriction, including without limitation the rights
	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	* copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in
	* all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	* THE SOFTWARE.
	*/
	#include "math64.h"

	__attribute__((overloadable)) double
	expm1(double x)
	{
	USE_TABLE(double2, p_tbl, TWO_TO_JBY64_EP);

	const double max_expm1_arg = 709.8;
	const double min_expm1_arg = -37.42994775023704;
	const double log_OnePlus_OneByFour = 0.22314355131420976; //0x3FCC8FF7C79A9A22 = log(1+1/4)
	const double log_OneMinus_OneByFour = -0.28768207245178096; //0xBFD269621134DB93 = log(1-1/4)
	const double sixtyfour_by_lnof2 = 92.33248261689366; //0x40571547652b82fe
	const double lnof2_by_64_head = 0.010830424696223417; //0x3f862e42fefa0000
	const double lnof2_by_64_tail = 2.5728046223276688e-14; //0x3d1cf79abc9e3b39


	// First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314
	double u = as_double(as_ulong(x) & 0xffffffffff000000UL);
	double v = x - u;
	double y = u * u * 0.5;
	double z = v * (x + u) * 0.5;

	double q = fma(x,
	fma(x,
	fma(x,
	fma(x,
	fma(x,
	fma(x,
	fma(x,
	fma(x,2.4360682937111612e-8, 2.7582184028154370e-7),
	2.7558212415361945e-6),
	2.4801576918453420e-5),
	1.9841269447671544e-4),
	1.3888888890687830e-3),
	8.3333333334012270e-3),
	4.1666666666665560e-2),
	1.6666666666666632e-1);
	q = x x * x;

	double z1g = (u + y) + (q + (v + z));
	double z1 = x + (y + (q + z));
	z1 = y >= 0x1.0p-7 ? z1g : z1;

	// Now assume outside interval around 0
	int n = (int)(x * sixtyfour_by_lnof2);
	int j = n & 0x3f;
	int m = n >> 6;

	double2 tv = p_tbl[j];
	double f1 = tv.s0;
	double f2 = tv.s1;
	double f = f1 + f2;

	double dn = -n;
	double r = fma(dn, lnof2_by_64_tail, fma(dn, lnof2_by_64_head, x));

	q = fma(r,
	fma(r,
	fma(r,
	fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
	4.16666666662260795726e-02),
	1.66666666665260878863e-01),
	5.00000000000000008883e-01);
	q = fma(r*r, q, r);

	double twopm = as_double((long)(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64);
	double twopmm = as_double((long)(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64);

	// Computations for m > 52, including where result is close to Inf
	ulong uval = as_ulong(0x1.0p+1023 * (f1 + (f * q + (f2))));
	int e = (int)(uval >> EXPSHIFTBITS_DP64) + 1;

	double zme1024 = as_double(((long)e << EXPSHIFTBITS_DP64) \| (uval & MANTBITS_DP64));
	zme1024 = e == 2047 ? as_double(PINFBITPATT_DP64) : zme1024;

	double zmg52 = twopm * (f1 + fma(f, q, f2 - twopmm));
	zmg52 = m == 1024 ? zme1024 : zmg52;

	// For m < 53
	double zml53 = twopm * ((f1 - twopmm) + fma(f1, q, f2*(1.0 + q)));

	// For m < -7
	double zmln7 = fma(twopm, f1 + fma(f, q, f2), -1.0);

	z = m < 53 ? zml53 : zmg52;
	z = m < -7 ? zmln7 : z;
	z = x > log_OneMinus_OneByFour & x < log_OnePlus_OneByFour ? z1 : z;
	z = x > max_expm1_arg ? as_double(PINFBITPATT_DP64) : z;
	z = x < min_expm1_arg ? -1.0 : z;

	return z;
	}