amd-builtins/math32/fmaF.cl - libclc - Git at Google

 /*
  * Copyright (c) 2014 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  * copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */

 #include "math32.h"

 __attribute__((overloadable, always_inline)) float4
 fma(float4 a, float4 b, float4 c)
 {
     float4 ret;
     ret.lo = fma(a.lo, b.lo, c.lo);
     ret.hi = fma(a.hi, b.hi, c.hi);
     return ret;
 }

 __attribute__((overloadable, always_inline)) float2
 fma(float2 a, float2 b, float2 c)
 {
     float2 ret;
     ret.lo = fma(a.lo, b.lo, c.lo);
     ret.hi = fma(a.hi, b.hi, c.hi);
     return ret;
 }

 #define CM(C, B, A) as_float(__amdil_cmov_logical_i32(as_uint(C), as_uint(B), as_uint(A)))

 __attribute__((overloadable, always_inline)) float
 fma(float a, float b, float c)
 {
     if (HAVE_HW_FMA32()) {
 	return __amdil_fma_f32(a, b, c);
     } else {
         float z3 = mad(a, b, c);
         float cs = c;

         int ae = as_int(a) >> 23;
         int be = as_int(b) >> 23;
         int ce = as_int(c) >> 23;

         ae &= 0xff;
         be &= 0xff;
         ce &= 0xff;

         ae -= 127;
         be -= 127;
         ce -= 127;

         int pe = ae + be;

         int cen = ce - pe;
         cen += 127;
         cen <<= 23;

         // special cases flag
         int spclal = ae == -127;
         int spclbl = be == -127;
         int spclcl = ce == -127;

         int spclah = ae == 128;
         int spclbh = be == 128;
         int spclch = ce == 128;

         spclal |= spclah;
         spclbl |= spclbh;
         spclcl |= spclch;

         int spcl = spclal | spclbl;
         spcl |= spclcl;

         int spcl2 = spclah | spclbh;
         spcl2 = ~spcl2;
         spcl2 &= spclch;

         // Normalize
         int an = as_int(a) & 0x807fffff;
         int bn = as_int(b) & 0x807fffff;
         int cn = as_int(c) & 0x807fffff;

         an |= 0x3f800000;
         bn |= 0x3f800000;
         cn |= cen;

         a = as_float(an);
         b = as_float(bn);
         c = as_float(cn);

         // Get head & tail parts of a, b
         float ah = as_float(an & 0xfffff000);
         float bh = as_float(bn & 0xfffff000);

         float at = a - ah;
         float bt = b - bh;

         // Get head & tail parts of the product a*b
         float p = a * b;
         float pt = mad(ah, bh, -p);
         pt = mad(ah, bt, pt);
         pt = mad(at, bh, pt);
         pt = mad(at, bt, pt);

         // carefully add p and c; these steps valid only when pe and ce are not far apart
         float rr = p + c;
         float t1 = p - rr;
         t1 += c;
         float t2 = c - rr;
         t2 += p;
         int pick1 = as_int(p) & 0x7fffffff;
         int pick2 = as_int(c) & 0x7fffffff;
         int pick = pick1 > pick2;
         float t = CM(pick, t1, t2);

         float vv = t + pt;
         float ww1 = t - vv;
         ww1 += pt;
         float ww2 = pt - vv;
         ww2 += t;
         pick1 = as_int(t) & 0x7fffffff;
         pick2 = as_int(pt) & 0x7fffffff;
         pick = pick1 > pick2;
         float ww = CM(pick, ww1, ww2);

         // pick r,v,w based on how far apart pe and ce are
         // number 60 is safe; actual value close to 24+24+2
         pick1 = pe - ce;
         pick = pick1 < 60;
         float r = CM(pick, rr, p);
         float v = CM(pick, vv, pt);
         float w = CM(pick, ww, cs);

         // identify if there was a rounding issue, and so correction is needed
         int rndc1 = as_int(r) & 0x7f800000;
         int rndc2 = as_int(v) & 0x7f800000;
         int rndc = rndc1 - rndc2;
         rndc = rndc == 0x0c000000;
         rndc1 = as_int(v) & 0x007fffff;
         rndc1 = rndc1 == 0;
         rndc2 = as_int(w) & 0x7fffffff;
         rndc2 = rndc2 != 0;
         rndc &= rndc1;
         rndc &= rndc2;

         int ws = as_int(w) & 0x80000000;
         int ve = as_int(v) & 0x7f800000;
         ve -= 0x0b800000;
         w = as_float(ws | ve);

         float vw = v + w;
         v = CM(rndc, vw, v);
         float z = r + v;

         // reconstruct return value
         int ze = as_int(z) >> 23;
         ze &= 0xff;
         ze -= 127;
         ze += pe;
         ze += 127;

         int z1e = ze & 0xff;
         z1e <<= 23;
         int z1 = as_int(z) & 0x807fffff;
         z1 |= z1e;

         pick1 = as_int(z) & 0x7fffffff;
         pick = pick1 == 0;
         z = CM(pick, z, z1);

         int z2 = as_int(z) & 0x80000000;
         pick = ze <= 0;
         z = CM(pick, z2, z);
         z2 |=  0x7f800000;
         pick = ze > 254;
         z = CM(pick, z2, z);

         pick1 = ce - pe;
         pick = pick1 > 30;
         z = CM(pick, cs, z);
         z = CM(spcl, z3, z);
         z = CM(spcl2, cs, z);
         return z;
     }
 }
	/*
	* Copyright (c) 2014 Advanced Micro Devices, Inc.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to deal
	* in the Software without restriction, including without limitation the rights
	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	* copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in
	* all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	* THE SOFTWARE.
	*/

	#include "math32.h"

	__attribute__((overloadable, always_inline)) float4
	fma(float4 a, float4 b, float4 c)
	{
	float4 ret;
	ret.lo = fma(a.lo, b.lo, c.lo);
	ret.hi = fma(a.hi, b.hi, c.hi);
	return ret;
	}

	__attribute__((overloadable, always_inline)) float2
	fma(float2 a, float2 b, float2 c)
	{
	float2 ret;
	ret.lo = fma(a.lo, b.lo, c.lo);
	ret.hi = fma(a.hi, b.hi, c.hi);
	return ret;
	}

	#define CM(C, B, A) as_float(__amdil_cmov_logical_i32(as_uint(C), as_uint(B), as_uint(A)))

	__attribute__((overloadable, always_inline)) float
	fma(float a, float b, float c)
	{
	if (HAVE_HW_FMA32()) {
	return __amdil_fma_f32(a, b, c);
	} else {
	float z3 = mad(a, b, c);
	float cs = c;

	int ae = as_int(a) >> 23;
	int be = as_int(b) >> 23;
	int ce = as_int(c) >> 23;

	ae &= 0xff;
	be &= 0xff;
	ce &= 0xff;

	ae -= 127;
	be -= 127;
	ce -= 127;

	int pe = ae + be;

	int cen = ce - pe;
	cen += 127;
	cen <<= 23;

	// special cases flag
	int spclal = ae == -127;
	int spclbl = be == -127;
	int spclcl = ce == -127;

	int spclah = ae == 128;
	int spclbh = be == 128;
	int spclch = ce == 128;

	spclal \|= spclah;
	spclbl \|= spclbh;
	spclcl \|= spclch;

	int spcl = spclal \| spclbl;
	spcl \|= spclcl;

	int spcl2 = spclah \| spclbh;
	spcl2 = ~spcl2;
	spcl2 &= spclch;

	// Normalize
	int an = as_int(a) & 0x807fffff;
	int bn = as_int(b) & 0x807fffff;
	int cn = as_int(c) & 0x807fffff;

	an \|= 0x3f800000;
	bn \|= 0x3f800000;
	cn \|= cen;

	a = as_float(an);
	b = as_float(bn);
	c = as_float(cn);

	// Get head & tail parts of a, b
	float ah = as_float(an & 0xfffff000);
	float bh = as_float(bn & 0xfffff000);

	float at = a - ah;
	float bt = b - bh;

	// Get head & tail parts of the product a*b
	float p = a * b;
	float pt = mad(ah, bh, -p);
	pt = mad(ah, bt, pt);
	pt = mad(at, bh, pt);
	pt = mad(at, bt, pt);

	// carefully add p and c; these steps valid only when pe and ce are not far apart
	float rr = p + c;
	float t1 = p - rr;
	t1 += c;
	float t2 = c - rr;
	t2 += p;
	int pick1 = as_int(p) & 0x7fffffff;
	int pick2 = as_int(c) & 0x7fffffff;
	int pick = pick1 > pick2;
	float t = CM(pick, t1, t2);

	float vv = t + pt;
	float ww1 = t - vv;
	ww1 += pt;
	float ww2 = pt - vv;
	ww2 += t;
	pick1 = as_int(t) & 0x7fffffff;
	pick2 = as_int(pt) & 0x7fffffff;
	pick = pick1 > pick2;
	float ww = CM(pick, ww1, ww2);

	// pick r,v,w based on how far apart pe and ce are
	// number 60 is safe; actual value close to 24+24+2
	pick1 = pe - ce;
	pick = pick1 < 60;
	float r = CM(pick, rr, p);
	float v = CM(pick, vv, pt);
	float w = CM(pick, ww, cs);

	// identify if there was a rounding issue, and so correction is needed
	int rndc1 = as_int(r) & 0x7f800000;
	int rndc2 = as_int(v) & 0x7f800000;
	int rndc = rndc1 - rndc2;
	rndc = rndc == 0x0c000000;
	rndc1 = as_int(v) & 0x007fffff;
	rndc1 = rndc1 == 0;
	rndc2 = as_int(w) & 0x7fffffff;
	rndc2 = rndc2 != 0;
	rndc &= rndc1;
	rndc &= rndc2;

	int ws = as_int(w) & 0x80000000;
	int ve = as_int(v) & 0x7f800000;
	ve -= 0x0b800000;
	w = as_float(ws \| ve);

	float vw = v + w;
	v = CM(rndc, vw, v);
	float z = r + v;

	// reconstruct return value
	int ze = as_int(z) >> 23;
	ze &= 0xff;
	ze -= 127;
	ze += pe;
	ze += 127;

	int z1e = ze & 0xff;
	z1e <<= 23;
	int z1 = as_int(z) & 0x807fffff;
	z1 \|= z1e;

	pick1 = as_int(z) & 0x7fffffff;
	pick = pick1 == 0;
	z = CM(pick, z, z1);

	int z2 = as_int(z) & 0x80000000;
	pick = ze <= 0;
	z = CM(pick, z2, z);
	z2 \|= 0x7f800000;
	pick = ze > 254;
	z = CM(pick, z2, z);

	pick1 = ce - pe;
	pick = pick1 > 30;
	z = CM(pick, cs, z);
	z = CM(spcl, z3, z);
	z = CM(spcl2, cs, z);
	return z;
	}
	}