lib/Support/BlockFrequency.cpp - llvm - Git at Google

 //====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements Block Frequency class.
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>

 using namespace llvm;

 /// Multiply the 64-bit value \p freq by the 32-bit value \p N.
 ///
 /// The full 96-bit product is written to \p W as three 32-bit words with the
 /// least significant word in W[0], i.e. the product reads W[2]:W[1]:W[0].
 static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
   // Work on the two 32-bit halves of freq so every partial product fits in a
   // 64-bit integer.
   const uint64_t LoHalf = freq & UINT32_MAX;
   const uint64_t HiHalf = freq >> 32;

   // Low partial product: its bottom word is final, its top word is a carry.
   const uint64_t LoProd = LoHalf * N;
   W[0] = static_cast<uint32_t>(LoProd);

   // High partial product plus the carry supplies the upper two words.
   const uint64_t HiProd = HiHalf * N + (LoProd >> 32);
   W[1] = static_cast<uint32_t>(HiProd);
   W[2] = static_cast<uint32_t>(HiProd >> 32);
 }

 /// Divide the 96-bit value W[2]:W[1]:W[0] by the 32-bit divisor \p D using
 /// word-by-word short division.
 ///
 /// \param W    Dividend as three 32-bit words, least significant first. W[2]
 ///             is asserted to be non-zero.
 /// \param D    Divisor.
 /// \param Rout Optional; when non-null it receives the remainder.
 /// \returns the quotient. If the quotient does not fit in 64 bits the result
 /// is UINT64_MAX and the value reported through \p Rout is \p D.
 static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
   assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
          "zero, the caller should just use 64/32 hardware.");

   uint32_t Quot[3] = { 0, 0, 0 };
   uint32_t Carry = 0;

   // Walk the dividend from the most significant word down. At every step the
   // remainder carried from the previous word becomes the high half of a
   // 64-bit chunk that is divided by D.
   for (unsigned Idx = 3; Idx-- > 0;) {
     const uint64_t Chunk = (uint64_t(Carry) << 32) | W[Idx];

     // A chunk smaller than the divisor yields a zero digit and is carried
     // down whole. The explicit zero test keeps a zero chunk away from the
     // division below even if D itself is zero.
     if (Chunk == 0 || Chunk < D) {
       Quot[Idx] = 0;
       Carry = uint32_t(Chunk);
       continue;
     }

     Quot[Idx] = uint32_t(Chunk / D);
     Carry = uint32_t(Chunk % D);
   }

   // A non-zero top quotient word means the quotient needs more than 64 bits.
   const bool Overflowed = Quot[2] != 0;
   if (Overflowed)
     Carry = D;

   if (Rout)
     *Rout = Carry;

   if (Overflowed)
     return UINT64_MAX;

   // Assemble the result arithmetically so it does not depend on endianness.
   return (uint64_t(Quot[1]) << 32) | uint64_t(Quot[0]);
 }

 /// Replace Frequency with Frequency * N / D and return the remainder of that
 /// division.
 ///
 /// \p D must be non-zero (asserted). When the product does not fit in 64 bits
 /// the computation goes through the 96-bit helpers mult96bit/divrem96bit.
 uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
   assert(D != 0 && "Division by zero");

   // The two 32x32 partial products of Frequency * N.
   const uint64_t LoPart = (Frequency & UINT32_MAX) * N;
   const uint64_t HiPart = (Frequency >> 32) * N;
   const uint64_t Product = (HiPart << 32) + LoPart;

   // The product exceeds 64 bits when the high partial product spills past 32
   // bits, or when adding the shifted high part to the low part wrapped around.
   const bool NeedsWide = HiPart > UINT32_MAX || Product < LoPart;
   if (NeedsWide) {
     // 96-bit value represented as Words[2]:Words[1]:Words[0].
     uint32_t Words[3];
     uint32_t Rem;
     mult96bit(Frequency, N, Words);
     Frequency = divrem96bit(Words, D, &Rem);
     return Rem;
   }

   // Everything fits in 64 bits, so built-in division is enough.
   Frequency = Product / D;
   return Product % D;
 }

 /// Scale this frequency in place by \p Prob, i.e. by its numerator over its
 /// denominator. The remainder returned by scale() is discarded.
 BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
   const uint32_t Num = Prob.getNumerator();
   const uint32_t Den = Prob.getDenominator();
   scale(Num, Den);
   return *this;
 }

 /// Return a copy of this frequency scaled by \p Prob; *this is not modified.
 const BlockFrequency
 BlockFrequency::operator*(const BranchProbability &Prob) const {
   BlockFrequency Scaled(Frequency);
   return Scaled *= Prob;
 }

 /// Divide this frequency in place by \p Prob. This is done by scaling with
 /// the probability inverted: denominator as multiplier, numerator as divisor.
 /// The remainder returned by scale() is discarded.
 BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
   const uint32_t Multiplier = Prob.getDenominator();
   const uint32_t Divisor = Prob.getNumerator();
   scale(Multiplier, Divisor);
   return *this;
 }

 /// Return a copy of this frequency divided by \p Prob; *this is not modified.
 BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
   BlockFrequency Quotient(Frequency);
   return Quotient /= Prob;
 }

 /// Add \p Freq to this frequency in place, saturating at UINT64_MAX instead
 /// of wrapping around on overflow.
 BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
   const uint64_t Addend = Freq.Frequency;
   const uint64_t Sum = Frequency + Addend;

   // An unsigned sum smaller than one of its operands has wrapped around.
   Frequency = (Sum < Addend) ? UINT64_MAX : Sum;
   return *this;
 }

 /// Return the sum of this frequency and \p Prob as computed by operator+=;
 /// *this is not modified.
 const BlockFrequency
 BlockFrequency::operator+(const BlockFrequency &Prob) const {
   BlockFrequency Sum(Frequency);
   return Sum += Prob;
 }

 uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
   return scale(Prob.getNumerator(), Prob.getDenominator());
 }

 /// Write the frequency to \p OS as a decimal number, treating Frequency as a
 /// fixed-point value whose unit is getEntryFrequency().
 void BlockFrequency::print(raw_ostream &OS) const {
   // Integer part followed by the decimal point.
   OS << Frequency / getEntryFrequency() << ".";

   // Emit fractional digits one at a time. At least one digit is always
   // printed; emission stops once the remainder drops below half of Eps, which
   // grows by a factor of ten with every digit.
   uint64_t Rem = Frequency % getEntryFrequency();
   uint64_t Eps = 1;
   for (;;) {
     Rem *= 10;
     Eps *= 10;
     OS << Rem / getEntryFrequency();
     Rem %= getEntryFrequency();
     if (Rem < Eps / 2)
       break;
   }
 }

 namespace llvm {

 /// Stream insertion for BlockFrequency: forwards to Freq.print(OS) and
 /// returns \p OS so that insertions can be chained.
 raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) {
   Freq.print(OS);
   return OS;
 }

 } // end namespace llvm
	//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements Block Frequency class.
	//
	//===----------------------------------------------------------------------===//

	#include "llvm/Support/BranchProbability.h"
	#include "llvm/Support/BlockFrequency.h"
	#include "llvm/Support/raw_ostream.h"
	#include <cassert>

	using namespace llvm;

	/// Multiply the 64-bit value \p freq by the 32-bit value \p N.
	///
	/// The full 96-bit product is written to \p W as three 32-bit words with the
	/// least significant word in W[0], i.e. the product reads W[2]:W[1]:W[0].
	static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
	  // Work on the two 32-bit halves of freq so every partial product fits in a
	  // 64-bit integer.
	  const uint64_t LoHalf = freq & UINT32_MAX;
	  const uint64_t HiHalf = freq >> 32;

	  // Low partial product: its bottom word is final, its top word is a carry.
	  const uint64_t LoProd = LoHalf * N;
	  W[0] = static_cast<uint32_t>(LoProd);

	  // High partial product plus the carry supplies the upper two words.
	  const uint64_t HiProd = HiHalf * N + (LoProd >> 32);
	  W[1] = static_cast<uint32_t>(HiProd);
	  W[2] = static_cast<uint32_t>(HiProd >> 32);
	}

	/// Divide the 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size
	/// is a 32 bit unsigned integer, we can use a short division algorithm.
	///
	/// \param W    Dividend as three 32-bit words, least significant first. W[2]
	///             is asserted to be non-zero.
	/// \param D    Divisor.
	/// \param Rout Optional; when non-null it receives the remainder.
	/// \returns the quotient. If the quotient does not fit in 64 bits the result
	/// is UINT64_MAX and the value reported through \p Rout is \p D.
	static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
	  // We assume that W[2] is non-zero: if it were zero, the caller should just
	  // use hardware division.
	  assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
	         "zero, the caller should just use 64/32 hardware.");
	  uint32_t Q[3] = { 0, 0, 0 };

	  // The generalized short division algorithm sets i to m + n - 1, where n is
	  // the number of words in the divisor and m is the number of words by which
	  // the dividend exceeds the divisor (i.e. m + n == the length of the dividend
	  // in words). Due to our assumption that W[2] is non-zero, we know that the
	  // dividend is of length 3, implying, since n is 1, that m = 2. Thus we set
	  // i to m + n - 1 = 2 + 1 - 1 = 2.
	  uint32_t R = 0;
	  for (int i = 2; i >= 0; --i) {
	    // The remainder of the previous step becomes the high word of this
	    // step's partial dividend.
	    uint64_t PartialD = uint64_t(R) << 32 | W[i];
	    if (PartialD == 0) {
	      Q[i] = 0;
	      R = 0;
	    } else if (PartialD < D) {
	      Q[i] = 0;
	      R = uint32_t(PartialD);
	    } else if (PartialD == D) {
	      Q[i] = 1;
	      R = 0;
	    } else {
	      Q[i] = uint32_t(PartialD / D);
	      R = uint32_t(PartialD - (Q[i] * D));
	    }
	  }

	  // If Q[2] is non-zero, then we overflowed.
	  uint64_t Result;
	  if (Q[2]) {
	    Result = UINT64_MAX;
	    R = D;
	  } else {
	    // Form the final uint64_t result, avoiding endianness issues.
	    Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
	  }

	  if (Rout)
	    *Rout = R;

	  return Result;
	}

	/// Replace Frequency with Frequency * N / D and return the remainder of that
	/// division.
	///
	/// \p D must be non-zero (asserted). When the product does not fit in 64 bits
	/// the computation goes through the 96-bit helpers mult96bit/divrem96bit.
	uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
	  assert(D != 0 && "Division by zero");

	  // The two 32x32 partial products of Frequency * N.
	  const uint64_t LoPart = (Frequency & UINT32_MAX) * N;
	  const uint64_t HiPart = (Frequency >> 32) * N;
	  const uint64_t Product = (HiPart << 32) + LoPart;

	  // The product exceeds 64 bits when the high partial product spills past 32
	  // bits, or when adding the shifted high part to the low part wrapped around.
	  const bool NeedsWide = HiPart > UINT32_MAX || Product < LoPart;
	  if (NeedsWide) {
	    // 96-bit value represented as Words[2]:Words[1]:Words[0].
	    uint32_t Words[3];
	    uint32_t Rem;
	    mult96bit(Frequency, N, Words);
	    Frequency = divrem96bit(Words, D, &Rem);
	    return Rem;
	  }

	  // Everything fits in 64 bits, so built-in division is enough.
	  Frequency = Product / D;
	  return Product % D;
	}

	/// Scale this frequency in place by \p Prob, i.e. by its numerator over its
	/// denominator. The remainder returned by scale() is discarded.
	BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
	  const uint32_t Num = Prob.getNumerator();
	  const uint32_t Den = Prob.getDenominator();
	  scale(Num, Den);
	  return *this;
	}

	/// Return a copy of this frequency scaled by \p Prob; *this is not modified.
	const BlockFrequency
	BlockFrequency::operator*(const BranchProbability &Prob) const {
	  BlockFrequency Scaled(Frequency);
	  return Scaled *= Prob;
	}

	/// Divide this frequency in place by \p Prob. This is done by scaling with
	/// the probability inverted: denominator as multiplier, numerator as divisor.
	/// The remainder returned by scale() is discarded.
	BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
	  const uint32_t Multiplier = Prob.getDenominator();
	  const uint32_t Divisor = Prob.getNumerator();
	  scale(Multiplier, Divisor);
	  return *this;
	}

	/// Return a copy of this frequency divided by \p Prob; *this is not modified.
	BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
	  BlockFrequency Quotient(Frequency);
	  return Quotient /= Prob;
	}

	/// Add \p Freq to this frequency in place, saturating at UINT64_MAX instead
	/// of wrapping around on overflow.
	BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
	  const uint64_t Addend = Freq.Frequency;
	  const uint64_t Sum = Frequency + Addend;

	  // An unsigned sum smaller than one of its operands has wrapped around.
	  Frequency = (Sum < Addend) ? UINT64_MAX : Sum;
	  return *this;
	}

	/// Return the sum of this frequency and \p Prob as computed by operator+=;
	/// *this is not modified.
	const BlockFrequency
	BlockFrequency::operator+(const BlockFrequency &Prob) const {
	  BlockFrequency Sum(Frequency);
	  return Sum += Prob;
	}

	uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
	return scale(Prob.getNumerator(), Prob.getDenominator());
	}

	/// Write the frequency to \p OS as a decimal number, treating Frequency as a
	/// fixed-point value whose unit is getEntryFrequency().
	void BlockFrequency::print(raw_ostream &OS) const {
	  // Integer part followed by the decimal point.
	  OS << Frequency / getEntryFrequency() << ".";

	  // Emit fractional digits one at a time. At least one digit is always
	  // printed; emission stops once the remainder drops below half of Eps, which
	  // grows by a factor of ten with every digit.
	  uint64_t Rem = Frequency % getEntryFrequency();
	  uint64_t Eps = 1;
	  for (;;) {
	    Rem *= 10;
	    Eps *= 10;
	    OS << Rem / getEntryFrequency();
	    Rem %= getEntryFrequency();
	    if (Rem < Eps / 2)
	      break;
	  }
	}

	namespace llvm {

	/// Stream insertion for BlockFrequency: forwards to Freq.print(OS) and
	/// returns \p OS so that insertions can be chained.
	raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) {
	Freq.print(OS);
	return OS;
	}

	} // end namespace llvm