bolt/lib/Core/ParallelUtilities.cpp - llvm-project - Git at Google

 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // Implementation of the class that manages parallel work on BinaryFunctions.
 //
 //===----------------------------------------------------------------------===//

 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Core/BinaryContext.h"
 #include "bolt/Core/BinaryFunction.h"
 #include "llvm/Support/RWMutex.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Timer.h"
 #include <mutex>

 #define DEBUG_TYPE "par-utils"

 namespace opts {
 extern cl::OptionCategory BoltCategory;

 cl::opt<unsigned>
 ThreadCount("thread-count",
   cl::desc("number of threads"),
   cl::init(hardware_concurrency().compute_thread_count()),
   cl::cat(BoltCategory));

 cl::opt<bool>
 NoThreads("no-threads",
   cl::desc("disable multithreading"),
   cl::init(false),
   cl::cat(BoltCategory));

 cl::opt<unsigned>
 TaskCount("tasks-per-thread",
   cl::desc("number of tasks to be created per thread"),
   cl::init(20),
   cl::cat(BoltCategory));

 } // namespace opts

 namespace llvm {
 namespace bolt {
 namespace ParallelUtilities {

 namespace {
 /// A single thread pool that is used to run parallel tasks
 std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;

 unsigned computeCostFor(const BinaryFunction &BF,
                         const PredicateTy &SkipPredicate,
                         const SchedulingPolicy &SchedPolicy) {
   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
     return 1;

   if (SkipPredicate && SkipPredicate(BF))
     return 0;

   switch (SchedPolicy) {
   case SchedulingPolicy::SP_CONSTANT:
     return 1;
   case SchedulingPolicy::SP_INST_LINEAR:
     return BF.getSize();
   case SchedulingPolicy::SP_INST_QUADRATIC:
     return BF.getSize() * BF.getSize();
   case SchedulingPolicy::SP_BB_LINEAR:
     return BF.size();
   case SchedulingPolicy::SP_BB_QUADRATIC:
     return BF.size() * BF.size();
   default:
     llvm_unreachable("unsupported scheduling policy");
   }
 }

 inline unsigned estimateTotalCost(const BinaryContext &BC,
                                   const PredicateTy &SkipPredicate,
                                   SchedulingPolicy &SchedPolicy) {
   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
     return BC.getBinaryFunctions().size();

   unsigned TotalCost = 0;
   for (auto &BFI : BC.getBinaryFunctions()) {
     const BinaryFunction &BF = BFI.second;
     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
   }

   // Switch to trivial scheduling if total estimated work is zero
   if (TotalCost == 0) {
     BC.outs()
         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
            "switch to  trivial scheduling.\n";

     SchedPolicy = SP_TRIVIAL;
     TotalCost = BC.getBinaryFunctions().size();
   }
   return TotalCost;
 }

 } // namespace

 ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
   if (ThreadPoolPtr.get())
     return *ThreadPoolPtr;

   if (ThreadsCount > 1)
     ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
         llvm::hardware_concurrency(ThreadsCount));
   else
     ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
   return *ThreadPoolPtr;
 }

 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
                        std::string LogName, bool ForceSequential,
                        unsigned TasksPerThread) {
   if (BC.getBinaryFunctions().size() == 0)
     return;

   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
     Timer T(LogName, LogName);
     LLVM_DEBUG(T.startTimer());

     for (auto It = BlockBegin; It != BlockEnd; ++It) {
       BinaryFunction &BF = It->second;
       if (SkipPredicate && SkipPredicate(BF))
         continue;

       WorkFunction(BF);
     }
     LLVM_DEBUG(T.stopTimer());
   };

   if (opts::NoThreads || ForceSequential) {
     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
     return;
   }

   // Estimate the overall runtime cost using the scheduling policy
   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
   const unsigned BlockCost =
       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

   // Divide work into blocks of equal cost
   ThreadPoolInterface &Pool = getThreadPool();
   auto BlockBegin = BC.getBinaryFunctions().begin();
   unsigned CurrentCost = 0;

   for (auto It = BC.getBinaryFunctions().begin();
        It != BC.getBinaryFunctions().end(); ++It) {
     BinaryFunction &BF = It->second;
     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);

     if (CurrentCost >= BlockCost) {
       Pool.async(runBlock, BlockBegin, std::next(It));
       BlockBegin = std::next(It);
       CurrentCost = 0;
     }
   }
   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
   Pool.wait();
 }

 void runOnEachFunctionWithUniqueAllocId(
     BinaryContext &BC, SchedulingPolicy SchedPolicy,
     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
   if (BC.getBinaryFunctions().size() == 0)
     return;

   llvm::sys::RWMutex MainLock;
   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
                       MCPlusBuilder::AllocatorIdTy AllocId) {
     Timer T(LogName, LogName);
     LLVM_DEBUG(T.startTimer());
     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
     for (auto It = BlockBegin; It != BlockEnd; ++It) {
       BinaryFunction &BF = It->second;
       if (SkipPredicate && SkipPredicate(BF))
         continue;

       WorkFunction(BF, AllocId);
     }
     LLVM_DEBUG(T.stopTimer());
   };

   unsigned AllocId = 1;
   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
     if (!BC.MIB->checkAllocatorExists(AllocId)) {
       MCPlusBuilder::AllocatorIdTy Id =
           BC.MIB->initializeNewAnnotationAllocator();
       (void)Id;
       assert(AllocId == Id && "unexpected allocator id created");
     }
   };

   if (opts::NoThreads || ForceSequential) {
     EnsureAllocatorExists(AllocId);
     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
              AllocId);
     return;
   }
   // This lock is used to postpone task execution
   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);

   // Estimate the overall runtime cost using the scheduling policy
   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
   const unsigned BlockCost =
       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

   // Divide work into blocks of equal cost
   ThreadPoolInterface &Pool = getThreadPool();
   auto BlockBegin = BC.getBinaryFunctions().begin();
   unsigned CurrentCost = 0;
   for (auto It = BC.getBinaryFunctions().begin();
        It != BC.getBinaryFunctions().end(); ++It) {
     BinaryFunction &BF = It->second;
     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);

     if (CurrentCost >= BlockCost) {
       EnsureAllocatorExists(AllocId);
       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
       AllocId++;
       BlockBegin = std::next(It);
       CurrentCost = 0;
     }
   }

   EnsureAllocatorExists(AllocId);

   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
   Lock.unlock();
   Pool.wait();
 }

 } // namespace ParallelUtilities
 } // namespace bolt
 } // namespace llvm
	//===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// Implementation of the class that manages parallel work on BinaryFunctions.
	//
	//===----------------------------------------------------------------------===//

	#include "bolt/Core/ParallelUtilities.h"
	#include "bolt/Core/BinaryContext.h"
	#include "bolt/Core/BinaryFunction.h"
	#include "llvm/Support/RWMutex.h"
	#include "llvm/Support/ThreadPool.h"
	#include "llvm/Support/Timer.h"
	#include <mutex>

	#define DEBUG_TYPE "par-utils"

	namespace opts {
	extern cl::OptionCategory BoltCategory;

	cl::opt<unsigned>
	ThreadCount("thread-count",
	cl::desc("number of threads"),
	cl::init(hardware_concurrency().compute_thread_count()),
	cl::cat(BoltCategory));

	cl::opt<bool>
	NoThreads("no-threads",
	cl::desc("disable multithreading"),
	cl::init(false),
	cl::cat(BoltCategory));

	cl::opt<unsigned>
	TaskCount("tasks-per-thread",
	cl::desc("number of tasks to be created per thread"),
	cl::init(20),
	cl::cat(BoltCategory));

	} // namespace opts

	namespace llvm {
	namespace bolt {
	namespace ParallelUtilities {

	namespace {
	/// A single thread pool that is used to run parallel tasks
	std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;

	unsigned computeCostFor(const BinaryFunction &BF,
	const PredicateTy &SkipPredicate,
	const SchedulingPolicy &SchedPolicy) {
	if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
	return 1;

	if (SkipPredicate && SkipPredicate(BF))
	return 0;

	switch (SchedPolicy) {
	case SchedulingPolicy::SP_CONSTANT:
	return 1;
	case SchedulingPolicy::SP_INST_LINEAR:
	return BF.getSize();
	case SchedulingPolicy::SP_INST_QUADRATIC:
	return BF.getSize() * BF.getSize();
	case SchedulingPolicy::SP_BB_LINEAR:
	return BF.size();
	case SchedulingPolicy::SP_BB_QUADRATIC:
	return BF.size() * BF.size();
	default:
	llvm_unreachable("unsupported scheduling policy");
	}
	}

	inline unsigned estimateTotalCost(const BinaryContext &BC,
	const PredicateTy &SkipPredicate,
	SchedulingPolicy &SchedPolicy) {
	if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
	return BC.getBinaryFunctions().size();

	unsigned TotalCost = 0;
	for (auto &BFI : BC.getBinaryFunctions()) {
	const BinaryFunction &BF = BFI.second;
	TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
	}

	// Switch to trivial scheduling if total estimated work is zero
	if (TotalCost == 0) {
	BC.outs()
	<< "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
	"switch to trivial scheduling.\n";

	SchedPolicy = SP_TRIVIAL;
	TotalCost = BC.getBinaryFunctions().size();
	}
	return TotalCost;
	}

	} // namespace

	ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
	if (ThreadPoolPtr.get())
	return *ThreadPoolPtr;

	if (ThreadsCount > 1)
	ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
	llvm::hardware_concurrency(ThreadsCount));
	else
	ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
	return *ThreadPoolPtr;
	}

	void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
	WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
	std::string LogName, bool ForceSequential,
	unsigned TasksPerThread) {
	if (BC.getBinaryFunctions().size() == 0)
	return;

	auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
	std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
	Timer T(LogName, LogName);
	LLVM_DEBUG(T.startTimer());

	for (auto It = BlockBegin; It != BlockEnd; ++It) {
	BinaryFunction &BF = It->second;
	if (SkipPredicate && SkipPredicate(BF))
	continue;

	WorkFunction(BF);
	}
	LLVM_DEBUG(T.stopTimer());
	};

	if (opts::NoThreads \|\| ForceSequential) {
	runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
	return;
	}

	// Estimate the overall runtime cost using the scheduling policy
	const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
	const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
	const unsigned BlockCost =
	TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

	// Divide work into blocks of equal cost
	ThreadPoolInterface &Pool = getThreadPool();
	auto BlockBegin = BC.getBinaryFunctions().begin();
	unsigned CurrentCost = 0;

	for (auto It = BC.getBinaryFunctions().begin();
	It != BC.getBinaryFunctions().end(); ++It) {
	BinaryFunction &BF = It->second;
	CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);

	if (CurrentCost >= BlockCost) {
	Pool.async(runBlock, BlockBegin, std::next(It));
	BlockBegin = std::next(It);
	CurrentCost = 0;
	}
	}
	Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
	Pool.wait();
	}

	void runOnEachFunctionWithUniqueAllocId(
	BinaryContext &BC, SchedulingPolicy SchedPolicy,
	WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
	std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
	if (BC.getBinaryFunctions().size() == 0)
	return;

	llvm::sys::RWMutex MainLock;
	auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
	std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
	MCPlusBuilder::AllocatorIdTy AllocId) {
	Timer T(LogName, LogName);
	LLVM_DEBUG(T.startTimer());
	std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
	for (auto It = BlockBegin; It != BlockEnd; ++It) {
	BinaryFunction &BF = It->second;
	if (SkipPredicate && SkipPredicate(BF))
	continue;

	WorkFunction(BF, AllocId);
	}
	LLVM_DEBUG(T.stopTimer());
	};

	unsigned AllocId = 1;
	auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
	if (!BC.MIB->checkAllocatorExists(AllocId)) {
	MCPlusBuilder::AllocatorIdTy Id =
	BC.MIB->initializeNewAnnotationAllocator();
	(void)Id;
	assert(AllocId == Id && "unexpected allocator id created");
	}
	};

	if (opts::NoThreads \|\| ForceSequential) {
	EnsureAllocatorExists(AllocId);
	runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
	AllocId);
	return;
	}
	// This lock is used to postpone task execution
	std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);

	// Estimate the overall runtime cost using the scheduling policy
	const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
	const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
	const unsigned BlockCost =
	TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

	// Divide work into blocks of equal cost
	ThreadPoolInterface &Pool = getThreadPool();
	auto BlockBegin = BC.getBinaryFunctions().begin();
	unsigned CurrentCost = 0;
	for (auto It = BC.getBinaryFunctions().begin();
	It != BC.getBinaryFunctions().end(); ++It) {
	BinaryFunction &BF = It->second;
	CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);

	if (CurrentCost >= BlockCost) {
	EnsureAllocatorExists(AllocId);
	Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
	AllocId++;
	BlockBegin = std::next(It);
	CurrentCost = 0;
	}
	}

	EnsureAllocatorExists(AllocId);

	Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
	Lock.unlock();
	Pool.wait();
	}

	} // namespace ParallelUtilities
	} // namespace bolt
	} // namespace llvm