blob: 822b23b1d2108fff16a2f7ce062fecef4db68031 [file] [log] [blame]
//===------ RegisterPasses.cpp - Add the Polly Passes to default passes --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file composes the individual LLVM-IR passes provided by Polly to a
// functional polyhedral optimizer. The polyhedral optimizer is automatically
// made available to LLVM based compilers by loading the Polly shared library
// into such a compiler.
//
// The Polly optimizer is made available by executing a static constructor that
// registers the individual Polly passes in the LLVM pass manager builder. The
// passes are registered such that the default behaviour of the compiler is not
// changed, but that the flag '-polly' provided at optimization level '-O3'
// enables additional polyhedral optimizations.
//===----------------------------------------------------------------------===//
#include "polly/RegisterPasses.h"
#include "polly/Canonicalization.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/CodegenCleanup.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
using namespace polly;
cl::OptionCategory PollyCategory("Polly Options",
"Configure the polly loop optimizer");
static cl::opt<bool>
PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> PollyDetectOnly(
"polly-only-scop-detection",
cl::desc("Only run scop detection, but no other optimizations"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
enum PassPositionChoice {
POSITION_EARLY,
POSITION_AFTER_LOOPOPT,
POSITION_BEFORE_VECTORIZER
};
enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
static cl::opt<PassPositionChoice> PassPosition(
"polly-position", cl::desc("Where to run polly in the pass pipeline"),
cl::values(
clEnumValN(POSITION_EARLY, "early", "Before everything"),
clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
"After the loop optimizer (but within the inline cycle)"),
clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
"Right before the vectorizer"),
clEnumValEnd),
cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<OptimizerChoice> Optimizer(
"polly-optimizer", cl::desc("Select the scheduling optimizer"),
cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
clEnumValN(OPTIMIZER_ISL, "isl", "The isl scheduling optimizer"),
clEnumValEnd),
cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
cl::cat(PollyCategory));
enum CodeGenChoice { CODEGEN_ISL, CODEGEN_NONE };
static cl::opt<CodeGenChoice> CodeGenerator(
"polly-code-generator", cl::desc("Select the code generator"),
cl::values(clEnumValN(CODEGEN_ISL, "isl", "isl code generator"),
clEnumValN(CODEGEN_NONE, "none", "no code generation"),
clEnumValEnd),
cl::Hidden, cl::init(CODEGEN_ISL), cl::ZeroOrMore, cl::cat(PollyCategory));
enum TargetChoice { TARGET_CPU, TARGET_GPU };
static cl::opt<TargetChoice>
Target("polly-target", cl::desc("The hardware to target"),
cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code"),
#ifdef GPU_CODEGEN
clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
#endif
clEnumValEnd),
cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
VectorizerChoice polly::PollyVectorizerChoice;
static cl::opt<polly::VectorizerChoice, true> Vectorizer(
"polly-vectorizer", cl::desc("Select the vectorization strategy"),
cl::values(
clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
clEnumValN(polly::VECTORIZER_POLLY, "polly",
"Polly internal vectorizer"),
clEnumValN(polly::VECTORIZER_STRIPMINE, "stripmine",
"Strip-mine outer loops for the loop-vectorizer to trigger"),
clEnumValEnd),
cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> ImportJScop(
"polly-import",
cl::desc("Export the polyhedral description of the detected Scops"),
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> ExportJScop(
"polly-export",
cl::desc("Export the polyhedral description of the detected Scops"),
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> DeadCodeElim("polly-run-dce",
cl::desc("Run the dead code elimination"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<bool> PollyViewer(
"polly-show",
cl::desc("Highlight the code regions that will be optimized in a "
"(CFG BBs and LLVM-IR instructions)"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> PollyOnlyViewer(
"polly-show-only",
cl::desc("Highlight the code regions that will be optimized in "
"a (CFG only BBs)"),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool> PollyOnlyPrinter(
"polly-dot-only",
cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
CFGPrinter("polly-view-cfg",
cl::desc("Show the Polly CFG right after code generation"),
cl::Hidden, cl::init(false), cl::cat(PollyCategory));
namespace polly {
void initializePollyPasses(PassRegistry &Registry) {
initializeCodeGenerationPass(Registry);
#ifdef GPU_CODEGEN
initializePPCGCodeGenerationPass(Registry);
#endif
initializeCodePreparationPass(Registry);
initializeDeadCodeElimPass(Registry);
initializeDependenceInfoPass(Registry);
initializeDependenceInfoWrapperPassPass(Registry);
initializeJSONExporterPass(Registry);
initializeJSONImporterPass(Registry);
initializeIslAstInfoPass(Registry);
initializeIslScheduleOptimizerPass(Registry);
initializePollyCanonicalizePass(Registry);
initializeScopDetectionPass(Registry);
initializeScopInfoRegionPassPass(Registry);
initializeScopInfoWrapperPassPass(Registry);
initializeCodegenCleanupPass(Registry);
}
/// @brief Register Polly passes such that they form a polyhedral optimizer.
///
/// The individual Polly passes are registered in the pass manager such that
/// they form a full polyhedral optimizer. The flow of the optimizer starts with
/// a set of preparing transformations that canonicalize the LLVM-IR such that
/// the LLVM-IR is easier for us to understand and to optimizes. On the
/// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
/// static control flow regions. Those regions are then translated by the
/// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
/// optimizer is run on the polyhedral representation and finally the optimized
/// polyhedral representation is code generated back to LLVM-IR.
///
/// Besides this core functionality, we optionally schedule passes that provide
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
/// allow the export/import of the polyhedral representation
/// (JSCON[Exporter|Importer]) or that show the cfg after code generation.
///
/// For certain parts of the Polly optimizer, several alternatives are provided:
///
/// As scheduling optimizer we support the isl scheduling optimizer
/// (http://freecode.com/projects/isl).
/// It is also possible to run Polly with no optimizer. This mode is mainly
/// provided to analyze the run and compile time changes caused by the
/// scheduling optimizer.
///
/// Polly supports the isl internal code generator.
void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
PM.add(polly::createScopDetectionPass());
if (PollyDetectOnly)
return;
if (PollyViewer)
PM.add(polly::createDOTViewerPass());
if (PollyOnlyViewer)
PM.add(polly::createDOTOnlyViewerPass());
if (PollyPrinter)
PM.add(polly::createDOTPrinterPass());
if (PollyOnlyPrinter)
PM.add(polly::createDOTOnlyPrinterPass());
PM.add(polly::createScopInfoRegionPassPass());
if (ImportJScop)
PM.add(polly::createJSONImporterPass());
if (DeadCodeElim)
PM.add(polly::createDeadCodeElimPass());
if (Target == TARGET_GPU) {
// GPU generation provides its own scheduling optimization strategy.
} else {
switch (Optimizer) {
case OPTIMIZER_NONE:
break; /* Do nothing */
case OPTIMIZER_ISL:
PM.add(polly::createIslScheduleOptimizerPass());
break;
}
}
if (ExportJScop)
PM.add(polly::createJSONExporterPass());
if (Target == TARGET_GPU) {
#ifdef GPU_CODEGEN
PM.add(polly::createPPCGCodeGenerationPass());
#endif
} else {
switch (CodeGenerator) {
case CODEGEN_ISL:
PM.add(polly::createCodeGenerationPass());
break;
case CODEGEN_NONE:
break;
}
}
// FIXME: This dummy ModulePass keeps some programs from miscompiling,
// probably some not correctly preserved analyses. It acts as a barrier to
// force all analysis results to be recomputed.
PM.add(createBarrierNoopPass());
if (CFGPrinter)
PM.add(llvm::createCFGPrinterPass());
}
static bool shouldEnablePolly() {
if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
PollyTrackFailures = true;
if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
ExportJScop || ImportJScop)
PollyEnabled = true;
return PollyEnabled;
}
static void
registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
llvm::legacy::PassManagerBase &PM) {
if (!polly::shouldEnablePolly())
return;
if (PassPosition != POSITION_EARLY)
return;
registerCanonicalicationPasses(PM);
polly::registerPollyPasses(PM);
}
static void
registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
llvm::legacy::PassManagerBase &PM) {
if (!polly::shouldEnablePolly())
return;
if (PassPosition != POSITION_AFTER_LOOPOPT)
return;
PM.add(polly::createCodePreparationPass());
polly::registerPollyPasses(PM);
PM.add(createCodegenCleanupPass());
}
static void
registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
llvm::legacy::PassManagerBase &PM) {
if (!polly::shouldEnablePolly())
return;
if (PassPosition != POSITION_BEFORE_VECTORIZER)
return;
PM.add(polly::createCodePreparationPass());
polly::registerPollyPasses(PM);
PM.add(createCodegenCleanupPass());
}
/// @brief Register Polly to be available as an optimizer
///
///
/// We can currently run Polly at three different points int the pass manager.
/// a) very early, b) after the canonicalizing loop transformations and c) right
/// before the vectorizer.
///
/// The default is currently a), to register Polly such that it runs as early as
/// possible. This has several implications:
///
/// 1) We need to schedule more canonicalization passes
///
/// As nothing is run before Polly, it is necessary to run a set of preparing
/// transformations before Polly to canonicalize the LLVM-IR and to allow
/// Polly to detect and understand the code.
///
/// 2) LICM and LoopIdiom pass have not yet been run
///
/// Loop invariant code motion as well as the loop idiom recognition pass make
/// it more difficult for Polly to transform code. LICM may introduce
/// additional data dependences that are hard to eliminate and the loop idiom
/// recognition pass may introduce calls to memset that we currently do not
/// understand. By running Polly early enough (meaning before these passes) we
/// avoid difficulties that may be introduced by these passes.
///
/// 3) We get the full -O3 optimization sequence after Polly
///
/// The LLVM-IR that is generated by Polly has been optimized on a high level,
/// but it may be rather inefficient on the lower/scalar level. By scheduling
/// Polly before all other passes, we have the full sequence of -O3
/// optimizations behind us, such that inefficiencies on the low level can
/// be optimized away.
///
/// We are currently evaluating the benefit or running Polly at position b) or
/// c). b) is likely to early as it interacts with the inliner. c) is nice
/// as everything is fully inlined and canonicalized, but we need to be able
/// to handle LICMed code to make it useful.
static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
registerPollyEarlyAsPossiblePasses);
static llvm::RegisterStandardPasses
RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
registerPollyLoopOptimizerEndPasses);
static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
llvm::PassManagerBuilder::EP_VectorizerStart,
registerPollyScalarOptimizerLatePasses);
} // namespace polly