| //===-- Generate random but valid function descriptors -------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "automemcpy/RandomFunctionGenerator.h" |
| |
| #include <llvm/ADT/StringRef.h> |
| #include <llvm/Support/raw_ostream.h> |
| |
| #include <optional> |
| #include <set> |
| |
| namespace llvm { |
| namespace automemcpy { |
| |
| // Exploration parameters |
| // ---------------------- |
// Here we define a set of values that will constrain the exploration and
// limit combinatorial explosion.
| |
// We limit the handling of individual sizes to sizes up to 4.
// More individual sizes don't bring much over the overlapping strategy.
| static constexpr int kMaxIndividualSize = 4; |
| |
// We limit the Overlapping Strategy to sizes up to 256.
// An overlap of 256B means accessing 128B at once, which is usually not
// feasible on current CPUs. We rely on the compiler to generate multiple
// loads/stores if needed, but higher sizes are unlikely to benefit from
// hardware acceleration.
| static constexpr int kMaxOverlapSize = 256; |
| |
| // For the loop strategies, we make sure that they iterate at least a certain |
| // number of times to amortize the cost of looping. |
| static constexpr int kLoopMinIter = 3; |
| static constexpr int kAlignedLoopMinIter = 2; |
| |
| // We restrict the size of the block of data to handle in a loop. |
// Generally speaking, block sizes <= 16 perform poorly.
| static constexpr int kLoopBlockSize[] = {16, 32, 64}; |
| |
| // We restrict alignment to the following values. |
| static constexpr int kLoopAlignments[] = {16, 32, 64}; |
| |
| // We make sure that the region bounds are one of the following values. |
| static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48, |
| 64, 96, 128, 256, 512, 1024, kMaxSize}; |
| |
| // We also allow disabling loops, aligned loops and accelerators. |
| static constexpr bool kDisableLoop = false; |
| static constexpr bool kDisableAlignedLoop = false; |
| static constexpr bool kDisableAccelerator = false; |
| |
| // For memcpy, we can also explore whether aligning on source or destination has |
| // an effect. |
| static constexpr bool kExploreAlignmentArg = true; |
| |
// The functions we generate code for.
| // BCMP is specifically disabled for now. |
| static constexpr int kFunctionTypes[] = { |
| (int)FunctionType::MEMCPY, |
| (int)FunctionType::MEMCMP, |
| // (int)FunctionType::BCMP, |
| (int)FunctionType::MEMSET, |
| (int)FunctionType::BZERO, |
| }; |
| |
// The actual implementation of each function can be handled via primitive
// types (SCALAR), vector types where available (NATIVE), or by the compiler
// (BUILTIN). We want to move toward delegating code generation entirely to
// the compiler, but for now we have to make use of per-microarchitecture
// custom implementations. SCALAR is more portable but also less performant,
// so we disable it as well.
| static constexpr int kElementClasses[] = { |
| // (int)ElementTypeClass::SCALAR, |
| (int)ElementTypeClass::NATIVE, |
| // (int)ElementTypeClass::BUILTIN |
| }; |
| |
| RandomFunctionGenerator::RandomFunctionGenerator() |
| : Solver(Context), Type(Context.int_const("Type")), |
| ContiguousBegin(Context.int_const("ContiguousBegin")), |
| ContiguousEnd(Context.int_const("ContiguousEnd")), |
| OverlapBegin(Context.int_const("OverlapBegin")), |
| OverlapEnd(Context.int_const("OverlapEnd")), |
| LoopBegin(Context.int_const("LoopBegin")), |
| LoopEnd(Context.int_const("LoopEnd")), |
| LoopBlockSize(Context.int_const("LoopBlockSize")), |
| AlignedLoopBegin(Context.int_const("AlignedLoopBegin")), |
| AlignedLoopEnd(Context.int_const("AlignedLoopEnd")), |
| AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")), |
| AlignedAlignment(Context.int_const("AlignedAlignment")), |
| AlignedArg(Context.int_const("AlignedArg")), |
| AcceleratorBegin(Context.int_const("AcceleratorBegin")), |
| AcceleratorEnd(Context.int_const("AcceleratorEnd")), |
| ElementClass(Context.int_const("ElementClass")) { |
| // All possible functions. |
| Solver.add(inSetConstraint(Type, kFunctionTypes)); |
| |
| // Add constraints for region bounds. |
| addBoundsAndAnchors(ContiguousBegin, ContiguousEnd); |
| addBoundsAndAnchors(OverlapBegin, OverlapEnd); |
| addBoundsAndAnchors(LoopBegin, LoopEnd); |
| addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd); |
| addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd); |
| // We always consider strategies in this order, and we |
| // always end with the `Accelerator` strategy, as it's typically more |
| // efficient for large sizes. |
| // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator |
| Solver.add(ContiguousEnd == OverlapBegin); |
| Solver.add(OverlapEnd == LoopBegin); |
| Solver.add(LoopEnd == AlignedLoopBegin); |
| Solver.add(AlignedLoopEnd == AcceleratorBegin); |
| // Fix endpoints: The minimum size that we want to copy is 0, and we always |
| // start with the `Contiguous` strategy. The max size is `kMaxSize`. |
| Solver.add(ContiguousBegin == 0); |
| Solver.add(AcceleratorEnd == kMaxSize); |
| // Contiguous |
| Solver.add(ContiguousEnd <= kMaxIndividualSize + 1); |
| // Overlap |
| Solver.add(OverlapEnd <= kMaxOverlapSize + 1); |
| // Overlap only ever makes sense when accessing multiple bytes at a time. |
| // i.e. Overlap<1> is useless. |
| Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2); |
| // Loop |
| addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter); |
| // Aligned Loop |
| addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, |
| kAlignedLoopMinIter); |
| Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments)); |
| Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64); |
| Solver.add(AlignedLoopBlockSize >= AlignedAlignment); |
| Solver.add(AlignedLoopBlockSize >= LoopBlockSize); |
| z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY; |
| z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg; |
| Solver.add( |
| (ExploreAlignment && |
| inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) || |
| (!ExploreAlignment && AlignedArg == (int)AlignArg::_1)); |
| // Accelerator |
| Solver.add(IsMemcpy || |
| (AcceleratorBegin == |
| AcceleratorEnd)); // Only Memcpy has accelerator for now. |
| // Element classes |
| Solver.add(inSetConstraint(ElementClass, kElementClasses)); |
| |
| if (kDisableLoop) |
| Solver.add(LoopBegin == LoopEnd); |
| if (kDisableAlignedLoop) |
| Solver.add(AlignedLoopBegin == AlignedLoopEnd); |
| if (kDisableAccelerator) |
| Solver.add(AcceleratorBegin == AcceleratorEnd); |
| } |
| |
// Creates a SizeSpan from Begin/End values.
// Returns std::nullopt if Begin == End.
| static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) { |
| if (Begin == End) |
| return std::nullopt; |
| SizeSpan SS; |
| SS.Begin = Begin; |
| SS.End = End; |
| return SS; |
| } |
| |
// Generic helper to create a `Region` struct with a Span, or std::nullopt if
// the span is empty.
| template <typename Region> |
| static std::optional<Region> As(size_t Begin, size_t End) { |
| if (auto Span = AsSizeSpan(Begin, End)) { |
| Region Output; |
| Output.Span = *Span; |
| return Output; |
| } |
| return std::nullopt; |
| } |
| |
| // Returns a Loop struct or std::nullopt if span is empty. |
| static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) { |
| if (auto Span = AsSizeSpan(Begin, End)) { |
| Loop Output; |
| Output.Span = *Span; |
| Output.BlockSize = BlockSize; |
| return Output; |
| } |
| return std::nullopt; |
| } |
| |
| // Returns an AlignedLoop struct or std::nullopt if span is empty. |
| static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End, |
| size_t BlockSize, |
| size_t Alignment, |
| AlignArg AlignTo) { |
| if (auto Loop = AsLoop(Begin, End, BlockSize)) { |
| AlignedLoop Output; |
| Output.Loop = *Loop; |
| Output.Alignment = Alignment; |
| Output.AlignTo = AlignTo; |
| return Output; |
| } |
| return std::nullopt; |
| } |
| |
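// Returns the next valid FunctionDescriptor, or std::nullopt once the solver
// can no longer produce a new configuration.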
| std::optional<FunctionDescriptor> RandomFunctionGenerator::next() { |
| if (Solver.check() != z3::sat) |
| return {}; |
| |
| z3::model m = Solver.get_model(); |
| |
// Helper lambda to get the current numerical value of a z3::expr.
| const auto E = [&m](z3::expr &V) -> int { |
| return m.eval(V).get_numeral_int(); |
| }; |
| |
// Fill in `R`, the function descriptor to return.
| FunctionDescriptor R; |
| R.Type = FunctionType(E(Type)); |
| R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd)); |
| R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd)); |
| R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize)); |
| R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd), |
| E(AlignedLoopBlockSize), E(AlignedAlignment), |
| AlignArg(E(AlignedArg))); |
| R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd)); |
| R.ElementClass = ElementTypeClass(E(ElementClass)); |
| |
| // Express current state as a set of constraints. |
| z3::expr CurrentLayout = |
| (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) && |
| (ContiguousEnd == E(ContiguousEnd)) && |
| (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) && |
| (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) && |
| (LoopBlockSize == E(LoopBlockSize)) && |
| (AlignedLoopBegin == E(AlignedLoopBegin)) && |
| (AlignedLoopEnd == E(AlignedLoopEnd)) && |
| (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) && |
| (AlignedAlignment == E(AlignedAlignment)) && |
| (AlignedArg == E(AlignedArg)) && |
| (AcceleratorBegin == E(AcceleratorBegin)) && |
| (AcceleratorEnd == E(AcceleratorEnd)) && |
| (ElementClass == E(ElementClass)); |
| |
// Ask the solver to never produce this configuration again.
| Solver.add(!CurrentLayout); |
| return R; |
| } |
| |
// Returns a constraint ensuring `Variable` is one of the provided values.
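// For example, inSetConstraint(X, {16, 32, 64}) builds the disjunction
// (X == 16) || (X == 32) || (X == 64).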
| z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, |
| ArrayRef<int> Values) const { |
| z3::expr_vector Args(Variable.ctx()); |
| for (int Value : Values) |
| Args.push_back(Variable == Value); |
| return z3::mk_or(Args); |
| } |
| |
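// Constrains `Begin` and `End` to the predefined anchor values and enforces
// 0 <= Begin <= End <= kMaxSize.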
| void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, |
| z3::expr &End) { |
| // Begin and End are picked amongst a set of predefined values. |
| Solver.add(inSetConstraint(Begin, kAnchors)); |
| Solver.add(inSetConstraint(End, kAnchors)); |
| Solver.add(Begin >= 0); |
| Solver.add(Begin <= End); |
| Solver.add(End <= kMaxSize); |
| } |
| |
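// Constrains the loop region: the block size must be one of kLoopBlockSize,
// and the region is either empty or starts beyond LoopMinIter * LoopBlockSize
// bytes so that the loop runs at least LoopMinIter times.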
| void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, |
| const z3::expr &LoopEnd, |
| z3::expr &LoopBlockSize, |
| int LoopMinIter) { |
| Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); |
| Solver.add(LoopBegin == LoopEnd || |
| (LoopBegin > (LoopMinIter * LoopBlockSize))); |
| } |
| |
| } // namespace automemcpy |
| } // namespace llvm |