// offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp - llvm-project - Git at Google

 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// This file contains the definition of the GpuMathTest class, a test harness
 /// that orchestrates running a math function on a device (GPU) and verifying
 /// its results.
 ///
 //===----------------------------------------------------------------------===//

 #ifndef MATHTEST_GPUMATHTEST_HPP
 #define MATHTEST_GPUMATHTEST_HPP

 #include "mathtest/DeviceContext.hpp"
 #include "mathtest/DeviceResources.hpp"
 #include "mathtest/HostRefChecker.hpp"
 #include "mathtest/InputGenerator.hpp"
 #include "mathtest/Support.hpp"
 #include "mathtest/TestResult.hpp"

 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"

 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <tuple>
 #include <utility>

 namespace mathtest {

 /// Test harness that runs the device kernel associated with \p Func on
 /// generated inputs and validates the produced outputs with \p Checker.
 ///
 /// \tparam Func    The function under test. Its return/argument types are taken
 ///                 from FunctionTraits<Func> and its kernel name from
 ///                 FunctionConfig<Func>.
 /// \tparam Checker Type exposing a static `check(InViewsTuple, OutView)` that
 ///                 produces a ResultType for one batch.
 template <auto Func, typename Checker = HostRefChecker<Func>>
 class [[nodiscard]] GpuMathTest final {
   using FunctionTraits = FunctionTraits<Func>;
   using OutType = typename FunctionTraits::ReturnType;
   using InTypesTuple = typename FunctionTraits::ArgTypesTuple;

   // TestResult specialised on the output type; the input types are supplied
   // later by expanding InTypesTuple.
   template <typename... Ts>
   using PartialResultType = TestResult<OutType, Ts...>;
   using KernelSignature = KernelSignatureOf_t<Func>;

   // Tuple of type tags, one per input type, so that createBuffers() can
   // iterate over the input types with std::apply.
   template <typename... Ts>
   using TypeIdentitiesTuple = std::tuple<TypeIdentityOf<Ts>...>;
   using InTypeIdentitiesTuple =
       ApplyTupleTypes_t<InTypesTuple, TypeIdentitiesTuple>;

   static constexpr std::size_t DefaultBufferSize =
       DefaultBufferSizeFor_v<OutType, InTypesTuple>;
   static constexpr uint32_t DefaultGroupSize = 512;

 public:
   using FunctionConfig = FunctionConfig<Func>;
   using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
   using GeneratorType = ApplyTupleTypes_t<InTypesTuple, InputGenerator>;

   /// Creates a test bound to \p Context by loading the device binary named
   /// after \p Provider from \p DeviceBinaryDir and looking up the kernel
   /// called FunctionConfig::KernelName in it.
   ///
   /// \param Context         Device context; must not be null.
   /// \param Provider        Provider name, also used as the binary name.
   /// \param DeviceBinaryDir Directory passed to DeviceContext::loadBinary.
   /// \returns The constructed test, or the error reported while loading the
   ///          binary or resolving the kernel.
   [[nodiscard]] static llvm::Expected<GpuMathTest>
   create(std::shared_ptr<DeviceContext> Context, llvm::StringRef Provider,
          llvm::StringRef DeviceBinaryDir) {
     assert(Context && "Context must not be null");

     auto ExpectedKernel = getKernel(*Context, Provider, DeviceBinaryDir);
     if (!ExpectedKernel)
       return ExpectedKernel.takeError();

     return GpuMathTest(std::move(Context), Provider, *ExpectedKernel);
   }

   /// Runs the kernel over every batch produced by \p Generator and returns
   /// the accumulated result.
   ///
   /// Batches are requested with `Generator.fill(...)` until it returns 0.
   ///
   /// \param Generator  Source of input values; it is advanced by this call.
   /// \param BufferSize Number of elements in each input/output buffer; must
   ///                   be positive.
   /// \param GroupSize  Group size passed to the kernel launch; must be
   ///                   positive.
   [[nodiscard]] ResultType
   run(GeneratorType &Generator, std::size_t BufferSize = DefaultBufferSize,
       uint32_t GroupSize = DefaultGroupSize) const noexcept {
     assert(BufferSize > 0 && "Buffer size must be a positive value");
     assert(GroupSize > 0 && "Group size must be a positive value");

     auto [InBuffersTuple, OutBuffer] = createBuffers(BufferSize);
     ResultType FinalResult;

     while (true) {
       const std::size_t BatchSize = std::apply(
           [&](auto &...Buffers) { return Generator.fill(Buffers...); },
           InBuffersTuple);

       // A batch of zero elements signals that the generator is exhausted.
       if (BatchSize == 0)
         break;

       // BatchSize is used below as the element count of views over buffers
       // that hold BufferSize elements, so it must not exceed that size.
       assert(BatchSize <= BufferSize &&
              "Generator reported more elements than the buffers can hold");

       const auto BatchResult =
           processBatch(InBuffersTuple, OutBuffer, BatchSize, GroupSize);

       FinalResult.accumulate(BatchResult);
     }

     return FinalResult;
   }

   /// Returns the device context this test was created with.
   [[nodiscard]] std::shared_ptr<DeviceContext> getContext() const noexcept {
     return Context;
   }

   /// Returns a copy of the provider name this test was created with.
   [[nodiscard]] std::string getProvider() const noexcept { return Provider; }

 private:
   explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
                        llvm::StringRef Provider,
                        DeviceKernel<KernelSignature> Kernel)
       : Context(std::move(Context)), Provider(Provider), Kernel(Kernel) {}

   /// Loads the binary named \p Provider from \p DeviceBinaryDir through
   /// \p Context and resolves the kernel named FunctionConfig::KernelName.
   static llvm::Expected<DeviceKernel<KernelSignature>>
   getKernel(const DeviceContext &Context, llvm::StringRef Provider,
             llvm::StringRef DeviceBinaryDir) {
     // The device binary is named after the provider.
     llvm::StringRef BinaryName = Provider;

     auto ExpectedImage = Context.loadBinary(DeviceBinaryDir, BinaryName);
     if (!ExpectedImage)
       return ExpectedImage.takeError();

     auto ExpectedKernel = Context.getKernel<KernelSignature>(
         *ExpectedImage, FunctionConfig::KernelName);
     if (!ExpectedKernel)
       return ExpectedKernel.takeError();

     return *ExpectedKernel;
   }

   /// Creates one buffer of \p BufferSize elements per input type plus one
   /// output buffer, returned as a (tuple of input buffers, output buffer)
   /// pair.
   [[nodiscard]] auto createBuffers(std::size_t BufferSize) const {
     auto InBuffersTuple = std::apply(
         [&](auto... InTypeIdentities) {
           return std::make_tuple(
               Context->createManagedBuffer<
                   typename decltype(InTypeIdentities)::type>(BufferSize)...);
         },
         InTypeIdentitiesTuple{});
     auto OutBuffer = Context->createManagedBuffer<OutType>(BufferSize);

     return std::make_pair(std::move(InBuffersTuple), std::move(OutBuffer));
   }

   /// Launches the kernel on the first \p BatchSize elements of the buffers
   /// and checks the results.
   ///
   /// The kernel receives the data pointer of every input buffer, the data
   /// pointer of \p OutBuffer and \p BatchSize, in that order.
   template <typename InBuffersTupleType>
   [[nodiscard]] ResultType
   processBatch(const InBuffersTupleType &InBuffersTuple,
                ManagedBuffer<OutType> &OutBuffer, std::size_t BatchSize,
                uint32_t GroupSize) const noexcept {
     // Ceiling division written without the `BatchSize + GroupSize - 1`
     // intermediate so that it cannot wrap around for very large batches.
     const std::size_t NumGroupsWide =
         BatchSize / GroupSize + (BatchSize % GroupSize != 0 ? 1 : 0);
     // The launch takes a 32-bit group count; a silently truncated count would
     // leave part of the batch unprocessed while check() still inspects all of
     // it, so make the narrowing explicit and guarded.
     assert(NumGroupsWide <= UINT32_MAX &&
            "Number of groups must fit in 32 bits");
     const auto NumGroups = static_cast<uint32_t>(NumGroupsWide);

     const auto KernelArgsTuple = std::apply(
         [&](const auto &...InBuffers) {
           return std::make_tuple(InBuffers.data()..., OutBuffer.data(),
                                  BatchSize);
         },
         InBuffersTuple);

     std::apply(
         [&](const auto &...KernelArgs) {
           Context->launchKernel(Kernel, NumGroups, GroupSize, KernelArgs...);
         },
         KernelArgsTuple);

     return check(InBuffersTuple, OutBuffer, BatchSize);
   }

   /// Wraps the first \p BatchSize elements of every buffer in an
   /// llvm::ArrayRef and forwards the views to Checker::check.
   template <typename InBuffersTupleType>
   [[nodiscard]] static ResultType
   check(const InBuffersTupleType &InBuffersTuple,
         const ManagedBuffer<OutType> &OutBuffer,
         std::size_t BatchSize) noexcept {
     const auto InViewsTuple = std::apply(
         [&](const auto &...InBuffers) {
           return std::make_tuple(
               llvm::ArrayRef(InBuffers.data(), BatchSize)...);
         },
         InBuffersTuple);
     const auto OutView = llvm::ArrayRef<OutType>(OutBuffer.data(), BatchSize);

     return Checker::check(InViewsTuple, OutView);
   }

   std::shared_ptr<DeviceContext> Context;
   std::string Provider;
   DeviceKernel<KernelSignature> Kernel;
 };
 } // namespace mathtest

 #endif // MATHTEST_GPUMATHTEST_HPP
	//===----------------------------------------------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	///
	/// \file
	/// This file contains the definition of the GpuMathTest class, a test harness
	/// that orchestrates running a math function on a device (GPU) and verifying
	/// its results.
	///
	//===----------------------------------------------------------------------===//

	#ifndef MATHTEST_GPUMATHTEST_HPP
	#define MATHTEST_GPUMATHTEST_HPP

	#include "mathtest/DeviceContext.hpp"
	#include "mathtest/DeviceResources.hpp"
	#include "mathtest/HostRefChecker.hpp"
	#include "mathtest/InputGenerator.hpp"
	#include "mathtest/Support.hpp"
	#include "mathtest/TestResult.hpp"

	#include "llvm/ADT/ArrayRef.h"
	#include "llvm/ADT/StringRef.h"
	#include "llvm/Support/Error.h"

	#include <cassert>
	#include <cstddef>
	#include <cstdint>
	#include <memory>
	#include <string>
	#include <tuple>
	#include <utility>

	namespace mathtest {

	/// Test harness that runs the device kernel associated with \p Func on
	/// generated inputs and validates the produced outputs with \p Checker.
	///
	/// \tparam Func    The function under test. Its return/argument types are
	///                 taken from FunctionTraits<Func> and its kernel name from
	///                 FunctionConfig<Func>.
	/// \tparam Checker Type exposing a static `check(InViewsTuple, OutView)` that
	///                 produces a ResultType for one batch.
	template <auto Func, typename Checker = HostRefChecker<Func>>
	class [[nodiscard]] GpuMathTest final {
	  using FunctionTraits = FunctionTraits<Func>;
	  using OutType = typename FunctionTraits::ReturnType;
	  using InTypesTuple = typename FunctionTraits::ArgTypesTuple;

	  // TestResult specialised on the output type; the input types are supplied
	  // later by expanding InTypesTuple.
	  template <typename... Ts>
	  using PartialResultType = TestResult<OutType, Ts...>;
	  using KernelSignature = KernelSignatureOf_t<Func>;

	  // Tuple of type tags, one per input type, so that createBuffers() can
	  // iterate over the input types with std::apply.
	  template <typename... Ts>
	  using TypeIdentitiesTuple = std::tuple<TypeIdentityOf<Ts>...>;
	  using InTypeIdentitiesTuple =
	      ApplyTupleTypes_t<InTypesTuple, TypeIdentitiesTuple>;

	  static constexpr std::size_t DefaultBufferSize =
	      DefaultBufferSizeFor_v<OutType, InTypesTuple>;
	  static constexpr uint32_t DefaultGroupSize = 512;

	public:
	  using FunctionConfig = FunctionConfig<Func>;
	  using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
	  using GeneratorType = ApplyTupleTypes_t<InTypesTuple, InputGenerator>;

	  /// Creates a test bound to \p Context by loading the device binary named
	  /// after \p Provider from \p DeviceBinaryDir and looking up the kernel
	  /// called FunctionConfig::KernelName in it.
	  ///
	  /// \param Context         Device context; must not be null.
	  /// \param Provider        Provider name, also used as the binary name.
	  /// \param DeviceBinaryDir Directory passed to DeviceContext::loadBinary.
	  /// \returns The constructed test, or the error reported while loading the
	  ///          binary or resolving the kernel.
	  [[nodiscard]] static llvm::Expected<GpuMathTest>
	  create(std::shared_ptr<DeviceContext> Context, llvm::StringRef Provider,
	         llvm::StringRef DeviceBinaryDir) {
	    assert(Context && "Context must not be null");

	    auto ExpectedKernel = getKernel(*Context, Provider, DeviceBinaryDir);
	    if (!ExpectedKernel)
	      return ExpectedKernel.takeError();

	    return GpuMathTest(std::move(Context), Provider, *ExpectedKernel);
	  }

	  /// Runs the kernel over every batch produced by \p Generator and returns
	  /// the accumulated result.
	  ///
	  /// Batches are requested with `Generator.fill(...)` until it returns 0.
	  ///
	  /// \param Generator  Source of input values; it is advanced by this call.
	  /// \param BufferSize Number of elements in each input/output buffer; must
	  ///                   be positive.
	  /// \param GroupSize  Group size passed to the kernel launch; must be
	  ///                   positive.
	  [[nodiscard]] ResultType
	  run(GeneratorType &Generator, std::size_t BufferSize = DefaultBufferSize,
	      uint32_t GroupSize = DefaultGroupSize) const noexcept {
	    assert(BufferSize > 0 && "Buffer size must be a positive value");
	    assert(GroupSize > 0 && "Group size must be a positive value");

	    auto [InBuffersTuple, OutBuffer] = createBuffers(BufferSize);
	    ResultType FinalResult;

	    while (true) {
	      const std::size_t BatchSize = std::apply(
	          [&](auto &...Buffers) { return Generator.fill(Buffers...); },
	          InBuffersTuple);

	      // A batch of zero elements signals that the generator is exhausted.
	      if (BatchSize == 0)
	        break;

	      // BatchSize is used below as the element count of views over buffers
	      // that hold BufferSize elements, so it must not exceed that size.
	      assert(BatchSize <= BufferSize &&
	             "Generator reported more elements than the buffers can hold");

	      const auto BatchResult =
	          processBatch(InBuffersTuple, OutBuffer, BatchSize, GroupSize);

	      FinalResult.accumulate(BatchResult);
	    }

	    return FinalResult;
	  }

	  /// Returns the device context this test was created with.
	  [[nodiscard]] std::shared_ptr<DeviceContext> getContext() const noexcept {
	    return Context;
	  }

	  /// Returns a copy of the provider name this test was created with.
	  [[nodiscard]] std::string getProvider() const noexcept { return Provider; }

	private:
	  explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
	                       llvm::StringRef Provider,
	                       DeviceKernel<KernelSignature> Kernel)
	      : Context(std::move(Context)), Provider(Provider), Kernel(Kernel) {}

	  /// Loads the binary named \p Provider from \p DeviceBinaryDir through
	  /// \p Context and resolves the kernel named FunctionConfig::KernelName.
	  static llvm::Expected<DeviceKernel<KernelSignature>>
	  getKernel(const DeviceContext &Context, llvm::StringRef Provider,
	            llvm::StringRef DeviceBinaryDir) {
	    // The device binary is named after the provider.
	    llvm::StringRef BinaryName = Provider;

	    auto ExpectedImage = Context.loadBinary(DeviceBinaryDir, BinaryName);
	    if (!ExpectedImage)
	      return ExpectedImage.takeError();

	    auto ExpectedKernel = Context.getKernel<KernelSignature>(
	        *ExpectedImage, FunctionConfig::KernelName);
	    if (!ExpectedKernel)
	      return ExpectedKernel.takeError();

	    return *ExpectedKernel;
	  }

	  /// Creates one buffer of \p BufferSize elements per input type plus one
	  /// output buffer, returned as a (tuple of input buffers, output buffer)
	  /// pair.
	  [[nodiscard]] auto createBuffers(std::size_t BufferSize) const {
	    auto InBuffersTuple = std::apply(
	        [&](auto... InTypeIdentities) {
	          return std::make_tuple(
	              Context->createManagedBuffer<
	                  typename decltype(InTypeIdentities)::type>(BufferSize)...);
	        },
	        InTypeIdentitiesTuple{});
	    auto OutBuffer = Context->createManagedBuffer<OutType>(BufferSize);

	    return std::make_pair(std::move(InBuffersTuple), std::move(OutBuffer));
	  }

	  /// Launches the kernel on the first \p BatchSize elements of the buffers
	  /// and checks the results.
	  ///
	  /// The kernel receives the data pointer of every input buffer, the data
	  /// pointer of \p OutBuffer and \p BatchSize, in that order.
	  template <typename InBuffersTupleType>
	  [[nodiscard]] ResultType
	  processBatch(const InBuffersTupleType &InBuffersTuple,
	               ManagedBuffer<OutType> &OutBuffer, std::size_t BatchSize,
	               uint32_t GroupSize) const noexcept {
	    // Ceiling division written without the `BatchSize + GroupSize - 1`
	    // intermediate so that it cannot wrap around for very large batches.
	    const std::size_t NumGroupsWide =
	        BatchSize / GroupSize + (BatchSize % GroupSize != 0 ? 1 : 0);
	    // The launch takes a 32-bit group count; a silently truncated count
	    // would leave part of the batch unprocessed while check() still inspects
	    // all of it, so make the narrowing explicit and guarded.
	    assert(NumGroupsWide <= UINT32_MAX &&
	           "Number of groups must fit in 32 bits");
	    const auto NumGroups = static_cast<uint32_t>(NumGroupsWide);

	    const auto KernelArgsTuple = std::apply(
	        [&](const auto &...InBuffers) {
	          return std::make_tuple(InBuffers.data()..., OutBuffer.data(),
	                                 BatchSize);
	        },
	        InBuffersTuple);

	    std::apply(
	        [&](const auto &...KernelArgs) {
	          Context->launchKernel(Kernel, NumGroups, GroupSize, KernelArgs...);
	        },
	        KernelArgsTuple);

	    return check(InBuffersTuple, OutBuffer, BatchSize);
	  }

	  /// Wraps the first \p BatchSize elements of every buffer in an
	  /// llvm::ArrayRef and forwards the views to Checker::check.
	  template <typename InBuffersTupleType>
	  [[nodiscard]] static ResultType
	  check(const InBuffersTupleType &InBuffersTuple,
	        const ManagedBuffer<OutType> &OutBuffer,
	        std::size_t BatchSize) noexcept {
	    const auto InViewsTuple = std::apply(
	        [&](const auto &...InBuffers) {
	          return std::make_tuple(
	              llvm::ArrayRef(InBuffers.data(), BatchSize)...);
	        },
	        InBuffersTuple);
	    const auto OutView = llvm::ArrayRef<OutType>(OutBuffer.data(), BatchSize);

	    return Checker::check(InViewsTuple, OutView);
	  }

	  std::shared_ptr<DeviceContext> Context;
	  std::string Provider;
	  DeviceKernel<KernelSignature> Kernel;
	};
	} // namespace mathtest

	#endif // MATHTEST_GPUMATHTEST_HPP