[SE] KernelSpec return best PTX Summary: Before, the kernel spec would only return PTX for exactly the requested compute capability. With this patch it will now return the PTX with the largest compute capability that does not exceed the requested compute capability. Reviewers: jlebar Subscribers: jprice, jlebar, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D24531 llvm-svn: 281417 GitOrigin-RevId: b459eb352986891bb0ec4c146954c2627ed6fc8e

commit: c56393365f543c6ca789cf84f95d3a311c7bfae5 [log] [tgz]
author: Jason Henline <jhen@google.com> Tue Sep 13 23:29:25 2016 +0000
committer: Copybara-Service <copybara-worker@google.com> Tue Sep 01 01:04:50 2020 -0700
tree: 234ee357e8c04be539b3aaf9021f6b4b912898d2
parent: 6f7913882bfae8b7f331b70a5d622890278e4b94 [diff]
diff --git a/streamexecutor/include/streamexecutor/KernelSpec.h b/streamexecutor/include/streamexecutor/KernelSpec.h
index caf6f1b..a6a2930 100644
--- a/streamexecutor/include/streamexecutor/KernelSpec.h
+++ b/streamexecutor/include/streamexecutor/KernelSpec.h

@@ -121,12 +121,11 @@
       llvm::StringRef KernelName,
       const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
 
-  /// Returns a pointer to the PTX code for the requested compute capability.
+  /// Returns a pointer to the PTX code for the greatest compute capability not
+  /// exceeding the requested compute capability.
   ///
-  /// Returns nullptr on failed lookup (if the requested compute capability is
-  /// not available). Matches exactly the specified compute capability. Doesn't
-  /// try to do anything smart like finding the next best compute capability if
-  /// the specified capability cannot be found.
+  /// Returns nullptr on failed lookup (if the requested version is not
+  /// available and no lower versions are available).
   const char *getCode(int ComputeCapabilityMajor,
                       int ComputeCapabilityMinor) const;
 

diff --git a/streamexecutor/lib/KernelSpec.cpp b/streamexecutor/lib/KernelSpec.cpp
index b5753a4..951ea8f 100644
--- a/streamexecutor/lib/KernelSpec.cpp
+++ b/streamexecutor/lib/KernelSpec.cpp

@@ -31,12 +31,13 @@
 
 const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
                                          int ComputeCapabilityMinor) const {
-  auto PTXIter =
-      PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+  auto Iterator =
+      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
           ComputeCapabilityMajor, ComputeCapabilityMinor});
-  if (PTXIter == PTXByComputeCapability.end())
+  if (Iterator == PTXByComputeCapability.begin())
     return nullptr;
-  return PTXIter->second;
+  --Iterator;
+  return Iterator->second;
 }
 
 CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,

diff --git a/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
index fc9eb54..486a350 100644
--- a/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
+++ b/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp

@@ -30,8 +30,9 @@
   const char *PTXCodeString = "Dummy PTX code";
   se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
 }
 
 TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
@@ -40,9 +41,10 @@
   se::CUDAPTXInMemorySpec Spec(
       "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
   EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
 }
 
 TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@
   EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
   EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
-  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
   EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
commit	c56393365f543c6ca789cf84f95d3a311c7bfae5	[log] [tgz]
author	Jason Henline <jhen@google.com>	Tue Sep 13 23:29:25 2016 +0000
committer	Copybara-Service <copybara-worker@google.com>	Tue Sep 01 01:04:50 2020 -0700
tree	234ee357e8c04be539b3aaf9021f6b4b912898d2
parent	6f7913882bfae8b7f331b70a5d622890278e4b94 [diff]