[FS-AFDO][llvm-profgen] Generate profile with FS-AFDO discriminator

To support generating profiles with FS discriminators, three kinds of changes are made in llvm-profgen:

1) Disassemble the .rodata section to check whether the FS discriminator variable (`__llvm_fs_discriminator__`) exists, and set the corresponding flag on the ProfiledBinary.

2) Change the discriminator decoding in `getBaseDiscriminator` and `getDuplicationFactor`.

3) Set `FunctionSamples::ProfileIsFS` to true to enable FS functionality in ProfileData.

Reviewed By: xur, hoy, wenlei

Differential Revision: https://reviews.llvm.org/D113296
diff --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.perfbin b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.perfbin
new file mode 100755
index 0000000..642dce7
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.perfbin
Binary files differ
diff --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.raw.prof
new file mode 100644
index 0000000..0121288
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator.raw.prof
@@ -0,0 +1,76 @@
+45
+540-540:1
+650-66d:1
+650-675:1
+650-689:1
+650-6a2:1
+682-689:1
+682-6a2:1
+686-689:1
+68b-6a2:1
+6b0-6b7:1
+6b0-6bf:1
+6b0-6c8:1
+6b0-6ca:1
+6b9-6bf:1
+6b9-6c8:1
+6b9-6ca:1
+6d0-6ea:1
+6d0-700:1
+6ec-700:1
+710-72f:1
+740-753:1
+740-75b:1
+740-75f:1
+740-76e:1
+743-753:1
+743-75b:1
+743-76e:1
+755-75b:1
+755-75f:1
+770-788:1
+790-79a:1
+790-7b8:1
+7b0-7b8:1
+7bb-7c8:1
+7cd-7d5:1
+7cd-7df:1
+7d7-7df:1
+810-82f:1
+834-860:1
+870-870:1
+875-8a1:1
+875-8bf:1
+875-8c3:1
+893-8bf:1
+8a7-8c3:1
+29
+66d->686:1
+675->682:1
+689->6b9:1
+6a2->7bb:1
+6b7->68b:1
+6bf->6d0:1
+6c8->6b0:1
+6ca->6ec:1
+6ea->6b0:1
+700->6b0:1
+72f->755:1
+753->770:1
+75b->743:1
+75f->740:1
+76e->740:1
+788->7bb:1
+79a->7d7:1
+7b8->650:1
+7b8->710:1
+7c8->790:1
+7d5->7b0:1
+7df->7cd:1
+7df->834:1
+82f->790:1
+860->8a7:1
+870->540:1
+8a1->810:1
+8bf->870:1
+8c3->893:1
diff --git a/llvm/test/tools/llvm-profgen/fs-discriminator.test b/llvm/test/tools/llvm-profgen/fs-discriminator.test
new file mode 100644
index 0000000..2fd52b8
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/fs-discriminator.test
@@ -0,0 +1,139 @@
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/fs-discriminator.raw.prof --binary=%S/Inputs/fs-discriminator.perfbin --output=%t1
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK
+; RUN: llvm-profgen --unsymbolized-profile=%S/Inputs/fs-discriminator.raw.prof --binary=%S/Inputs/fs-discriminator.perfbin --output=%t1
+; RUN: llvm-profdata show --sample --show-sec-info-only %t1 | FileCheck %s --check-prefix=CHECK-SECTION
+
+;CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator}
+
+;CHECK: partition_pivot_last:88:1
+;CHECK:  1: 4
+;CHECK:  2: 4
+;CHECK:  3: 4
+;CHECK:  3.1: 4
+;CHECK:  3.3: 4
+;CHECK:  3.3072: 5
+;CHECK:  3.7169: 4
+;CHECK:  3.7171: 4
+;CHECK:  4: 3
+;CHECK:  4.3072: 6
+;CHECK:  4.12800: 4
+;CHECK:  4.3221238272: 2
+;CHECK:  5: 2
+;CHECK:  5.5120: 2
+;CHECK:  6: 3
+;CHECK:  7: 3
+;CHECK:  5: swap:18
+;CHECK:   1: 2
+;CHECK:   1.1024: 2
+;CHECK:   1.4096: 2
+;CHECK:   2: 2
+;CHECK:   2.9216: 2
+;CHECK:   2.12288: 2
+;CHECK:   3: 2
+;CHECK:   3.512: 2
+;CHECK:   3.3584: 2
+;CHECK:  6: swap:12
+;CHECK:   1.14336: 3
+;CHECK:   2.7168: 3
+;CHECK:   2.11776: 3
+;CHECK:   3.13824: 3
+;CHECK: partition_pivot_first:41:1
+;CHECK:  0: 1
+;CHECK:  1: 1
+;CHECK:  2: 1
+;CHECK:  3: 1
+;CHECK:  3.1: 1
+;CHECK:  3.11265: 7
+;CHECK:  4: 7
+;CHECK:  4.1: 4
+;CHECK:  4.2: 2
+;CHECK:  4.3: 4
+;CHECK:  5: 1
+;CHECK:  6: 1
+;CHECK:  4.2: swap:6
+;CHECK:   1.7168: 2
+;CHECK:   2: 2
+;CHECK:   3.4608: 2
+;CHECK:  5: swap:4
+;CHECK:   1: 1
+;CHECK:   2.229376: 1
+;CHECK:   2.589824: 1
+;CHECK:   3: 1
+;CHECK: main:24:0
+;CHECK:  0: 0
+;CHECK:  3: 0
+;CHECK:  4.1: 1
+;CHECK:  4.3: 1
+;CHECK:  5.3: 4
+;CHECK:  5.1537: 4
+;CHECK:  6: 4
+;CHECK:  6.1: 3
+;CHECK:  6.3: 4
+;CHECK:  7: 1
+;CHECK:  8: 1 quick_sort:1
+;CHECK:  9: 1
+;CHECK:  11: 0
+;CHECK:  14: 0
+;CHECK: quick_sort:13:2
+;CHECK:  0: 2
+;CHECK:  1: 2
+;CHECK:  1.15360: 2
+;CHECK:  2: 2 partition_pivot_first:1 partition_pivot_last:1
+;CHECK:  3: 1 quick_sort:1
+;CHECK:  4: 2
+;CHECK:  6: 2
+
+
+; original code:
+; clang -O3 -g -mllvm --enable-fs-discriminator -fdebug-info-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+	int t = *a;
+	*a = *b;
+	*b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+	int pivot = array[high];
+	int i = low - 1;
+	for (int j = low; j < high; j++)
+		if (array[j] < pivot)
+			swap(&array[++i], &array[j]);
+	swap(&array[i + 1], &array[high]);
+	return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+	int pivot = array[low];
+	int i = low + 1;
+	for (int j = low + 1; j <= high; j++)
+		if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+	swap(&array[i - 1], &array[low]);
+	return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+	if (low < high) {
+		int pi = (*partition_func)(array, low, high);
+		quick_sort(array, low, pi - 1, partition_func);
+		quick_sort(array, pi + 1, high, partition_func);
+	}
+}
+
+int main() {
+	const int size = 200;
+	int sum = 0;
+	int *array = malloc(size * sizeof(int));
+	for(int i = 0; i < 100 * 1000; i++) {
+		for(int j = 0; j < size; j++)
+			array[j] = j % 10 ? rand() % size: j;
+		int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+		quick_sort(array, 0, size - 1, fptr);
+		sum += array[i % size];
+	}
+	printf("sum=%d\n", sum);
+
+	return 0;
+}
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e39fb01..6f6926e 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -429,6 +429,8 @@
 }
 
 void HybridPerfReader::unwindSamples() {
+  if (Binary->useFSDiscriminator())
+    exitWithError("FS discriminator is not supported in CS profile.");
   std::set<uint64_t> AllUntrackedCallsites;
   for (const auto &Item : AggregatedSamples) {
     const PerfSample *Sample = Item.first.getPtr();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 82c3695..7d28bda 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ProfileGenerator.h"
+#include "ErrorHandling.h"
 #include "ProfiledBinary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include <float.h>
@@ -93,16 +94,22 @@
 
 int CSProfileGenerator::MaxContextDepth = -1;
 
+bool ProfileGeneratorBase::UseFSDiscriminator = false;
+
 std::unique_ptr<ProfileGeneratorBase>
 ProfileGeneratorBase::create(ProfiledBinary *Binary,
                              const ContextSampleCounterMap &SampleCounters,
                              bool ProfileIsCS) {
   std::unique_ptr<ProfileGeneratorBase> Generator;
   if (ProfileIsCS) {
+    if (Binary->useFSDiscriminator())
+      exitWithError("FS discriminator is not supported in CS profile.");
     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
   } else {
     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
   }
+  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
+  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
 
   return Generator;
 }
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 979c209..c4b77aa 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -38,16 +38,24 @@
   virtual void generateProfile() = 0;
   void write();
 
-  static uint32_t getDuplicationFactor(unsigned Discriminator) {
-    return llvm::DILocation::getDuplicationFactorFromDiscriminator(
-        Discriminator);
+  static uint32_t
+  getDuplicationFactor(unsigned Discriminator,
+                       bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
+    return UseFSD ? 1
+                  : llvm::DILocation::getDuplicationFactorFromDiscriminator(
+                        Discriminator);
   }
 
-  static uint32_t getBaseDiscriminator(unsigned Discriminator) {
-    return DILocation::getBaseDiscriminatorFromDiscriminator(
-        Discriminator, /* IsFSDiscriminator */ false);
+  static uint32_t
+  getBaseDiscriminator(unsigned Discriminator,
+                       bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
+    return UseFSD ? Discriminator
+                  : DILocation::getBaseDiscriminatorFromDiscriminator(
+                        Discriminator, /* IsFSDiscriminator */ false);
   }
 
+  static bool UseFSDiscriminator;
+
 protected:
   // Use SampleProfileWriter to serialize profile map
   void write(std::unique_ptr<SampleProfileWriter> Writer,
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e08a00d..619af8d 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -261,7 +261,7 @@
   // Replace with decoded base discriminator
   for (auto &Frame : ContextVec) {
     Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
-        Frame.Location.Discriminator);
+        Frame.Location.Discriminator, UseFSDiscriminator);
   }
 
   assert(ContextVec.size() && "Context length should be at least 1");
@@ -566,6 +566,29 @@
         exitWithError("disassembling error", FileName);
     }
   }
+
+  // Disassemble rodata section to check if FS discriminator symbol exists.
+  checkUseFSDiscriminator(Obj, AllSymbols);
+}
+
+// Set UseFSDiscriminator when the FS discriminator marker variable appears
+// among the symbols recorded for any non-empty data section of Obj.
+void ProfiledBinary::checkUseFSDiscriminator(
+    const ELFObjectFileBase *Obj,
+    std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
+  const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
+  for (const SectionRef &Section : Obj->sections()) {
+    if (!Section.isData() || Section.getSize() == 0)
+      continue;
+    // operator[] adds an empty entry for sections missing from the map.
+    const SectionSymbolsTy &Symbols = AllSymbols[Section];
+    for (const auto &Symbol : Symbols) {
+      if (Symbol.Name == FSDiscriminatorVar) {
+        UseFSDiscriminator = true;
+        return;
+      }
+    }
+  }
 }
 
 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 003477b..a28ba82 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -244,6 +244,8 @@
 
   bool UsePseudoProbes = false;
 
+  bool UseFSDiscriminator = false;
+
   // Whether we need to symbolize all instructions to get function context size.
   bool TrackFuncContextSize = false;
 
@@ -260,6 +262,10 @@
 
   void decodePseudoProbe(const ELFObjectFileBase *Obj);
 
+  void
+  checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
+                          std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
+
   // Set up disassembler and related components.
   void setUpDisassembler(const ELFObjectFileBase *Obj);
   void setupSymbolizer();
@@ -358,6 +364,7 @@
   size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }
 
   bool usePseudoProbes() const { return UsePseudoProbes; }
+  bool useFSDiscriminator() const { return UseFSDiscriminator; }
   // Get the index in CodeAddrOffsets for the address
   // As we might get an address which is not the code
   // here it would round to the next valid code address by