|  | /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ | 
|  | |* | 
|  | |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | |* See https://llvm.org/LICENSE.txt for license information. | 
|  | |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | |* | 
|  | \*===----------------------------------------------------------------------===*/ | 
|  |  | 
|  | #include <assert.h> | 
|  | #include <limits.h> | 
|  | #include <stdio.h> | 
|  | #include <stdlib.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #include "InstrProfiling.h" | 
|  | #include "InstrProfilingInternal.h" | 
|  | #include "InstrProfilingUtil.h" | 
|  |  | 
|  | #define INSTR_PROF_VALUE_PROF_DATA | 
|  | #define INSTR_PROF_COMMON_API_IMPL | 
|  | #define INSTR_PROF_VALUE_PROF_MEMOP_API | 
|  | #include "profile/InstrProfData.inc" | 
|  |  | 
|  | static int hasStaticCounters = 1; | 
|  | static int OutOfNodesWarnings = 0; | 
|  | static int hasNonDefaultValsPerSite = 0; | 
|  | #define INSTR_PROF_MAX_VP_WARNS 10 | 
|  | #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 | 
|  | #define INSTR_PROF_VNODE_POOL_SIZE 1024 | 
|  |  | 
|  | #ifndef _MSC_VER | 
|  | /* A shared static pool in addition to the vnodes statically | 
|  | * allocated by the compiler.  */ | 
|  | COMPILER_RT_VISIBILITY ValueProfNode | 
|  | lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION( | 
|  | COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); | 
|  | #endif | 
|  |  | 
|  | COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite = | 
|  | INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; | 
|  |  | 
|  | COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) { | 
|  | const char *Str = 0; | 
|  | Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE"); | 
|  | if (Str && Str[0]) { | 
|  | VPMaxNumValsPerSite = atoi(Str); | 
|  | hasNonDefaultValsPerSite = 1; | 
|  | } | 
|  | if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) | 
|  | VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; | 
|  | } | 
|  |  | 
|  | COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { | 
|  | VPMaxNumValsPerSite = MaxVals; | 
|  | hasNonDefaultValsPerSite = 1; | 
|  | } | 
|  |  | 
|  | /* This method is only used in value profiler mock testing.  */ | 
|  | COMPILER_RT_VISIBILITY void | 
|  | __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, | 
|  | uint32_t ValueKind, uint16_t NumValueSites) { | 
|  | #ifdef __GNUC__ | 
|  | #pragma GCC diagnostic push | 
|  | #pragma GCC diagnostic ignored "-Wcast-qual" | 
|  | #elif defined(__clang__) | 
|  | #pragma clang diagnostic push | 
|  | #pragma clang diagnostic ignored "-Wcast-qual" | 
|  | #endif | 
|  | *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites; | 
|  | #ifdef __GNUC__ | 
|  | #pragma GCC diagnostic pop | 
|  | #elif defined(__clang__) | 
|  | #pragma clang diagnostic pop | 
|  | #endif | 
|  | } | 
|  |  | 
|  | /* This method is only used in value profiler mock testing.  */ | 
|  | COMPILER_RT_VISIBILITY const __llvm_profile_data * | 
|  | __llvm_profile_iterate_data(const __llvm_profile_data *Data) { | 
|  | return Data + 1; | 
|  | } | 
|  |  | 
|  | /* This method is only used in value profiler mock testing.  */ | 
|  | COMPILER_RT_VISIBILITY void * | 
|  | __llvm_get_function_addr(const __llvm_profile_data *Data) { | 
|  | return Data->FunctionPointer; | 
|  | } | 
|  |  | 
|  | /* Allocate an array that holds the pointers to the linked lists of | 
|  | * value profile counter nodes. The number of element of the array | 
|  | * is the total number of value profile sites instrumented. Returns | 
|  | * 0 if allocation fails. | 
|  | */ | 
|  |  | 
|  | static int allocateValueProfileCounters(__llvm_profile_data *Data) { | 
|  | uint64_t NumVSites = 0; | 
|  | uint32_t VKI; | 
|  |  | 
|  | /* This function will never be called when value site array is allocated | 
|  | statically at compile time.  */ | 
|  | hasStaticCounters = 0; | 
|  | /* When dynamic allocation is enabled, allow tracking the max number of | 
|  | * values allowd.  */ | 
|  | if (!hasNonDefaultValsPerSite) | 
|  | VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; | 
|  |  | 
|  | for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) | 
|  | NumVSites += Data->NumValueSites[VKI]; | 
|  |  | 
|  | // If NumVSites = 0, calloc is allowed to return a non-null pointer. | 
|  | assert(NumVSites > 0 && "NumVSites can't be zero"); | 
|  | ValueProfNode **Mem = | 
|  | (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *)); | 
|  | if (!Mem) | 
|  | return 0; | 
|  | if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) { | 
|  | free(Mem); | 
|  | return 0; | 
|  | } | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | static ValueProfNode *allocateOneNode(void) { | 
|  | ValueProfNode *Node; | 
|  |  | 
|  | if (!hasStaticCounters) | 
|  | return (ValueProfNode *)calloc(1, sizeof(ValueProfNode)); | 
|  |  | 
|  | /* Early check to avoid value wrapping around.  */ | 
|  | if (CurrentVNode + 1 > EndVNode) { | 
|  | if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) { | 
|  | PROF_WARN("Unable to track new values: %s. " | 
|  | " Consider using option -mllvm -vp-counters-per-site=<n> to " | 
|  | "allocate more" | 
|  | " value profile counters at compile time. \n", | 
|  | "Running out of static counters"); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  | Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1); | 
|  | /* Due to section padding, EndVNode point to a byte which is one pass | 
|  | * an incomplete VNode, so we need to skip the last incomplete node. */ | 
|  | if (Node + 1 > EndVNode) | 
|  | return 0; | 
|  |  | 
|  | return Node; | 
|  | } | 
|  |  | 
|  | static COMPILER_RT_ALWAYS_INLINE void | 
|  | instrumentTargetValueImpl(uint64_t TargetValue, void *Data, | 
|  | uint32_t CounterIndex, uint64_t CountValue) { | 
|  | __llvm_profile_data *PData = (__llvm_profile_data *)Data; | 
|  | if (!PData) | 
|  | return; | 
|  | if (!CountValue) | 
|  | return; | 
|  | if (!PData->Values) { | 
|  | if (!allocateValueProfileCounters(PData)) | 
|  | return; | 
|  | } | 
|  |  | 
|  | ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; | 
|  | ValueProfNode *PrevVNode = NULL; | 
|  | ValueProfNode *MinCountVNode = NULL; | 
|  | ValueProfNode *CurVNode = ValueCounters[CounterIndex]; | 
|  | uint64_t MinCount = UINT64_MAX; | 
|  |  | 
|  | uint8_t VDataCount = 0; | 
|  | while (CurVNode) { | 
|  | if (TargetValue == CurVNode->Value) { | 
|  | CurVNode->Count += CountValue; | 
|  | return; | 
|  | } | 
|  | if (CurVNode->Count < MinCount) { | 
|  | MinCount = CurVNode->Count; | 
|  | MinCountVNode = CurVNode; | 
|  | } | 
|  | PrevVNode = CurVNode; | 
|  | CurVNode = CurVNode->Next; | 
|  | ++VDataCount; | 
|  | } | 
|  |  | 
|  | if (VDataCount >= VPMaxNumValsPerSite) { | 
|  | /* Bump down the min count node's count. If it reaches 0, | 
|  | * evict it. This eviction/replacement policy makes hot | 
|  | * targets more sticky while cold targets less so. In other | 
|  | * words, it makes it less likely for the hot targets to be | 
|  | * prematurally evicted during warmup/establishment period, | 
|  | * when their counts are still low. In a special case when | 
|  | * the number of values tracked is reduced to only one, this | 
|  | * policy will guarantee that the dominating target with >50% | 
|  | * total count will survive in the end. Note that this scheme | 
|  | * allows the runtime to track the min count node in an adaptive | 
|  | * manner. It can correct previous mistakes and eventually | 
|  | * lock on a cold target that is alread in stable state. | 
|  | * | 
|  | * In very rare cases,  this replacement scheme may still lead | 
|  | * to target loss. For instance, out of \c N value slots, \c N-1 | 
|  | * slots are occupied by luke warm targets during the warmup | 
|  | * period and the remaining one slot is competed by two or more | 
|  | * very hot targets. If those hot targets occur in an interleaved | 
|  | * way, none of them will survive (gain enough weight to throw out | 
|  | * other established entries) due to the ping-pong effect. | 
|  | * To handle this situation, user can choose to increase the max | 
|  | * number of tracked values per value site. Alternatively, a more | 
|  | * expensive eviction mechanism can be implemented. It requires | 
|  | * the runtime to track the total number of evictions per-site. | 
|  | * When the total number of evictions reaches certain threshold, | 
|  | * the runtime can wipe out more than one lowest count entries | 
|  | * to give space for hot targets. | 
|  | */ | 
|  | if (MinCountVNode->Count <= CountValue) { | 
|  | CurVNode = MinCountVNode; | 
|  | CurVNode->Value = TargetValue; | 
|  | CurVNode->Count = CountValue; | 
|  | } else | 
|  | MinCountVNode->Count -= CountValue; | 
|  |  | 
|  | return; | 
|  | } | 
|  |  | 
|  | CurVNode = allocateOneNode(); | 
|  | if (!CurVNode) | 
|  | return; | 
|  | CurVNode->Value = TargetValue; | 
|  | CurVNode->Count += CountValue; | 
|  |  | 
|  | uint32_t Success = 0; | 
|  | if (!ValueCounters[CounterIndex]) | 
|  | Success = | 
|  | COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode); | 
|  | else if (PrevVNode && !PrevVNode->Next) | 
|  | Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode); | 
|  |  | 
|  | if (!Success && !hasStaticCounters) { | 
|  | free(CurVNode); | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | COMPILER_RT_VISIBILITY void | 
|  | __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, | 
|  | uint32_t CounterIndex) { | 
|  | instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1); | 
|  | } | 
|  | COMPILER_RT_VISIBILITY void | 
|  | __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, | 
|  | uint32_t CounterIndex, | 
|  | uint64_t CountValue) { | 
|  | instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The target values are partitioned into multiple ranges. The range spec is | 
|  | * defined in InstrProfData.inc. | 
|  | */ | 
|  | COMPILER_RT_VISIBILITY void | 
|  | __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data, | 
|  | uint32_t CounterIndex) { | 
|  | // Map the target value to the representative value of its range. | 
|  | uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue); | 
|  | __llvm_profile_instrument_target(RepValue, Data, CounterIndex); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * A wrapper struct that represents value profile runtime data. | 
|  | * Like InstrProfRecord class which is used by profiling host tools, | 
|  | * ValueProfRuntimeRecord also implements the abstract interfaces defined in | 
|  | * ValueProfRecordClosure so that the runtime data can be serialized using | 
|  | * shared C implementation. | 
|  | */ | 
|  | typedef struct ValueProfRuntimeRecord { | 
|  | const __llvm_profile_data *Data; | 
|  | ValueProfNode **NodesKind[IPVK_Last + 1]; | 
|  | uint8_t **SiteCountArray; | 
|  | } ValueProfRuntimeRecord; | 
|  |  | 
|  | /* ValueProfRecordClosure Interface implementation. */ | 
|  |  | 
|  | static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { | 
|  | return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; | 
|  | } | 
|  |  | 
|  | static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { | 
|  | uint32_t S = 0, I; | 
|  | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; | 
|  | if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) | 
|  | return 0; | 
|  | for (I = 0; I < Record->Data->NumValueSites[VK]; I++) | 
|  | S += Record->SiteCountArray[VK][I]; | 
|  | return S; | 
|  | } | 
|  |  | 
|  | static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, | 
|  | uint32_t S) { | 
|  | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; | 
|  | return Record->SiteCountArray[VK][S]; | 
|  | } | 
|  |  | 
|  | static ValueProfRuntimeRecord RTRecord; | 
|  | static ValueProfRecordClosure RTRecordClosure = { | 
|  | &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */ | 
|  | getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT, | 
|  | INSTR_PROF_NULLPTR, /* RemapValueData */ | 
|  | INSTR_PROF_NULLPTR, /* GetValueForSite, */ | 
|  | INSTR_PROF_NULLPTR  /* AllocValueProfData */ | 
|  | }; | 
|  |  | 
|  | static uint32_t | 
|  | initializeValueProfRuntimeRecord(const __llvm_profile_data *Data, | 
|  | uint8_t *SiteCountArray[]) { | 
|  | unsigned I, J, S = 0, NumValueKinds = 0; | 
|  | ValueProfNode **Nodes = (ValueProfNode **)Data->Values; | 
|  | RTRecord.Data = Data; | 
|  | RTRecord.SiteCountArray = SiteCountArray; | 
|  | for (I = 0; I <= IPVK_Last; I++) { | 
|  | uint16_t N = Data->NumValueSites[I]; | 
|  | if (!N) | 
|  | continue; | 
|  |  | 
|  | NumValueKinds++; | 
|  |  | 
|  | RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR; | 
|  | for (J = 0; J < N; J++) { | 
|  | /* Compute value count for each site. */ | 
|  | uint32_t C = 0; | 
|  | ValueProfNode *Site = | 
|  | Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR; | 
|  | while (Site) { | 
|  | C++; | 
|  | Site = Site->Next; | 
|  | } | 
|  | if (C > UCHAR_MAX) | 
|  | C = UCHAR_MAX; | 
|  | RTRecord.SiteCountArray[I][J] = C; | 
|  | } | 
|  | S += N; | 
|  | } | 
|  | return NumValueKinds; | 
|  | } | 
|  |  | 
|  | static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, | 
|  | InstrProfValueData *Dst, | 
|  | ValueProfNode *StartNode, uint32_t N) { | 
|  | unsigned I; | 
|  | ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; | 
|  | for (I = 0; I < N; I++) { | 
|  | Dst[I].Value = VNode->Value; | 
|  | Dst[I].Count = VNode->Count; | 
|  | VNode = VNode->Next; | 
|  | } | 
|  | return VNode; | 
|  | } | 
|  |  | 
|  | static uint32_t getValueProfDataSizeWrapper(void) { | 
|  | return getValueProfDataSize(&RTRecordClosure); | 
|  | } | 
|  |  | 
|  | static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) { | 
|  | return getNumValueDataForSiteRT(&RTRecord, VK, S); | 
|  | } | 
|  |  | 
|  | static VPDataReaderType TheVPDataReader = { | 
|  | initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize, | 
|  | getFirstValueProfRecord,          getNumValueDataForSiteWrapper, | 
|  | getValueProfDataSizeWrapper,      getNextNValueData}; | 
|  |  | 
|  | COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) { | 
|  | return &TheVPDataReader; | 
|  | } |