// lib/AST/Interp/ByteCodeEmitter.cpp - llvm-project/clang - Git at Google

 //===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "ByteCodeEmitter.h"
 #include "Context.h"
 #include "Floating.h"
 #include "IntegralAP.h"
 #include "Opcode.h"
 #include "Program.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/Basic/Builtins.h"
 #include <type_traits>

 using namespace clang;
 using namespace clang::interp;

 /// Tells whether \p BuiltinID names an "unevaluated" builtin. Such builtins
 /// don't get their arguments put on the stack automatically; they instead
 /// operate on the AST of their call expression.
 /// ASTContext::BuiltinInfo offers similar information, but that is not
 /// correct for our use cases.
 static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
   switch (BuiltinID) {
   case Builtin::BI__builtin_classify_type:
     return true;
   default:
     return false;
   }
 }

 /// Creates (or reuses) the Function for \p FuncDecl and, when the declaration
 /// is defined and eligible, compiles its body to bytecode.
 ///
 /// The parameter layout is computed first: an optional RVO pointer, an
 /// optional instance pointer, then one slot per declared parameter. For
 /// lambda call operators the capture-to-field mapping is also recorded on
 /// the emitter.
 ///
 /// \returns the Function handle. It carries no code if the declaration is not
 /// defined, is not eligible for compilation, or visitFunc() fails. Returns
 /// nullptr for a static lambda call operator that has captures.
 Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   bool IsLambdaStaticInvoker = false;
   if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
       MD && MD->isLambdaStaticInvoker()) {
     // For a lambda static invoker, we might have to pick a specialized
     // version if the lambda is generic. In that case, the picked function
     // will *NOT* be a static invoker anymore. However, it will still
     // be a non-static member function, thus (usually) requiring an
     // instance pointer. We suppress that later in this function.
     IsLambdaStaticInvoker = true;

     const CXXRecordDecl *ClosureClass = MD->getParent();
     assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
     if (ClosureClass->isGenericLambda()) {
       // Look up the call operator specialization that was instantiated with
       // the same template arguments as this static invoker and compile that
       // declaration instead.
       const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
       assert(MD->isFunctionTemplateSpecialization() &&
              "A generic lambda's static-invoker function must be a "
              "template specialization");
       const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
       FunctionTemplateDecl *CallOpTemplate =
           LambdaCallOp->getDescribedFunctionTemplate();
       void *InsertPos = nullptr;
       const FunctionDecl *CorrespondingCallOpSpecialization =
           CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
       assert(CorrespondingCallOpSpecialization);
       FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
     }
   }

   // Set up argument indices. ParamOffset is the running offset of the next
   // parameter slot and ends up as the total argument size.
   unsigned ParamOffset = 0;
   SmallVector<PrimType, 8> ParamTypes;
   SmallVector<unsigned, 8> ParamOffsets;
   llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

   // If the return type is not a primitive, a pointer to the storage in which
   // the value is initialized is passed as the first argument. See 'RVO'
   // elsewhere in the code.
   QualType Ty = FuncDecl->getReturnType();
   bool HasRVO = false;
   if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
     HasRVO = true;
     ParamTypes.push_back(PT_Ptr);
     ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(PT_Ptr));
   }

   // If the function decl is a member decl, the next parameter is
   // the 'this' pointer. This parameter is pop()ed from the
   // InterpStack when calling the function.
   bool HasThisPointer = false;
   if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
     // Lambda static invokers get neither the flag nor the pointer slot.
     if (!IsLambdaStaticInvoker) {
       HasThisPointer = MD->isInstance();
       if (MD->isImplicitObjectMemberFunction()) {
         ParamTypes.push_back(PT_Ptr);
         ParamOffsets.push_back(ParamOffset);
         ParamOffset += align(primSize(PT_Ptr));
       }
     }

     // Set up lambda capture to closure record field mapping.
     if (isLambdaCallOperator(MD)) {
       const Record *R = P.getOrCreateRecord(MD->getParent());
       llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
       FieldDecl *LTC;

       // Fills LC with the captured-declaration -> field map and LTC with the
       // field holding the captured 'this', if any.
       MD->getParent()->getCaptureFields(LC, LTC);

       for (auto Cap : LC) {
         // Static lambdas cannot have any captures. If this one does,
         // it has already been diagnosed and we can only ignore it.
         if (MD->isStatic())
           return nullptr;

         // Record the field offset and whether the capture field has
         // reference type.
         unsigned Offset = R->getField(Cap.second)->Offset;
         this->LambdaCaptures[Cap.first] = {
             Offset, Cap.second->getType()->isReferenceType()};
       }
       if (LTC) {
         // For the 'this' capture the flag is set for reference and pointer
         // typed fields alike.
         QualType CaptureType = R->getField(LTC)->Decl->getType();
         this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                    CaptureType->isReferenceType() ||
                                        CaptureType->isPointerType()};
       }
     }
   }

   // Assign descriptors to all parameters.
   // Composite objects are lowered to pointers.
   for (const ParmVarDecl *PD : FuncDecl->parameters()) {
     std::optional<PrimType> T = Ctx.classify(PD->getType());
     PrimType PT = T.value_or(PT_Ptr);
     Descriptor *Desc = P.createDescriptor(PD, PT);
     ParamDescriptors.insert({ParamOffset, {PT, Desc}});
     // The second member records whether classify() yielded a primitive type.
     Params.insert({PD, {ParamOffset, T != std::nullopt}});
     ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(PT));
     ParamTypes.push_back(PT);
   }

   // Create a handle over the emitted code. An existing Function for this
   // declaration is reused; the layout computed above is only handed over
   // when a new one is created.
   Function *Func = P.getFunction(FuncDecl);
   if (!Func) {
     bool IsUnevaluatedBuiltin = false;
     if (unsigned BI = FuncDecl->getBuiltinID())
       IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

     Func =
         P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                          std::move(ParamDescriptors), std::move(ParamOffsets),
                          HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
   }

   assert(Func);
   // For not-yet-defined functions, we only create a Function instance and
   // compile their body later.
   if (!FuncDecl->isDefined()) {
     Func->setDefined(false);
     return Func;
   }

   Func->setDefined(true);

   // Lambda static invokers are a special case that we emit custom code for.
   // Everything else must be constexpr or carry the MSConstexpr attribute.
   bool IsEligibleForCompilation = false;
   if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
     IsEligibleForCompilation = MD->isLambdaStaticInvoker();
   if (!IsEligibleForCompilation)
     IsEligibleForCompilation =
         FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

   // Compile the function body. If it is ineligible or visitFunc() fails, the
   // function is still marked fully compiled but gets no code attached.
   if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
     Func->setIsFullyCompiled(true);
     return Func;
   }

   // Create scopes from descriptors.
   llvm::SmallVector<Scope, 2> Scopes;
   for (auto &DS : Descriptors) {
     Scopes.emplace_back(std::move(DS));
   }

   // Set the function's code, handing over the emitter's code buffer, source
   // map and scopes.
   Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                 std::move(Scopes), FuncDecl->hasBody());
   Func->setIsFullyCompiled(true);
   return Func;
 }

 /// Reserves frame space for a local described by \p D.
 ///
 /// The running local offset is first advanced by sizeof(Block); the offset
 /// reached at that point is the local's location. It is then advanced by the
 /// aligned allocation size of \p D.
 ///
 /// \returns the location paired with the descriptor.
 Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
   // Step over the Block-sized prefix.
   NextLocalOffset += sizeof(Block);
   const unsigned DataOffset = NextLocalOffset;

   // Reserve aligned room for the local itself.
   NextLocalOffset += align(D->getAllocSize());
   return {DataOffset, D};
 }

 /// Binds \p Label to the current end of the code buffer and patches the
 /// operand of every jump that was recorded as a relocation for this label.
 void ByteCodeEmitter::emitLabel(LabelTy Label) {
   const size_t Target = Code.size();
   LabelOffsets.insert({Label, Target});

   auto Pending = LabelRelocs.find(Label);
   if (Pending == LabelRelocs.end())
     return;

   // Rewrite the operand of all jumps to this label. Each recorded position
   // lies directly after the jump's 32-bit operand.
   for (unsigned JumpEnd : Pending->second) {
     void *Operand = Code.data() + JumpEnd - align(sizeof(int32_t));
     assert(aligned(Operand));
     const int32_t Offset = Target - static_cast<int64_t>(JumpEnd);
     llvm::support::endian::write<int32_t, llvm::endianness::native>(Operand,
                                                                     Offset);
   }
   LabelRelocs.erase(Pending);
 }

 /// Computes the operand for a jump to \p Label that is about to be emitted.
 ///
 /// \returns the offset relative to the position just past the jump's opcode
 /// and operand if the label is already bound. Otherwise a relocation is
 /// recorded for emitLabel() to patch and a placeholder of 0 is returned.
 int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
   // Compute the PC offset which the jump is relative to.
   const int64_t Position =
       Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
   assert(aligned(Position));

   auto Known = LabelOffsets.find(Label);
   if (Known == LabelOffsets.end()) {
     // Target not known yet: record relocation and return dummy offset.
     LabelRelocs[Label].push_back(Position);
     return 0;
   }

   // Target is known, compute jump offset.
   return Known->second - Position;
 }

 /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
 /// Pointers will be automatically marshalled as 32-bit IDs.
 ///
 /// \param P       Program used to map pointer values to native-pointer IDs.
 /// \param Code    Bytecode buffer that \p Val is appended to.
 /// \param Val     Value to append; copy-constructed in place unless it is a
 ///                pointer.
 /// \param Success Set to false (with \p Code left untouched) when the write
 ///                would end past what an unsigned offset can address. It is
 ///                never set to true here.
 template <typename T>
 static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                  bool &Success) {
   // Pointers occupy a 32-bit ID slot, everything else its own size.
   constexpr size_t RawSize =
       std::is_pointer_v<T> ? sizeof(uint32_t) : sizeof(T);
   constexpr size_t MaxCodeSize = std::numeric_limits<unsigned>::max();

   // Access must be aligned! Work out the aligned extent up front so that the
   // limit check also accounts for alignment padding; checking the unaligned
   // size could let the buffer grow one past the limit.
   const size_t ValPos = align(Code.size());
   const size_t AlignedSize = align(RawSize);
   if (AlignedSize > MaxCodeSize || ValPos > MaxCodeSize - AlignedSize) {
     Success = false;
     return;
   }

   assert(aligned(ValPos + AlignedSize));
   Code.resize(ValPos + AlignedSize);

   if constexpr (!std::is_pointer_v<T>) {
     new (Code.data() + ValPos) T(Val);
   } else {
     uint32_t ID = P.getOrCreateNativePointer(Val);
     new (Code.data() + ValPos) uint32_t(ID);
   }
 }

 /// Emits a serializable value. These usually (potentially) contain
 /// heap-allocated memory and aren't trivially copyable.
 ///
 /// \param Code    Bytecode buffer that \p Val is appended to.
 /// \param Val     Value to append; it reports its size via
 ///                bytesToSerialize() and writes itself via serialize().
 /// \param Success Set to false (with \p Code left untouched) when the write
 ///                would end past what an unsigned offset can address. It is
 ///                never set to true here.
 template <typename T>
 static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                            bool &Success) {
   constexpr size_t MaxCodeSize = std::numeric_limits<unsigned>::max();
   const size_t RawSize = Val.bytesToSerialize();

   // Reject sizes so large that aligning them could itself wrap around.
   if (RawSize > MaxCodeSize) {
     Success = false;
     return;
   }

   // Access must be aligned! Work out the aligned extent up front so that the
   // limit check also accounts for alignment padding; checking the unaligned
   // size could let the buffer grow one past the limit.
   const size_t ValPos = align(Code.size());
   const size_t AlignedSize = align(RawSize);
   if (AlignedSize > MaxCodeSize || ValPos > MaxCodeSize - AlignedSize) {
     Success = false;
     return;
   }

   assert(aligned(ValPos + AlignedSize));
   Code.resize(ValPos + AlignedSize);

   Val.serialize(Code.data() + ValPos);
 }

 template <>
 void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
           bool &Success) {
   emitSerialized(Code, Val, Success);
 }

 template <>
 void emit(Program &P, std::vector<std::byte> &Code,
           const IntegralAP<false> &Val, bool &Success) {
   emitSerialized(Code, Val, Success);
 }

 template <>
 void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
           bool &Success) {
   emitSerialized(Code, Val, Success);
 }

 /// Appends opcode \p Op followed by its operands \p Args to the code buffer.
 /// If \p SI is set, a source-map entry is recorded for the address directly
 /// after the opcode.
 ///
 /// \returns false if any of the individual writes reported failure.
 template <typename... Tys>
 bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
   bool AllWritten = true;

   // The opcode goes first.
   emit(P, Code, Op, AllWritten);

   // The source info is attached to the address after the opcode.
   if (SI) {
     SrcMap.emplace_back(Code.size(), SI);
   }

   // The arguments follow, in declaration order.
   (emit(P, Code, Args, AllWritten), ...);
   return AllWritten;
 }

 bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
   return emitJt(getOffset(Label), SourceInfo{});
 }

 bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
   return emitJf(getOffset(Label), SourceInfo{});
 }

 bool ByteCodeEmitter::jump(const LabelTy &Label) {
   return emitJmp(getOffset(Label), SourceInfo{});
 }

 /// Binds \p Label at the current position without emitting any instruction.
 /// \returns true unconditionally.
 bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
   emitLabel(Label);
   return true;
 }

 //===----------------------------------------------------------------------===//
 // Opcode emitters
 //===----------------------------------------------------------------------===//

 #define GET_LINK_IMPL
 #include "Opcodes.inc"
 #undef GET_LINK_IMPL
	//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "ByteCodeEmitter.h"
	#include "Context.h"
	#include "Floating.h"
	#include "IntegralAP.h"
	#include "Opcode.h"
	#include "Program.h"
	#include "clang/AST/ASTLambda.h"
	#include "clang/AST/Attr.h"
	#include "clang/AST/DeclCXX.h"
	#include "clang/Basic/Builtins.h"
	#include <type_traits>

	using namespace clang;
	using namespace clang::interp;

	/// Tells whether \p BuiltinID names an "unevaluated" builtin. Such builtins
	/// don't get their arguments put on the stack automatically; they instead
	/// operate on the AST of their call expression.
	/// ASTContext::BuiltinInfo offers similar information, but that is not
	/// correct for our use cases.
	static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
	  switch (BuiltinID) {
	  case Builtin::BI__builtin_classify_type:
	    return true;
	  default:
	    return false;
	  }
	}

	/// Creates (or reuses) the Function for \p FuncDecl and, when the declaration
	/// is defined and eligible, compiles its body to bytecode.
	///
	/// The parameter layout is computed first: an optional RVO pointer, an
	/// optional instance pointer, then one slot per declared parameter. For
	/// lambda call operators the capture-to-field mapping is also recorded on
	/// the emitter.
	///
	/// \returns the Function handle. It carries no code if the declaration is not
	/// defined, is not eligible for compilation, or visitFunc() fails. Returns
	/// nullptr for a static lambda call operator that has captures.
	Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
	  bool IsLambdaStaticInvoker = false;
	  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
	      MD && MD->isLambdaStaticInvoker()) {
	    // For a lambda static invoker, we might have to pick a specialized
	    // version if the lambda is generic. In that case, the picked function
	    // will *NOT* be a static invoker anymore. However, it will still
	    // be a non-static member function, thus (usually) requiring an
	    // instance pointer. We suppress that later in this function.
	    IsLambdaStaticInvoker = true;

	    const CXXRecordDecl *ClosureClass = MD->getParent();
	    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
	    if (ClosureClass->isGenericLambda()) {
	      // Look up the call operator specialization that was instantiated with
	      // the same template arguments as this static invoker and compile that
	      // declaration instead.
	      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
	      assert(MD->isFunctionTemplateSpecialization() &&
	             "A generic lambda's static-invoker function must be a "
	             "template specialization");
	      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
	      FunctionTemplateDecl *CallOpTemplate =
	          LambdaCallOp->getDescribedFunctionTemplate();
	      void *InsertPos = nullptr;
	      const FunctionDecl *CorrespondingCallOpSpecialization =
	          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
	      assert(CorrespondingCallOpSpecialization);
	      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
	    }
	  }

	  // Set up argument indices. ParamOffset is the running offset of the next
	  // parameter slot and ends up as the total argument size.
	  unsigned ParamOffset = 0;
	  SmallVector<PrimType, 8> ParamTypes;
	  SmallVector<unsigned, 8> ParamOffsets;
	  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

	  // If the return type is not a primitive, a pointer to the storage in which
	  // the value is initialized is passed as the first argument. See 'RVO'
	  // elsewhere in the code.
	  QualType Ty = FuncDecl->getReturnType();
	  bool HasRVO = false;
	  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
	    HasRVO = true;
	    ParamTypes.push_back(PT_Ptr);
	    ParamOffsets.push_back(ParamOffset);
	    ParamOffset += align(primSize(PT_Ptr));
	  }

	  // If the function decl is a member decl, the next parameter is
	  // the 'this' pointer. This parameter is pop()ed from the
	  // InterpStack when calling the function.
	  bool HasThisPointer = false;
	  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
	    // Lambda static invokers get neither the flag nor the pointer slot.
	    if (!IsLambdaStaticInvoker) {
	      HasThisPointer = MD->isInstance();
	      if (MD->isImplicitObjectMemberFunction()) {
	        ParamTypes.push_back(PT_Ptr);
	        ParamOffsets.push_back(ParamOffset);
	        ParamOffset += align(primSize(PT_Ptr));
	      }
	    }

	    // Set up lambda capture to closure record field mapping.
	    if (isLambdaCallOperator(MD)) {
	      const Record *R = P.getOrCreateRecord(MD->getParent());
	      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
	      FieldDecl *LTC;

	      // Fills LC with the captured-declaration -> field map and LTC with the
	      // field holding the captured 'this', if any.
	      MD->getParent()->getCaptureFields(LC, LTC);

	      for (auto Cap : LC) {
	        // Static lambdas cannot have any captures. If this one does,
	        // it has already been diagnosed and we can only ignore it.
	        if (MD->isStatic())
	          return nullptr;

	        // Record the field offset and whether the capture field has
	        // reference type.
	        unsigned Offset = R->getField(Cap.second)->Offset;
	        this->LambdaCaptures[Cap.first] = {
	            Offset, Cap.second->getType()->isReferenceType()};
	      }
	      if (LTC) {
	        // For the 'this' capture the flag is set for reference and pointer
	        // typed fields alike.
	        QualType CaptureType = R->getField(LTC)->Decl->getType();
	        this->LambdaThisCapture = {R->getField(LTC)->Offset,
	                                   CaptureType->isReferenceType() ||
	                                       CaptureType->isPointerType()};
	      }
	    }
	  }

	  // Assign descriptors to all parameters.
	  // Composite objects are lowered to pointers.
	  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
	    std::optional<PrimType> T = Ctx.classify(PD->getType());
	    PrimType PT = T.value_or(PT_Ptr);
	    Descriptor *Desc = P.createDescriptor(PD, PT);
	    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
	    // The second member records whether classify() yielded a primitive type.
	    Params.insert({PD, {ParamOffset, T != std::nullopt}});
	    ParamOffsets.push_back(ParamOffset);
	    ParamOffset += align(primSize(PT));
	    ParamTypes.push_back(PT);
	  }

	  // Create a handle over the emitted code. An existing Function for this
	  // declaration is reused; the layout computed above is only handed over
	  // when a new one is created.
	  Function *Func = P.getFunction(FuncDecl);
	  if (!Func) {
	    bool IsUnevaluatedBuiltin = false;
	    if (unsigned BI = FuncDecl->getBuiltinID())
	      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

	    Func =
	        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
	                         std::move(ParamDescriptors), std::move(ParamOffsets),
	                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
	  }

	  assert(Func);
	  // For not-yet-defined functions, we only create a Function instance and
	  // compile their body later.
	  if (!FuncDecl->isDefined()) {
	    Func->setDefined(false);
	    return Func;
	  }

	  Func->setDefined(true);

	  // Lambda static invokers are a special case that we emit custom code for.
	  // Everything else must be constexpr or carry the MSConstexpr attribute.
	  bool IsEligibleForCompilation = false;
	  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
	    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
	  if (!IsEligibleForCompilation)
	    IsEligibleForCompilation =
	        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

	  // Compile the function body. If it is ineligible or visitFunc() fails, the
	  // function is still marked fully compiled but gets no code attached.
	  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
	    Func->setIsFullyCompiled(true);
	    return Func;
	  }

	  // Create scopes from descriptors.
	  llvm::SmallVector<Scope, 2> Scopes;
	  for (auto &DS : Descriptors) {
	    Scopes.emplace_back(std::move(DS));
	  }

	  // Set the function's code, handing over the emitter's code buffer, source
	  // map and scopes.
	  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
	                std::move(Scopes), FuncDecl->hasBody());
	  Func->setIsFullyCompiled(true);
	  return Func;
	}

	/// Reserves frame space for a local described by \p D.
	///
	/// The running local offset is first advanced by sizeof(Block); the offset
	/// reached at that point is the local's location. It is then advanced by the
	/// aligned allocation size of \p D.
	///
	/// \returns the location paired with the descriptor.
	Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
	  // Step over the Block-sized prefix.
	  NextLocalOffset += sizeof(Block);
	  const unsigned DataOffset = NextLocalOffset;

	  // Reserve aligned room for the local itself.
	  NextLocalOffset += align(D->getAllocSize());
	  return {DataOffset, D};
	}

	/// Binds \p Label to the current end of the code buffer and patches the
	/// operand of every jump that was recorded as a relocation for this label.
	void ByteCodeEmitter::emitLabel(LabelTy Label) {
	  const size_t Target = Code.size();
	  LabelOffsets.insert({Label, Target});

	  auto Pending = LabelRelocs.find(Label);
	  if (Pending == LabelRelocs.end())
	    return;

	  // Rewrite the operand of all jumps to this label. Each recorded position
	  // lies directly after the jump's 32-bit operand.
	  for (unsigned JumpEnd : Pending->second) {
	    void *Operand = Code.data() + JumpEnd - align(sizeof(int32_t));
	    assert(aligned(Operand));
	    const int32_t Offset = Target - static_cast<int64_t>(JumpEnd);
	    llvm::support::endian::write<int32_t, llvm::endianness::native>(Operand,
	                                                                    Offset);
	  }
	  LabelRelocs.erase(Pending);
	}

	/// Computes the operand for a jump to \p Label that is about to be emitted.
	///
	/// \returns the offset relative to the position just past the jump's opcode
	/// and operand if the label is already bound. Otherwise a relocation is
	/// recorded for emitLabel() to patch and a placeholder of 0 is returned.
	int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
	  // Compute the PC offset which the jump is relative to.
	  const int64_t Position =
	      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
	  assert(aligned(Position));

	  auto Known = LabelOffsets.find(Label);
	  if (Known == LabelOffsets.end()) {
	    // Target not known yet: record relocation and return dummy offset.
	    LabelRelocs[Label].push_back(Position);
	    return 0;
	  }

	  // Target is known, compute jump offset.
	  return Known->second - Position;
	}

	/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
	/// Pointers will be automatically marshalled as 32-bit IDs.
	///
	/// \param P       Program used to map pointer values to native-pointer IDs.
	/// \param Code    Bytecode buffer that \p Val is appended to.
	/// \param Val     Value to append; copy-constructed in place unless it is a
	///                pointer.
	/// \param Success Set to false (with \p Code left untouched) when the write
	///                would end past what an unsigned offset can address. It is
	///                never set to true here.
	template <typename T>
	static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
	                 bool &Success) {
	  // Pointers occupy a 32-bit ID slot, everything else its own size.
	  constexpr size_t RawSize =
	      std::is_pointer_v<T> ? sizeof(uint32_t) : sizeof(T);
	  constexpr size_t MaxCodeSize = std::numeric_limits<unsigned>::max();

	  // Access must be aligned! Work out the aligned extent up front so that the
	  // limit check also accounts for alignment padding; checking the unaligned
	  // size could let the buffer grow one past the limit.
	  const size_t ValPos = align(Code.size());
	  const size_t AlignedSize = align(RawSize);
	  if (AlignedSize > MaxCodeSize || ValPos > MaxCodeSize - AlignedSize) {
	    Success = false;
	    return;
	  }

	  assert(aligned(ValPos + AlignedSize));
	  Code.resize(ValPos + AlignedSize);

	  if constexpr (!std::is_pointer_v<T>) {
	    new (Code.data() + ValPos) T(Val);
	  } else {
	    uint32_t ID = P.getOrCreateNativePointer(Val);
	    new (Code.data() + ValPos) uint32_t(ID);
	  }
	}

	/// Emits a serializable value. These usually (potentially) contain
	/// heap-allocated memory and aren't trivially copyable.
	///
	/// \param Code    Bytecode buffer that \p Val is appended to.
	/// \param Val     Value to append; it reports its size via
	///                bytesToSerialize() and writes itself via serialize().
	/// \param Success Set to false (with \p Code left untouched) when the write
	///                would end past what an unsigned offset can address. It is
	///                never set to true here.
	template <typename T>
	static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
	                           bool &Success) {
	  constexpr size_t MaxCodeSize = std::numeric_limits<unsigned>::max();
	  const size_t RawSize = Val.bytesToSerialize();

	  // Reject sizes so large that aligning them could itself wrap around.
	  if (RawSize > MaxCodeSize) {
	    Success = false;
	    return;
	  }

	  // Access must be aligned! Work out the aligned extent up front so that the
	  // limit check also accounts for alignment padding; checking the unaligned
	  // size could let the buffer grow one past the limit.
	  const size_t ValPos = align(Code.size());
	  const size_t AlignedSize = align(RawSize);
	  if (AlignedSize > MaxCodeSize || ValPos > MaxCodeSize - AlignedSize) {
	    Success = false;
	    return;
	  }

	  assert(aligned(ValPos + AlignedSize));
	  Code.resize(ValPos + AlignedSize);

	  Val.serialize(Code.data() + ValPos);
	}

	template <>
	void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
	bool &Success) {
	emitSerialized(Code, Val, Success);
	}

	template <>
	void emit(Program &P, std::vector<std::byte> &Code,
	const IntegralAP<false> &Val, bool &Success) {
	emitSerialized(Code, Val, Success);
	}

	template <>
	void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
	bool &Success) {
	emitSerialized(Code, Val, Success);
	}

	/// Appends opcode \p Op followed by its operands \p Args to the code buffer.
	/// If \p SI is set, a source-map entry is recorded for the address directly
	/// after the opcode.
	///
	/// \returns false if any of the individual writes reported failure.
	template <typename... Tys>
	bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
	  bool AllWritten = true;

	  // The opcode goes first.
	  emit(P, Code, Op, AllWritten);

	  // The source info is attached to the address after the opcode.
	  if (SI) {
	    SrcMap.emplace_back(Code.size(), SI);
	  }

	  // The arguments follow, in declaration order.
	  (emit(P, Code, Args, AllWritten), ...);
	  return AllWritten;
	}

	bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
	return emitJt(getOffset(Label), SourceInfo{});
	}

	bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
	return emitJf(getOffset(Label), SourceInfo{});
	}

	bool ByteCodeEmitter::jump(const LabelTy &Label) {
	return emitJmp(getOffset(Label), SourceInfo{});
	}

	/// Binds \p Label at the current position without emitting any instruction.
	/// \returns true unconditionally.
	bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
	  // No jump is needed; binding the label is all there is to do.
	  emitLabel(Label);
	  return true;
	}

	//===----------------------------------------------------------------------===//
	// Opcode emitters
	//===----------------------------------------------------------------------===//

	#define GET_LINK_IMPL
	#include "Opcodes.inc"
	#undef GET_LINK_IMPL