diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h index ca6c0ad706..0559e17afe 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h @@ -827,8 +827,7 @@ template struct DenseMapInfo> { // LLVM is currently overhauling its casting system. Use the new variant once // possible! -// Note: The new variant (With CastInfo) is not tested yet! -#if LLVM_MAJOR < 15 +#if LLVM_VERSION_MAJOR < 15 template struct isa_impl_cl> { @@ -876,7 +875,7 @@ cast_or_null(const psr::EdgeFunction &EF) noexcept { // NOLINT template struct CastIsPossible> { static inline bool isPossible(const psr::EdgeFunction &EF) noexcept { - return EF->template isa(); + return EF.template isa(); } }; diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index bfbc9d2333..72c4aa1a3f 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -40,13 +40,67 @@ template struct EdgeIdentity final { const EdgeFunction &OtherFunction); }; +template struct ConstantEdgeFunction { + using l_t = L; + using JLattice = JoinLatticeTraits; + using value_type = typename NonTopBotValue::type; + + [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const + noexcept(std::is_nothrow_constructible_v) { + static_assert(IsEdgeFunction); + return Value; + } + + template >> + [[nodiscard]] static EdgeFunction + compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction); + + template >> + [[nodiscard]] static EdgeFunction + join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction); + + [[nodiscard]] constexpr bool isConstant() const noexcept { return true; } + + // -- constant data member + + value_type Value{}; +}; + +template >>> +[[nodiscard]] bool operator==(ConstantEdgeFunction LHS, + ConstantEdgeFunction RHS) noexcept { + return LHS.Value == RHS.Value; +} + 
+template >>> +[[nodiscard]] bool operator==(const ConstantEdgeFunction &LHS, + const ConstantEdgeFunction &RHS) noexcept { + return LHS.Value == RHS.Value; +} + +template +[[nodiscard]] llvm::raw_ostream & +operator<<(llvm::raw_ostream &OS, ByConstRef> Id) { + OS << "ConstantEF"; + if constexpr (is_llvm_printable_v< + typename ConstantEdgeFunction::value_type>) { + OS << '[' << Id.Value << ']'; + } + return OS; +} + template struct AllBottom final { using l_t = L; using JLattice = JoinLatticeTraits; [[no_unique_address]] std::conditional_t, EmptyType, - l_t> - BottomValue; + l_t> BottomValue; [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const noexcept { static_assert(std::is_trivially_copyable_v); @@ -61,7 +115,25 @@ template struct AllBottom final { [[nodiscard]] static EdgeFunction compose(EdgeFunctionRef This, const EdgeFunction &SecondFunction) { - return SecondFunction.isConstant() ? SecondFunction : This; + if (SecondFunction.isConstant()) { + return SecondFunction; + } + + if constexpr (HasJoinLatticeTraits) { + auto ConstVal = SecondFunction.computeTarget(JLattice::bottom()); + if (ConstVal == JLattice::bottom()) { + return This; + } + + return ConstantEdgeFunction{ + NonTopBotValue::unwrap(std::move(ConstVal))}; + } else { + // Note: This used to be the default behavior, but it appears to be too + // restrictive + return This; + } + + // return SecondFunction.isConstant() ? 
SecondFunction : This; } [[nodiscard]] static EdgeFunction @@ -85,8 +157,7 @@ template struct AllTop final { using JLattice = JoinLatticeTraits; [[no_unique_address]] std::conditional_t, EmptyType, - l_t> - TopValue; + l_t> TopValue; [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const noexcept { static_assert(std::is_trivially_copyable_v); @@ -149,97 +220,6 @@ defaultComposeOrNull(const EdgeFunction &This, return nullptr; } -template struct ConstantEdgeFunction { - using l_t = L; - using JLattice = JoinLatticeTraits; - using value_type = typename NonTopBotValue::type; - - [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const - noexcept(std::is_nothrow_constructible_v) { - static_assert(IsEdgeFunction); - return Value; - } - - template >> - [[nodiscard]] static EdgeFunction - compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } - - auto ConstVal = SecondFunction.computeTarget(This->Value); - if constexpr (!EdgeFunctionBase::IsSOOCandidate) { - if (ConstVal == This->Value) { - return This; - } - } - - if constexpr (AreEqualityComparable) { - if (JLattice::bottom() == ConstVal) { - return AllBottom{}; - } - } else { - if (l_t(JLattice::bottom()) == ConstVal) { - return AllBottom{}; - } - } - - if constexpr (AreEqualityComparable) { - if (JLattice::top() == ConstVal) { - /// TODO: Can this ever happen? - return AllTop{}; - } - } else { - if (l_t(JLattice::top()) == ConstVal) { - /// TODO: Can this ever happen? 
- return AllTop{}; - } - } - - return ConstantEdgeFunction{ - NonTopBotValue::unwrap(std::move(ConstVal))}; - } - - template >> - [[nodiscard]] static EdgeFunction - join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction); - - [[nodiscard]] constexpr bool isConstant() const noexcept { return true; } - - // -- constant data member - - value_type Value{}; -}; - -template >>> -[[nodiscard]] bool operator==(ConstantEdgeFunction LHS, - ConstantEdgeFunction RHS) noexcept { - return LHS.Value == RHS.Value; -} - -template >>> -[[nodiscard]] bool operator==(const ConstantEdgeFunction &LHS, - const ConstantEdgeFunction &RHS) noexcept { - return LHS.Value == RHS.Value; -} - -template -[[nodiscard]] llvm::raw_ostream & -operator<<(llvm::raw_ostream &OS, ByConstRef> Id) { - OS << "ConstantEF"; - if constexpr (is_llvm_printable_v< - typename ConstantEdgeFunction::value_type>) { - OS << '[' << Id.Value << ']'; - } - return OS; -} - template struct EdgeFunctionComposer { using l_t = L; @@ -461,6 +441,47 @@ EdgeFunction EdgeIdentity::join(EdgeFunctionRef This, return OtherFunction.joinWith(This); } +template +template +EdgeFunction +ConstantEdgeFunction::compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) { + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + auto ConstVal = SecondFunction.computeTarget(This->Value); + if constexpr (!EdgeFunctionBase::IsSOOCandidate) { + if (ConstVal == This->Value) { + return This; + } + } + + if constexpr (AreEqualityComparable) { + if (JLattice::bottom() == ConstVal) { + return AllBottom{}; + } + } else { + if (L(JLattice::bottom()) == ConstVal) { + return AllBottom{}; + } + } + + if constexpr (AreEqualityComparable) { + if (JLattice::top() == ConstVal) { + /// TODO: Can this ever happen? + return AllTop{}; + } + } else { + if (L(JLattice::top()) == ConstVal) { + /// TODO: Can this ever happen? 
+ return AllTop{}; + } + } + + return ConstantEdgeFunction{NonTopBotValue::unwrap(std::move(ConstVal))}; +} + template template EdgeFunction diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index fbb2693c95..b14ca90f75 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -26,13 +26,11 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" -#include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/Soundness.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" #include "llvm/Support/raw_ostream.h" diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 646ff71322..ffaa42caef 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -12,6 +12,8 @@ #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" +#include "llvm/ADT/StringMap.h" + #include namespace llvm { @@ -38,7 +40,14 @@ class LLVMVFTableProvider { [[nodiscard]] const LLVMVFTable * getVFTableOrNull(const llvm::DIType *Type) const; + [[nodiscard]] const llvm::GlobalVariable * + getVFTableGlobal(const llvm::DIType *Type) const; + + [[nodiscard]] const llvm::GlobalVariable * + getVFTableGlobal(llvm::StringRef ClearTypeName) const; + private: + llvm::StringMap ClearNameTVMap; std::unordered_map TypeVFTMap; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index c59717c25f..205a1431f1 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ 
b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -113,6 +113,10 @@ class Resolver { [[nodiscard]] virtual std::string str() const = 0; + /// Whether the ICFG needs to reconsider all dynamic call-sites once there + /// have been changes through handlePossibleTargets(). + /// + /// Make false for performance (may be less sound then) [[nodiscard]] virtual bool mutatesHelperAnalysisInformation() const noexcept { // Conservatively returns true. Override if possible return true; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index df946ae27a..d9a1d9a931 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -24,7 +24,7 @@ struct SVFGCache; /// Conforms to the ICFGBase CRTP interface. /// /// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or -/// SparseIDE optimization after Karakays et al. "Symbol-Specific Sparsification +/// SparseIDE optimization after Karakaya et al. "Symbol-Specific Sparsification /// of Interprocedural Distributive Environment Problems" /// class SparseLLVMBasedICFG diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index 64010f2935..25b773d675 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -34,7 +34,7 @@ struct CFGTraits : CFGTraits {}; /// It still owns the sparse value-flow graphs. /// /// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or -/// SparseIDE optimization after Karakays et al. "Symbol-Specific Sparsification +/// SparseIDE optimization after Karakaya et al. 
"Symbol-Specific Sparsification /// of Interprocedural Distributive Environment Problems" /// class SparseLLVMBasedICFGView diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 0b970fcfed..6d334273f1 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -21,6 +21,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/raw_ostream.h" @@ -56,6 +57,14 @@ class LLVMProjectIRDB : public ProjectIRDBBase { /// ownership of it. The module is optionally being preprocessed. explicit LLVMProjectIRDB(std::unique_ptr Mod, bool DoPreprocessing = true); + /// Initializes the new ProjectIRDB with the given IR Module and takes + /// ownership of it. The module is optionally being preprocessed. Takes the + /// given LLVMContext and binds its lifetime to the lifetime of the + /// constructed ProjectIRDB + explicit LLVMProjectIRDB(std::unique_ptr Mod, + std::unique_ptr Ctx, + bool DoPreprocessing = true); + /// Parses the given LLVM IR file and owns the resulting IR Module. /// If an error occurs, an error message is written to stderr and subsequent /// calls to isValid() return false. 
@@ -63,7 +72,10 @@ class LLVMProjectIRDB : public ProjectIRDBBase { bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14); LLVMProjectIRDB(const LLVMProjectIRDB &) = delete; - LLVMProjectIRDB &operator=(LLVMProjectIRDB &) = delete; + LLVMProjectIRDB &operator=(const LLVMProjectIRDB &) = delete; + + LLVMProjectIRDB(LLVMProjectIRDB &&) noexcept = default; + LLVMProjectIRDB &operator=(LLVMProjectIRDB &&) noexcept = default; ~LLVMProjectIRDB(); @@ -74,6 +86,10 @@ class LLVMProjectIRDB : public ProjectIRDBBase { getParsedIRModuleOrNull(llvm::MemoryBufferRef IRFileContent, llvm::LLVMContext &Ctx) noexcept; + [[nodiscard]] static llvm::ErrorOr + load(const llvm::Twine &IRFileName, + bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14); + /// Also use the const overload using ProjectIRDBBase::getFunction; /// Non-const overload @@ -163,7 +179,8 @@ class LLVMProjectIRDB : public ProjectIRDBBase { /// the preprocessing as well void preprocessModule(llvm::Module *NonConstMod); - llvm::LLVMContext Ctx; + // LLVMContext is not movable, so wrap it into a unique_ptr + std::unique_ptr Ctx; MaybeUniquePtr Mod = nullptr; size_t IdOffset = 0; llvm::SmallVector IdToInst; diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 1292499460..3a96157e36 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -103,6 +103,7 @@ std::string llvmIRToShortString(const llvm::Value *V); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Value *V); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Instruction *V); +LLVM_DUMP_METHOD void dumpIRValue(const llvm::Function *V); /** * @brief Returns all LLVM Global Values that are used in the given LLVM @@ -192,10 +193,11 @@ const llvm::StoreInst *getNthStoreInstruction(const llvm::Function *F, unsigned StoNo); llvm::SmallVector -getAllExitPoints(const llvm::Function *F); +getAllExitPoints(const llvm::Function *F, bool IncludeResume = 
true); void appendAllExitPoints( const llvm::Function *F, - llvm::SmallVectorImpl &ExitPoints); + llvm::SmallVectorImpl &ExitPoints, + bool IncludeResume = true); /** * @brief Returns the LLVM Module to which the given LLVM Value belongs to. diff --git a/include/phasar/Utils/Macros.h b/include/phasar/Utils/Macros.h index 9620018de8..a5c104a4a9 100644 --- a/include/phasar/Utils/Macros.h +++ b/include/phasar/Utils/Macros.h @@ -18,4 +18,12 @@ #define PSR_CONCEPT concept #endif +#if __cpp_constinit >= 201907L +#define PSR_CONSTINIT constinit +#elif __clang__ +#define PSR_CONSTINIT [[clang::require_constant_initialization]] +#else +#define PSR_CONSTINIT +#endif + #endif // PHASAR_UTILS_MACROS_H diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index 612732679f..a4265d36fc 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -251,7 +251,8 @@ auto remove_by_index(Container &Cont, const Indices &Idx) { /// See template -[[nodiscard]] constexpr auto &&forward_like(U &&X) noexcept { // NOLINT +[[nodiscard]] LLVM_ATTRIBUTE_ALWAYS_INLINE constexpr auto && +forward_like(U &&X) noexcept { // NOLINT // NOLINTNEXTLINE constexpr bool is_adding_const = std::is_const_v>; if constexpr (std::is_lvalue_reference_v) { diff --git a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp index e6a53249bb..cffe27cb89 100644 --- a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp +++ b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp @@ -15,6 +15,8 @@ #include "phasar/Utils/Logger.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include @@ -61,6 +63,33 @@ collectGlobalDtors(const llvm::Module &Mod) { return Ret; } +static llvm::SmallVector +collectRegisteredDtorsAtExit(const llvm::Module &Mod) { + llvm::SmallVector Ret; + + auto *AtExitFn = Mod.getFunction("atexit"); + if (!AtExitFn) { 
+ return Ret; + } + + for (auto *User : AtExitFn->users()) { + auto *Call = llvm::dyn_cast(User); + if (!Call) { + continue; + } + + auto *DtorOp = llvm::dyn_cast_or_null( + Call->getArgOperand(0)->stripPointerCastsAndAliases()); + if (!DtorOp) { + continue; + } + + Ret.push_back(DtorOp); + } + + return Ret; +} + static llvm::SmallVector, 4> collectRegisteredDtorsForModule(const llvm::Module &Mod) { // NOLINTNEXTLINE @@ -72,16 +101,6 @@ collectRegisteredDtorsForModule(const llvm::Module &Mod) { return RegisteredDtors; } - auto getConstantBitcastArgument = // NOLINT - [](llvm::Value *V) -> llvm::Value * { - auto *CE = llvm::dyn_cast(V); - if (!CE) { - return V; - } - - return CE->getOperand(0); - }; - for (auto *User : CxaAtExitFn->users()) { auto *Call = llvm::dyn_cast(User); if (!Call) { @@ -89,8 +108,8 @@ collectRegisteredDtorsForModule(const llvm::Module &Mod) { } auto *DtorOp = llvm::dyn_cast_or_null( - getConstantBitcastArgument(Call->getArgOperand(0))); - auto *DtorArgOp = getConstantBitcastArgument(Call->getArgOperand(1)); + Call->getArgOperand(0)->stripPointerCastsAndAliases()); + auto *DtorArgOp = Call->getArgOperand(1)->stripPointerCastsAndAliases(); if (!DtorOp || !DtorArgOp) { continue; @@ -122,8 +141,9 @@ static std::string getReducedModuleName(const llvm::Module &M) { static llvm::Function *createDtorCallerForModule( llvm::Module &Mod, - const llvm::SmallVectorImpl> - &RegisteredDtors) { + llvm::ArrayRef> + RegisteredDtors, + llvm::ArrayRef RegisteredDtorsAtExit) { auto *PhasarDtorCaller = llvm::cast( Mod.getOrInsertFunction((GlobalCtorsDtorsModel::DtorsCallerName + @@ -137,12 +157,14 @@ static llvm::Function *createDtorCallerForModule( llvm::IRBuilder<> IRB(BB); - for (auto It = RegisteredDtors.rbegin(), End = RegisteredDtors.rend(); - It != End; ++It) { - auto FunCallee = It->first; + for (auto FunCallee : llvm::reverse(RegisteredDtorsAtExit)) { + IRB.CreateCall(FunCallee, {}); + } + + for (auto [FunCallee, Arg] : llvm::reverse(RegisteredDtors)) { 
assert(FunCallee.getFunctionType()->getNumParams() == 1); auto *ExpectedArgType = FunCallee.getFunctionType()->getParamType(0); - auto *Arg = It->second; + if (Arg->getType() != ExpectedArgType) { if (!Arg->getType()->canLosslesslyBitCastTo(ExpectedArgType)) { PHASAR_LOG_LEVEL( @@ -171,8 +193,9 @@ static llvm::Function *createDtorCallerForModule( "Collect Registered Dtors for Module " << Mod.getName()); auto RegisteredDtors = collectRegisteredDtorsForModule(Mod); + auto RegisteredDtorsAtExit = collectRegisteredDtorsAtExit(Mod); - if (RegisteredDtors.empty()) { + if (RegisteredDtors.empty() && RegisteredDtorsAtExit.empty()) { return nullptr; } @@ -180,7 +203,8 @@ static llvm::Function *createDtorCallerForModule( "> Found " << RegisteredDtors.size() << " Registered Dtors"); - auto *RegisteredDtorCaller = createDtorCallerForModule(Mod, RegisteredDtors); + auto *RegisteredDtorCaller = + createDtorCallerForModule(Mod, RegisteredDtors, RegisteredDtorsAtExit); // auto It = GlobalDtors.emplace(0, RegisteredDtorCaller); // GlobalDtorFn.try_emplace(RegisteredDtorCaller, it); diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 10ea6c257c..e3f65c00ec 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -6,7 +6,7 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" -#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Soundness.h" @@ -111,8 +111,8 @@ static bool fillPossibleTargets( PossibleTargets.insert(StaticCallee); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found static call-site: " - << " " << 
llvmIRToString(CS)); + "Found static call-site: " << " " + << llvmIRToString(CS)); return true; } @@ -122,8 +122,8 @@ static bool fillPossibleTargets( // the function call must be resolved dynamically PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found dynamic call-site: " - << " " << llvmIRToString(CS)); + "Found dynamic call-site: " << " " + << llvmIRToString(CS)); PossibleTargets = Res.resolveIndirectCall(CS); diff --git a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp index 45820e2793..4abbc34fd3 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp @@ -16,14 +16,23 @@ #include "llvm/Support/Casting.h" using namespace psr; +static constexpr llvm::StringLiteral TIPrefix = "typeinfo name for "; static std::string getTypeName(const llvm::DIType *DITy) { - if (const auto *CompTy = llvm::dyn_cast(DITy)) { - auto Ident = CompTy->getIdentifier(); - return Ident.empty() ? llvm::demangle(CompTy->getName().str()) - : llvm::demangle(Ident.str()); + auto Ret = [DITy] { + if (const auto *CompTy = llvm::dyn_cast(DITy)) { + auto Ident = CompTy->getIdentifier(); + return Ident.empty() ? 
llvm::demangle(CompTy->getName().str()) + : llvm::demangle(Ident.str()); + } + return llvm::demangle(DITy->getName().str()); + }(); + + if (llvm::StringRef(Ret).startswith(TIPrefix)) { + Ret.erase(0, TIPrefix.size()); } - return llvm::demangle(DITy->getName().str()); + + return Ret; } static std::vector getVirtualFunctions( @@ -31,11 +40,6 @@ static std::vector getVirtualFunctions( const llvm::DIType *Type) { auto ClearName = getTypeName(Type); - static constexpr llvm::StringLiteral TIPrefix = "typeinfo name for "; - if (llvm::StringRef(ClearName).startswith(TIPrefix)) { - ClearName = ClearName.substr(TIPrefix.size()); - } - auto It = ClearNameTVMap.find(ClearName); if (It != ClearNameTVMap.end()) { @@ -53,8 +57,6 @@ static std::vector getVirtualFunctions( } LLVMVFTableProvider::LLVMVFTableProvider(const llvm::Module &Mod) { - llvm::StringMap ClearNameTVMap; - for (const auto &Glob : Mod.globals()) { if (DIBasedTypeHierarchy::isVTable(Glob.getName())) { auto Demang = llvm::demangle(Glob.getName().str()); @@ -88,3 +90,19 @@ LLVMVFTableProvider::getVFTableOrNull(const llvm::DIType *Type) const { auto It = TypeVFTMap.find(Type); return It != TypeVFTMap.end() ? 
&It->second : nullptr; } + +const llvm::GlobalVariable * +LLVMVFTableProvider::getVFTableGlobal(const llvm::DIType *Type) const { + auto Name = getTypeName(Type); + return getVFTableGlobal(Name); +} + +const llvm::GlobalVariable * +LLVMVFTableProvider::getVFTableGlobal(llvm::StringRef ClearTypeName) const { + // llvm::errs() << "[getVFTableGlobal]: " << ClearTypeName << '\n'; + if (auto It = ClearNameTVMap.find(ClearTypeName); + It != ClearNameTVMap.end()) { + return It->second; + } + return nullptr; +} diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index ba464cd0a6..15dcdf0e1c 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -76,7 +76,7 @@ auto CHAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) for (const auto &FallbackTy : FallbackTys) { const auto *Target = getNonPureVirtualVFTEntry(FallbackTy, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallees.insert(Target); } } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 05342b05ac..05b9e006d4 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -18,17 +18,16 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" 
#include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" @@ -76,7 +75,7 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) if (ReachableTypes.find(PossibleType) != EndIt) { const auto *Target = getNonPureVirtualVFTEntry(PossibleType, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallTargets.insert(Target); } } @@ -91,18 +90,52 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) std::string RTAResolver::str() const { return "RTA"; } -/// More or less copied from GeneralStatisticsAnalysis +static const llvm::DICompositeType * +isCompositeStructType(const llvm::DIType *Ty) { + if (const auto *CompTy = llvm::dyn_cast_if_present(Ty); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + return CompTy; + } + + return nullptr; +} + void RTAResolver::resolveAllocatedCompositeTypes() { if (!AllocatedCompositeTypes.empty()) { return; } - llvm::DebugInfoFinder DIF; - DIF.processModule(*IRDB->getModule()); + llvm::DenseSet AllocatedTypes; - for (const auto *Ty : DIF.types()) { - if (const auto *CompTy = llvm::dyn_cast(Ty)) { - AllocatedCompositeTypes.push_back(CompTy); + for (const auto *Inst : IRDB->getAllInstructions()) { + if (const auto *Alloca = llvm::dyn_cast(Inst)) { + if (const auto *Ty = isCompositeStructType(getVarTypeFromIR(Alloca))) { + AllocatedTypes.insert(Ty); + } + } else if (const auto *Call = llvm::dyn_cast(Inst)) { + if (const auto *Callee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + if (psr::isHeapAllocatingFunction(Callee)) { + const auto *MDNode = Call->getMetadata("heapallocsite"); + if (const auto *CompTy = llvm:: +#if LLVM_VERSION_MAJOR >= 15 + dyn_cast_if_present +#else + dyn_cast_or_null +#endif + (MDNode); + isCompositeStructType(CompTy)) { + + AllocatedTypes.insert(CompTy); + } + } + } } } + + 
AllocatedCompositeTypes.reserve(AllocatedTypes.size()); + AllocatedCompositeTypes.insert(AllocatedCompositeTypes.end(), + AllocatedTypes.begin(), AllocatedTypes.end()); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 9065a43415..7153b557f7 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -62,7 +62,7 @@ std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { } const llvm::DIType *psr::getReceiverType(const llvm::CallBase *CallSite) { - if (CallSite->arg_empty() || + if (!CallSite || CallSite->arg_empty() || (CallSite->hasStructRetAttr() && CallSite->arg_size() < 2)) { return nullptr; } @@ -149,7 +149,6 @@ namespace psr { Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : IRDB(IRDB), VTP(VTP) { assert(IRDB != nullptr); - assert(VTP != nullptr); } void Resolver::preCall(const llvm::Instruction *Inst) {} diff --git a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp index 2bb171e1be..d27aadbca4 100644 --- a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp +++ b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp @@ -3,20 +3,25 @@ #include "phasar/Config/Configuration.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/Macros.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SourceMgr.h" #include +#include +#include namespace psr { @@ -35,6 +40,45 @@ static void setOpaquePointersForCtx(llvm::LLVMContext &Ctx, bool Enable) { 
#endif } +namespace { +/* Failure kinds that LLVMProjectIRDB::load() reports through std::error_code. */ enum class IRDBParsingError { + CouldNotParse = 1, + CouldNotVerify = 2, +}; + +/* std::error_category that names and describes IRDBParsingError values. */ class IRDBParsingErrorCategory : public std::error_category { + [[nodiscard]] const char *name() const noexcept override { + return "IRDBParsingError"; + } + + [[nodiscard]] std::string message(int Value) const override { + switch (IRDBParsingError(Value)) { + case IRDBParsingError::CouldNotParse: + return "Could not parse LLVM IR"; + case IRDBParsingError::CouldNotVerify: + return "Parsed LLVM IR could not be verified"; + default: + /* Unknown codes yield an empty message, so every path returns a value. */ return ""; + } + } +}; + +PSR_CONSTINIT IRDBParsingErrorCategory IRDBParsingErrorCat{}; + +/* Wraps Err in a std::error_code bound to IRDBParsingErrorCat. */ std::error_code make_error_code(IRDBParsingError Err) noexcept { + // TODO + return {int(Err), IRDBParsingErrorCat}; +} +} // namespace + +} // namespace psr + +namespace std { +template <> struct is_error_code_enum : true_type {}; +} // namespace std + +namespace psr { + std::unique_ptr LLVMProjectIRDB::getParsedIRModuleOrNull(llvm::MemoryBufferRef IRFileContent, llvm::LLVMContext &Ctx) noexcept { @@ -76,10 +120,42 @@ LLVMProjectIRDB::getParsedIRModuleOrNull(const llvm::Twine &IRFileName, return getParsedIRModuleOrNull(*FileOrErr.get(), Ctx); } +/* Loads IRFileName through llvm::MemoryBuffer::getFileOrSTDIN, parses and verifies it as LLVM IR inside a newly created LLVMContext, and returns the resulting LLVMProjectIRDB. On failure it returns the file error, CouldNotParse, or CouldNotVerify; broken debug info only logs a warning. */ llvm::ErrorOr +LLVMProjectIRDB::load(const llvm::Twine &IRFileName, + bool EnableOpaquePointers) { + auto FileOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(IRFileName, /*IsText=*/true); + if (!FileOrErr) { + return FileOrErr.getError(); + } + + auto Ctx = std::make_unique(); + /* Configure the context before parsing, exactly as the file- and buffer-based constructors do. */ setOpaquePointersForCtx(*Ctx, EnableOpaquePointers); + llvm::SMDiagnostic Diag; + std::unique_ptr M = llvm::parseIR(**FileOrErr, Diag, *Ctx); + bool BrokenDebugInfo = false; + if (M == nullptr) { + Diag.print(nullptr, llvm::errs()); + return IRDBParsingError::CouldNotParse; + } + + if (llvm::verifyModule(*M, &llvm::errs(), &BrokenDebugInfo)) { + PHASAR_LOG_LEVEL(ERROR, FileOrErr.get()->getBufferIdentifier() + << " could not be parsed correctly!"); + return IRDBParsingError::CouldNotVerify; + } + if (BrokenDebugInfo) { + PHASAR_LOG_LEVEL(WARNING, "Debug info is broken!"); + } + + return
/* NOTE(review): the third parameter of this constructor is DoPreprocessing, yet EnableOpaquePointers is passed - confirm this is intended. */ LLVMProjectIRDB(std::move(M), std::move(Ctx), EnableOpaquePointers); +} + LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName, - bool EnableOpaquePointers) { - setOpaquePointersForCtx(Ctx, EnableOpaquePointers); - auto M = getParsedIRModuleOrNull(IRFileName, Ctx); + bool EnableOpaquePointers) + : Ctx(new llvm::LLVMContext()) { + setOpaquePointersForCtx(*Ctx, EnableOpaquePointers); + auto M = getParsedIRModuleOrNull(IRFileName, *Ctx); if (!M) { return; @@ -171,10 +247,18 @@ LLVMProjectIRDB::LLVMProjectIRDB(std::unique_ptr Mod, } } +/* Delegates to the (Mod, DoPreprocessing) constructor, then takes ownership of the passed Ctx. */ LLVMProjectIRDB::LLVMProjectIRDB(std::unique_ptr Mod, + std::unique_ptr Ctx, + bool DoPreprocessing) + : LLVMProjectIRDB(std::move(Mod), DoPreprocessing) { + this->Ctx = std::move(Ctx); +} + LLVMProjectIRDB::LLVMProjectIRDB(llvm::MemoryBufferRef Buf, - bool EnableOpaquePointers) { - setOpaquePointersForCtx(Ctx, EnableOpaquePointers); - auto M = getParsedIRModuleOrNull(Buf, Ctx); + bool EnableOpaquePointers) + : Ctx(new llvm::LLVMContext()) { + setOpaquePointersForCtx(*Ctx, EnableOpaquePointers); + auto M = getParsedIRModuleOrNull(Buf, *Ctx); if (!M) { return; } diff --git a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp index 2766f8e5a9..f6ed829e87 100644 --- a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp @@ -180,7 +180,7 @@ auto FilteredLLVMAliasSet::getReachableAllocationSites( return &getDefaultValue(); } - const auto *Fun = I->getFunction(); + const auto *Fun = I ?
I->getFunction() : nullptr; auto &AllocSites = ReachableAllocationSitesMap[ReachableAllocationSitesKey{ {Fun, IntraProcOnly}, V}]; if (AllocSites) { @@ -222,6 +222,11 @@ auto FilteredLLVMAliasSet::getReachableAllocationSites( bool FilteredLLVMAliasSet::isInReachableAllocationSites( const llvm::Value *V, const llvm::Value *PotentialValue, bool IntraProcOnly, const llvm::Instruction *I) { + + if (PotentialValue == V) { + return true; + } + // if V is not a (interesting) pointer we can return an empty set if (!isInterestingPointer(V)) { return false; diff --git a/lib/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.cpp b/lib/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.cpp index 36319dc05a..3d854848b4 100644 --- a/lib/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.cpp +++ b/lib/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.cpp @@ -23,9 +23,7 @@ #include "llvm/Demangle/Demangle.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.cpp b/lib/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.cpp index 028abe8659..9aef06815b 100644 --- a/lib/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.cpp +++ b/lib/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.cpp @@ -20,9 +20,6 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/NlohmannLogging.h" -#include "phasar/Utils/PAMMMacros.h" -#include "phasar/Utils/Utilities.h" #include "llvm/ADT/StringMap.h" #include "llvm/Demangle/Demangle.h" @@ -30,18 +27,12 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include
"llvm/IR/Operator.h" -#include "llvm/Support/Format.h" #include "boost/graph/graphviz.hpp" #include "boost/graph/transitive_closure.hpp" -#include #include -#include #include using namespace std; diff --git a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp index ac36114d33..a25e31ff55 100644 --- a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp +++ b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -180,8 +181,23 @@ getOffsetAndBase(const llvm::Value *V) { return {Base, Offset}; } +/* Helpers marked LLVM_DUMP_METHOD that print debug-info types to llvm::errs(). */ namespace t2 { +/* Prints Ty to llvm::errs(). For a null Ty only the placeholder line is written and the function returns without dereferencing the pointer. */ LLVM_DUMP_METHOD extern void dumpDIType(const llvm::DIType *Ty) { + if (!Ty) { + llvm::errs() << "\n"; + return; + } + Ty->print(llvm::errs()); +} +/* Overload for DIDerivedType pointers; forwards to the DIType overload above. */ LLVM_DUMP_METHOD extern void dumpDIType(const llvm::DIDerivedType *Ty) { + dumpDIType(static_cast(Ty)); +} + +} // namespace t2 + static llvm::DIType *getStructElementType(llvm::DIType *BaseTy, size_t Offset) { - const auto *DerivedTy = llvm::dyn_cast(BaseTy); + const auto *DerivedTy = + llvm::dyn_cast_if_present(BaseTy); auto *StructTy = DerivedTy ?
DerivedTy->getBaseType() : BaseTy; if (Offset == 0 && DerivedTy) { @@ -189,13 +205,13 @@ static llvm::DIType *getStructElementType(llvm::DIType *BaseTy, size_t Offset) { } if (const auto *CompositeTy = - llvm::dyn_cast(StructTy)) { - if (Offset > CompositeTy->getElements().size()) { + llvm::dyn_cast_if_present(StructTy)) { + /* Fetch the element list once; a null list or an index at or past its size yields nullptr. */ auto Elems = CompositeTy->getElements(); + if (!Elems || Offset >= Elems.size()) { return nullptr; } - auto Elems = CompositeTy->getElements(); - if (auto *ElemTy = llvm::dyn_cast(Elems[Offset])) { + if (auto *ElemTy = llvm::dyn_cast_if_present(Elems[Offset])) { return ElemTy; } } @@ -239,6 +255,10 @@ static llvm::DIType *getVarTypeFromIRRec(const llvm::Value *V, size_t Depth) { } llvm::DIType *psr::getVarTypeFromIR(const llvm::Value *V) { + /* A null V yields nullptr without calling getVarTypeFromIRRec. */ if (!V) { + return nullptr; + } + return getVarTypeFromIRRec(V, 0); } diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 2b8fac6aa3..2b13575ff6 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -206,7 +206,9 @@ std::string psr::llvmIRToShortString(const llvm::Value *V) { I && !I->getType()->isVoidTy()) { V->printAsOperand(RSO, true, getModuleSlotTrackerFor(V)); } else if (const auto *F = llvm::dyn_cast(V)) { - RSO << F->getName(); + RSO << "fun @" << F->getName(); + } else if (const auto *Glob = llvm::dyn_cast(V)) { + RSO << "glob @" << Glob->getName(); } else { V->print(RSO, getModuleSlotTrackerFor(V)); } @@ -238,6 +240,9 @@ void psr::dumpIRValue(const llvm::Value *V) { void psr::dumpIRValue(const llvm::Instruction *V) { llvm::outs() << llvmIRToString(V) << '\n'; } +/* Overload for llvm::Function: writes llvmIRToString(V) followed by a newline to llvm::outs(). */ void psr::dumpIRValue(const llvm::Function *V) { + llvm::outs() << llvmIRToString(V) << '\n'; +} std::vector psr::globalValuesUsedinFunction(const llvm::Function *F) { @@ -317,15 +322,16 @@ const llvm::Instruction *psr::getNthInstruction(const llvm::Function *F, } llvm::SmallVector -psr::getAllExitPoints(const llvm::Function *F) {
+/* Returns the exit points that appendAllExitPoints collects for F; IncludeResume is forwarded unchanged. */ psr::getAllExitPoints(const llvm::Function *F, bool IncludeResume) { llvm::SmallVector Ret; - appendAllExitPoints(F, Ret); + appendAllExitPoints(F, Ret, IncludeResume); return Ret; } void psr::appendAllExitPoints( const llvm::Function *F, - llvm::SmallVectorImpl &ExitPoints) { + llvm::SmallVectorImpl &ExitPoints, + bool IncludeResume) { if (!F) { return; } @@ -335,7 +341,7 @@ void psr::appendAllExitPoints( assert(Term && "Invalid IR: Each BasicBlock must have a terminator " "instruction at the end"); if (llvm::isa(Term) || - llvm::isa(Term)) { + /* The second terminator kind only counts as an exit point when IncludeResume is set. */ (IncludeResume && llvm::isa(Term))) { ExitPoints.push_back(Term); } } diff --git a/test/llvm_test_code/virtual_callsites/CMakeLists.txt b/test/llvm_test_code/virtual_callsites/CMakeLists.txt index 4a329e510e..a4f9d3e0c8 100644 --- a/test/llvm_test_code/virtual_callsites/CMakeLists.txt +++ b/test/llvm_test_code/virtual_callsites/CMakeLists.txt @@ -11,4 +11,5 @@ set(NoMem2regSources foreach(TEST_SRC ${NoMem2regSources}) generate_ll_file(FILE ${TEST_SRC}) + generate_ll_file(FILE ${TEST_SRC} DEBUG) endforeach(TEST_SRC)