diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index dc6f7c8ff..82e2dcbbd 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -20,10 +20,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/Utils/MaybeUniquePtr.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { class DIBasedTypeHierarchy; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 88afa796e..eb6800dc1 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -12,10 +12,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { /// \brief A resolver that doesn't resolve indirect- and virtual calls diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index eca760ae7..6cddc84c9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -20,22 +20,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include -#include -#include -#include - -namespace llvm { -class CallBase; -class Function; -class Type; -class Value; -} // namespace llvm - namespace psr { -class DIBasedTypeHierarchy; - /// \brief A resolver that uses alias information to resolve indirect and /// virtual calls class OTFResolver : public Resolver { @@ -52,18 +38,11 @@ class OTFResolver : public Resolver { FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite) override; - static std::set - getReachableTypes(const 
LLVMAliasInfo::AliasSetTy &Values); - - static std::vector> - getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget); - [[nodiscard]] std::string str() const override; [[nodiscard]] bool mutatesHelperAnalysisInformation() const noexcept override { - return true; + return !PT.isInterProcedural(); } protected: diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index c6e003211..cc3c99d03 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -22,12 +22,10 @@ #include namespace llvm { -class CallBase; class DICompositeType; } // namespace llvm namespace psr { -class DIBasedTypeHierarchy; /// \brief A resolver that performs Rapid Type Analysis to resolve calls /// to C++ virtual functions. Requires debug information. diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index c59717c25..483b14ee4 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -20,6 +20,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DerivedTypes.h" #include @@ -44,6 +45,10 @@ enum class CallGraphAnalysisType; [[nodiscard]] std::optional getVFTIndex(const llvm::CallBase *CallSite); +/// Similar to getVFTIndex(), but also returns a pointer to the vtable +[[nodiscard]] std::optional> +getVFTIndexAndVT(const llvm::CallBase *CallSite); + /// Assuming that `CallSite` is a call to a non-static member function, /// retrieves the type of the receiver. 
Returns nullptr, if the receiver-type /// could not be extracted @@ -68,24 +73,16 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); +/// A variant of F->hasAddressTaken() that is better suited for our use cases. +/// +/// In particular, it filters out global aliases. +[[nodiscard]] bool isAddressTakenFunction(const llvm::Function *F); + /// \brief A base class for call-target resolvers. Used to build call graphs. /// /// Create a specific resolver by making a new class, inheriting this resolver /// class and implementing the virtual functions as needed. class Resolver { -protected: - const LLVMProjectIRDB *IRDB; - const LLVMVFTableProvider *VTP; - - const llvm::Function * - getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, - const llvm::CallBase *CallSite) { - if (!VTP) { - return nullptr; - } - return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP); - } - public: using FunctionSetTy = llvm::SmallDenseSet; @@ -93,12 +90,16 @@ class Resolver { virtual ~Resolver() = default; - virtual void preCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + preCall(const llvm::Instruction *Inst); virtual void handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &PossibleTargets); - virtual void postCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + postCall(const llvm::Instruction *Inst); [[nodiscard]] FunctionSetTy resolveIndirectCall(const llvm::CallBase *CallSite); @@ -109,7 +110,9 @@ class Resolver { [[nodiscard]] virtual FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite); - virtual void otherInst(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + otherInst(const 
llvm::Instruction *Inst); [[nodiscard]] virtual std::string str() const = 0; @@ -117,11 +120,29 @@ class Resolver { // Conservatively returns true. Override if possible return true; } - static std::unique_ptr create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT = nullptr); + + [[nodiscard]] llvm::ArrayRef + getAddressTakenFunctions(); + + [[nodiscard]] static std::unique_ptr + create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT = nullptr); + +protected: + const llvm::Function * + getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, + const llvm::CallBase *CallSite) { + if (!VTP) { + return nullptr; + } + return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP); + } + + const LLVMProjectIRDB *IRDB{}; + const LLVMVFTableProvider *VTP{}; + std::optional> + AddressTakenFunctions{}; }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 10ea6c257..c759de936 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -111,8 +111,8 @@ static bool fillPossibleTargets( PossibleTargets.insert(StaticCallee); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found static call-site: " - << " " << llvmIRToString(CS)); + "Found static call-site: " << " " + << llvmIRToString(CS)); return true; } @@ -122,8 +122,8 @@ static bool fillPossibleTargets( // the function call must be resolved dynamically PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found dynamic call-site: " - << " " << llvmIRToString(CS)); + "Found dynamic call-site: " << " " + << llvmIRToString(CS)); PossibleTargets = Res.resolveIndirectCall(CS); @@ -153,13 +153,9 @@ bool Builder::processFunction(const llvm::Function *F) { for (const 
auto &I : llvm::instructions(F)) { const auto *CS = llvm::dyn_cast(&I); if (!CS) { - Res->otherInst(&I); continue; } - Res->preCall(&I); - scope_exit PostCall = [&] { Res->postCall(&I); }; - FixpointReached &= fillPossibleTargets(PossibleTargets, *Res, CS, IndirectCalls); @@ -203,9 +199,6 @@ bool Builder::constructDynamicCall(const llvm::Instruction *CS) { "Looking into dynamic call-site: "); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", " " << llvmIRToString(CS)); - Res->preCall(CallSite); - scope_exit PostCall = [&] { Res->postCall(CallSite); }; - // call the resolve routine auto PossibleTargets = Res->resolveIndirectCall(CallSite); @@ -275,7 +268,7 @@ auto psr::buildLLVMBasedCallGraph( PT = PTOwn.asRef(); } - auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH); + auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH, PT); return buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints, S); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index ba464cd0a..da0a71f43 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -21,15 +21,10 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Module.h" #include -using namespace std; using namespace psr; CHAResolver::CHAResolver(const LLVMProjectIRDB *IRDB, @@ -76,7 +71,7 @@ auto CHAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) for (const auto &FallbackTy : FallbackTys) { const auto *Target = getNonPureVirtualVFTEntry(FallbackTy, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallees.insert(Target); } } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp index 
f825f5254..f98483c2d 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp @@ -16,12 +16,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" -#include - using namespace psr; -namespace psr { - NOResolver::NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : Resolver(IRDB, VTP) {} @@ -37,5 +33,3 @@ auto NOResolver::resolveFunctionPointer(const llvm::CallBase * /*CallSite*/) } std::string NOResolver::str() const { return "NOResolver"; } - -} // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index 6e70e7de0..348aa5a5e 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -22,7 +22,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" @@ -34,6 +33,55 @@ OTFResolver::OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT) : Resolver(IRDB, VTP), PT(PT) {} +static std::vector> +getActualFormalPointerPairs(const llvm::CallBase *CallSite, + const llvm::Function *CalleeTarget) { + std::vector> Pairs; + Pairs.reserve(CallSite->arg_size()); + // ordinary case + + unsigned Idx = 0; + for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { + // only collect pointer typed pairs + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && + CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), + CalleeTarget->getArg(Idx)); + } + } + + if (CalleeTarget->isVarArg()) { + // in case of vararg, we can pair-up incoming pointer parameters with the + // vararg pack of the callee target. 
the vararg pack will alias + // (intra-procedurally) with any pointer values loaded from the pack + const llvm::AllocaInst *VarArgs = nullptr; + + for (const auto &I : llvm::instructions(CalleeTarget)) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (const auto *AT = + llvm::dyn_cast(Alloca->getAllocatedType())) { + if (const auto *ST = + llvm::dyn_cast(AT->getArrayElementType())) { + if (ST->hasName() && ST->getName() == "struct.__va_list_tag") { + VarArgs = Alloca; + break; + } + } + } + } + } + + if (VarArgs) { + for (; Idx < CallSite->arg_size(); ++Idx) { + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); + } + } + } + } + return Pairs; +} + void OTFResolver::handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &CalleeTargets) { // if we have no inter-procedural points-to information, use call-graph @@ -72,7 +120,7 @@ auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) PHASAR_LOG_LEVEL(DEBUG, "Call virtual function: " << llvmIRToString(CallSite)); - auto RetrievedVtableIndex = getVFTIndex(CallSite); + auto RetrievedVtableIndex = getVFTIndexAndVT(CallSite); if (!RetrievedVtableIndex.has_value()) { // An error occured PHASAR_LOG_LEVEL(DEBUG, @@ -82,11 +130,12 @@ auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) return {}; } - auto VtableIndex = RetrievedVtableIndex.value(); + auto [VtablePtr, VtableIndex] = RetrievedVtableIndex.value(); PHASAR_LOG_LEVEL(DEBUG, "Virtual function table entry is: " << VtableIndex); - auto PTS = PT.getAliasSet(CallSite->getCalledOperand(), CallSite); + auto PTS = PT.getAliasSet(VtablePtr, CallSite); + for (const auto *P : *PTS) { if (const auto *PGV = llvm::dyn_cast(P)) { if (PGV->hasName() && @@ -208,76 +257,4 @@ auto OTFResolver::resolveFunctionPointer(const llvm::CallBase *CallSite) return Callees; } -std::set -OTFResolver::getReachableTypes(const LLVMAliasInfo::AliasSetTy &Values) { - std::set 
Types; - // an allocation site can either be an AllocaInst or a call to an - // allocating function - for (const auto *V : Values) { - if (const auto *Alloc = llvm::dyn_cast(V)) { - Types.insert(Alloc->getAllocatedType()); - } else { - // usually if an allocating function is called, it is immediately - // bit-casted - // to the desired allocated value and hence we can determine it from - // the destination type of that cast instruction. - for (const auto *User : V->users()) { - if (const auto *Cast = llvm::dyn_cast(User)) { - Types.insert(Cast->getDestTy()); - } - } - } - } - return Types; -} - -std::vector> -OTFResolver::getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget) { - std::vector> Pairs; - Pairs.reserve(CallSite->arg_size()); - // ordinary case - - unsigned Idx = 0; - for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { - // only collect pointer typed pairs - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && - CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), - CalleeTarget->getArg(Idx)); - } - } - - if (CalleeTarget->isVarArg()) { - // in case of vararg, we can pair-up incoming pointer parameters with the - // vararg pack of the callee target. 
the vararg pack will alias - // (intra-procedurally) with any pointer values loaded from the pack - const llvm::AllocaInst *VarArgs = nullptr; - - for (const auto &I : llvm::instructions(CalleeTarget)) { - if (const auto *Alloca = llvm::dyn_cast(&I)) { - if (const auto *AT = - llvm::dyn_cast(Alloca->getAllocatedType())) { - if (const auto *ST = - llvm::dyn_cast(AT->getArrayElementType())) { - if (ST->hasName() && ST->getName() == "struct.__va_list_tag") { - VarArgs = Alloca; - break; - } - } - } - } - } - - if (VarArgs) { - for (; Idx < CallSite->arg_size(); ++Idx) { - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); - } - } - } - } - return Pairs; -} - std::string OTFResolver::str() const { return "OTF"; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 05342b05a..0cfd9cdbb 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -18,22 +18,18 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" -using namespace std; using namespace psr; RTAResolver::RTAResolver(const LLVMProjectIRDB *IRDB, @@ -76,7 +72,7 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) if (ReachableTypes.find(PossibleType) != EndIt) { const auto *Target = 
getNonPureVirtualVFTEntry(PossibleType, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallTargets.insert(Target); } } @@ -91,18 +87,47 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) std::string RTAResolver::str() const { return "RTA"; } -/// More or less copied from GeneralStatisticsAnalysis +static const llvm::DICompositeType * +isCompositeStructType(const llvm::DIType *Ty) { + if (const auto *CompTy = llvm::dyn_cast_if_present(Ty); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + return CompTy; + } + + return nullptr; +} + void RTAResolver::resolveAllocatedCompositeTypes() { if (!AllocatedCompositeTypes.empty()) { return; } - llvm::DebugInfoFinder DIF; - DIF.processModule(*IRDB->getModule()); + llvm::DenseSet AllocatedTypes; - for (const auto *Ty : DIF.types()) { - if (const auto *CompTy = llvm::dyn_cast(Ty)) { - AllocatedCompositeTypes.push_back(CompTy); + for (const auto *Inst : IRDB->getAllInstructions()) { + if (const auto *Alloca = llvm::dyn_cast(Inst)) { + if (const auto *Ty = isCompositeStructType(getVarTypeFromIR(Alloca))) { + AllocatedTypes.insert(Ty); + } + } else if (const auto *Call = llvm::dyn_cast(Inst)) { + if (const auto *Callee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + if (psr::isHeapAllocatingFunction(Callee)) { + const auto *MDNode = Call->getMetadata("heapallocsite"); + if (const auto *CompTy = + llvm::dyn_cast_if_present(MDNode); + isCompositeStructType(CompTy)) { + + AllocatedTypes.insert(CompTy); + } + } + } } } + + AllocatedCompositeTypes.reserve(AllocatedTypes.size()); + AllocatedCompositeTypes.insert(AllocatedCompositeTypes.end(), + AllocatedTypes.begin(), AllocatedTypes.end()); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 9065a4341..ee3c3aa46 100644 --- 
a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -42,6 +43,8 @@ #include #include +using namespace psr; + std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { // deal with a virtual member function // retrieve the vtable entry that is called @@ -61,6 +64,29 @@ std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { return std::nullopt; } +std::optional> +psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { + // deal with a virtual member function + // retrieve the vtable entry that is called + const auto *Load = + llvm::dyn_cast(CallSite->getCalledOperand()); + if (Load == nullptr) { + return std::nullopt; + } + + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (GEP == nullptr) { + return std::nullopt; + } + + if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { + return {{GEP->getPointerOperand(), CI->getZExtValue()}}; + } + + return std::nullopt; +} + const llvm::DIType *psr::getReceiverType(const llvm::CallBase *CallSite) { if (CallSite->arg_empty() || (CallSite->hasStructRetAttr() && CallSite->arg_size() < 2)) { @@ -144,7 +170,59 @@ bool psr::isVirtualCall(const llvm::Instruction *Inst, return getVFTIndex(CallSite) >= 0; } -namespace psr { +// Derived from LLVM's llvm::Function::hasAddressTaken() +static bool isAddressTakenImpl(const llvm::Value *F) { + if (!F) { + return false; + } + + for (const auto &Use : F->uses()) { + const auto *User = Use.getUser(); + + if (llvm::isa(User)) { + if (isAddressTakenImpl(User)) { + return true; + } + + continue; + } + + if (const auto *Glob = llvm::dyn_cast(User)) { + if (Glob->getName() == "llvm.compiler.used" || + Glob->getName() == "llvm.used") { + continue; 
+ } + + return true; + } + + const auto *Call = llvm::dyn_cast(User); + if (!Call) { + return true; + } + + if (Call->isDebugOrPseudoInst()) { + continue; + } + + const auto *Intrinsic = llvm::dyn_cast(Call); + if (Intrinsic && Intrinsic->isAssumeLikeIntrinsic()) { + continue; + } + + if (Call->isCallee(&Use)) { + continue; + } + + return true; + } + + return false; +} + +bool psr::isAddressTakenFunction(const llvm::Function *F) { + return isAddressTakenImpl(F); +} Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : IRDB(IRDB), VTP(VTP) { @@ -167,6 +245,21 @@ auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) return resolveFunctionPointer(CallSite); } +llvm::ArrayRef Resolver::getAddressTakenFunctions() { + if (!AddressTakenFunctions) { + auto &ATF = AddressTakenFunctions.emplace(); + // XXX: Find better heuristic + ATF.reserve(IRDB->getNumFunctions() / 2); + for (const auto *F : IRDB->getAllFunctions()) { + if (isAddressTakenFunction(F)) { + ATF.push_back(F); + } + } + } + + return *AddressTakenFunctions; +} + auto Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) -> FunctionSetTy { // we may wish to optimise this function @@ -176,8 +269,8 @@ auto Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) "Call function pointer: " << llvmIRToString(CallSite)); FunctionSetTy CalleeTargets; - for (const auto *F : IRDB->getAllFunctions()) { - if (F->hasAddressTaken() && isConsistentCall(CallSite, F)) { + for (const auto *F : getAddressTakenFunctions()) { + if (isConsistentCall(CallSite, F)) { CalleeTargets.insert(F); } } @@ -217,5 +310,3 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, llvm_unreachable("All possible callgraph algorithms should be handled in the " "above switch"); } - -} // namespace psr diff --git a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp index ac36114d3..5bc38c469 100644 --- a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp +++ 
b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp @@ -190,7 +190,7 @@ static llvm::DIType *getStructElementType(llvm::DIType *BaseTy, size_t Offset) { if (const auto *CompositeTy = llvm::dyn_cast(StructTy)) { - if (Offset > CompositeTy->getElements().size()) { + if (Offset >= CompositeTy->getElements().size()) { return nullptr; } auto Elems = CompositeTy->getElements();