From bb86c25e6c315a2aedf6a3400094e1ec9d29e1eb Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Tue, 22 Oct 2024 19:26:36 +0200 Subject: [PATCH] Split LLVMBasedICFG (#726) * Out-source globals model * Mode call-graph construction * small refactor * Cleanup resolver headers * buildCallGraph speedup for non-OTF resolvers * Move isVirtualCall into Resolver + some refactoring * Move EntryFunctionUtils into phasar_llvm_controlflow to avoid circular library dependencies between phasar_llvm_db and phasar_llvm_utils * Add more constness --- include/phasar/PhasarLLVM/ControlFlow.h | 1 + .../ControlFlow/EntryFunctionUtils.h | 31 ++ .../ControlFlow/GlobalCtorsDtorsModel.h | 45 +++ .../ControlFlow/LLVMBasedCallGraph.h | 25 ++ .../ControlFlow/LLVMBasedCallGraphBuilder.h | 49 +++ .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 37 +- .../ControlFlow/Resolver/CHAResolver.h | 6 +- .../ControlFlow/Resolver/DTAResolver.h | 28 +- .../ControlFlow/Resolver/NOResolver.h | 24 +- .../ControlFlow/Resolver/OTFResolver.h | 16 +- .../ControlFlow/Resolver/RTAResolver.h | 7 +- .../ControlFlow/Resolver/Resolver.h | 18 +- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 4 +- .../ControlFlow/EntryFunctionUtils.cpp | 68 ++++ ...balsImpl.cpp => GlobalCtorsDtorsModel.cpp} | 52 ++- .../ControlFlow/LLVMBasedCallGraphBuilder.cpp | 306 ++++++++++++++++ lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 334 +----------------- .../ControlFlow/Resolver/NOResolver.cpp | 13 +- .../ControlFlow/Resolver/OTFResolver.cpp | 4 - .../ControlFlow/Resolver/Resolver.cpp | 35 +- .../LLVMBasedICFGGlobCtorDtorTest.cpp | 15 +- .../ControlFlow/LLVMBasedICFGTest.cpp | 3 +- 22 files changed, 692 insertions(+), 429 deletions(-) create mode 100644 include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h create mode 100644 lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp rename lib/PhasarLLVM/ControlFlow/{LLVMBasedICFGGlobalsImpl.cpp => GlobalCtorsDtorsModel.cpp} (86%) create mode 100644 lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp diff --git a/include/phasar/PhasarLLVM/ControlFlow.h b/include/phasar/PhasarLLVM/ControlFlow.h index 0f4b30135..5ab99e536 100644 --- a/include/phasar/PhasarLLVM/ControlFlow.h +++ b/include/phasar/PhasarLLVM/ControlFlow.h @@ -10,6 +10,7 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_H #define PHASAR_PHASARLLVM_CONTROLFLOW_H +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" diff --git a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h new file mode 100644 index 000000000..165bc2022 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h @@ -0,0 +1,31 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_UTILS_ENTRYFUNCTIONUTILS_H +#define PHASAR_PHASARLLVM_UTILS_ENTRYFUNCTIONUTILS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/Function.h" + +#include +#include + +namespace psr { +class LLVMProjectIRDB; + +[[nodiscard]] std::vector +getEntryFunctions(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints); + +[[nodiscard]] std::vector +getEntryFunctionsMut(LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints); +} // namespace psr + +#endif // PHASAR_PHASARLLVM_UTILS_ENTRYFUNCTIONUTILS_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h new file mode 100644 index 000000000..348f341f4 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h @@ -0,0 +1,45 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_GLOBALCTORSDTORSMODEL_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_GLOBALCTORSDTORSMODEL_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/Function.h" + +namespace psr { +class LLVMProjectIRDB; + +class GlobalCtorsDtorsModel { +public: + static constexpr llvm::StringLiteral ModelName = + "__psrCRuntimeGlobalCtorsModel"; + + static constexpr llvm::StringLiteral DtorModelName = + "__psrCRuntimeGlobalDtorsModel"; + + static constexpr llvm::StringLiteral DtorsCallerName = + "__psrGlobalDtorsCaller"; + + static constexpr llvm::StringLiteral UserEntrySelectorName = + "__psrCRuntimeUserEntrySelector"; + + static llvm::Function * + buildModel(LLVMProjectIRDB &IRDB, + llvm::ArrayRef UserEntryPoints); + static llvm::Function * + buildModel(LLVMProjectIRDB &IRDB, + llvm::ArrayRef UserEntryPoints); + + /// Returns true, if a function was generated by phasar. + [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &F) noexcept; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_GLOBALCTORSDTORSMODEL_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h new file mode 100644 index 000000000..05d1c34c3 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h @@ -0,0 +1,25 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPH_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPH_H + +#include "phasar/ControlFlow/CallGraph.h" + +namespace llvm { +class Instruction; +class Function; +} // namespace llvm + +namespace psr { +using LLVMBasedCallGraph = + CallGraph; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPH_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h new file mode 100644 index 000000000..bcce15544 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPHBUILDER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPHBUILDER_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/Soundness.h" + +namespace psr { +class LLVMProjectIRDB; +enum class CallGraphAnalysisType; +class LLVMTypeHierarchy; +class LLVMVFTableProvider; +class Resolver; + +[[nodiscard]] LLVMBasedCallGraph +buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, + LLVMTypeHierarchy &TH, LLVMVFTableProvider &VTP, + LLVMAliasInfoRef PT = nullptr, + Soundness S = Soundness::Soundy); + +[[nodiscard]] LLVMBasedCallGraph +buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, + llvm::ArrayRef EntryPoints, + Soundness S = Soundness::Soundy); + +[[nodiscard]] LLVMBasedCallGraph +buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, + LLVMTypeHierarchy &TH, LLVMVFTableProvider &VTP, + LLVMAliasInfoRef PT = nullptr, + Soundness S = Soundness::Soundy); + +[[nodiscard]] LLVMBasedCallGraph +buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, + llvm::ArrayRef EntryPoints, + Soundness S = Soundness::Soundy); +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPHBUILDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index e8b57b729..5af8300cd 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -20,10 +20,13 @@ #include "phasar/ControlFlow/CallGraph.h" #include "phasar/ControlFlow/CallGraphAnalysisType.h" #include "phasar/ControlFlow/ICFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/Soundness.h" #include "llvm/ADT/ArrayRef.h" @@ -46,20 +49,10 @@ template <> struct CFGTraits : CFGTraits {}; class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { friend ICFGBase; - struct Builder; - public: + // For backward compatibility static constexpr llvm::StringLiteral GlobalCRuntimeModelName = - "__psrCRuntimeGlobalCtorsModel"; - - static constexpr llvm::StringLiteral GlobalCRuntimeDtorModelName = - "__psrCRuntimeGlobalDtorsModel"; - - static constexpr llvm::StringLiteral GlobalCRuntimeDtorsCallerName = - "__psrGlobalDtorsCaller"; - - static constexpr llvm::StringLiteral GlobalCRuntimeUserEntrySelectorName = - "__psrCRuntimeUserEntrySelector"; + GlobalCtorsDtorsModel::ModelName; /// Constructs the ICFG based on the given IRDB and the entry-points using a /// fixpoint iteration. This may take a long time. @@ -95,9 +88,9 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { bool IncludeGlobals = true); /// Creates an ICFG with an already given call-graph - explicit LLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB); + explicit LLVMBasedICFG(CallGraph CG, const LLVMProjectIRDB *IRDB); - explicit LLVMBasedICFG(LLVMProjectIRDB *IRDB, + explicit LLVMBasedICFG(const LLVMProjectIRDB *IRDB, const CallGraphData &SerializedCG); // Deleter of LLVMTypeHierarchy may be unknown here... @@ -131,10 +124,13 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { } /// Gets the underlying IRDB - [[nodiscard]] LLVMProjectIRDB *getIRDB() const noexcept { return IRDB; } + [[nodiscard]] const LLVMProjectIRDB *getIRDB() const noexcept { return IRDB; } /// Returns true, if a function was generated by phasar. - [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &) noexcept; + [[nodiscard]] static bool + isPhasarGenerated(const llvm::Function &F) noexcept { + return GlobalCtorsDtorsModel::isPhasarGenerated(F); + } using CFGBase::print; using ICFGBase::print; @@ -157,7 +153,7 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { void printImpl(llvm::raw_ostream &OS) const; void printAsJsonImpl(llvm::raw_ostream &OS) const; [[nodiscard, deprecated]] nlohmann::json getAsJsonImpl() const; - [[nodiscard]] const CallGraph &getCallGraphImpl() const noexcept { + [[nodiscard]] const LLVMBasedCallGraph &getCallGraphImpl() const noexcept { return CG; } @@ -165,14 +161,13 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { llvm::Module &M, llvm::ArrayRef UserEntryPoints); void initialize(LLVMProjectIRDB *IRDB, Resolver &CGResolver, - llvm::ArrayRef EntryPoints, - const LLVMVFTableProvider &VTP, Soundness S, + llvm::ArrayRef EntryPoints, Soundness S, bool IncludeGlobals); // --- - CallGraph CG; - LLVMProjectIRDB *IRDB = nullptr; + LLVMBasedCallGraph CG; + const LLVMProjectIRDB *IRDB = nullptr; LLVMVFTableProvider VTP; }; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 299a600ea..7d4bf8842 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -22,7 +22,6 @@ namespace llvm { class CallBase; -class Function; } // namespace llvm namespace psr { @@ -38,6 +37,11 @@ class CHAResolver : public Resolver { [[nodiscard]] std::string str() const override; + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + protected: MaybeUniquePtr TH; }; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h index 7b4484f6e..371b6cfc8 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/DTAResolver.h @@ -37,6 +37,22 @@ class DTAResolver : public CHAResolver { public: using TypeGraph_t = CachedTypeGraph; + DTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const LLVMTypeHierarchy *TH); + + ~DTAResolver() override = default; + + FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; + + void otherInst(const llvm::Instruction *Inst) override; + + [[nodiscard]] std::string str() const override; + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + protected: TypeGraph_t TypeGraph; @@ -53,18 +69,6 @@ class DTAResolver : public CHAResolver { * of vtable) */ bool heuristicAntiConstructorVtablePos(const llvm::BitCastInst *BitCast); - -public: - DTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const LLVMTypeHierarchy *TH); - - ~DTAResolver() override = default; - - FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; - - void otherInst(const llvm::Instruction *Inst) override; - - [[nodiscard]] std::string str() const override; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 7fa845080..376eb5962 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -13,39 +13,27 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" namespace llvm { -class Instruction; class CallBase; -class Function; -class StructType; } // namespace llvm namespace psr { class NOResolver final : public Resolver { -protected: - const llvm::Function * - getNonPureVirtualVFTEntry(const llvm::StructType *T, unsigned Idx, - const llvm::CallBase *CallSite); - public: - NOResolver(const LLVMProjectIRDB *IRDB); + NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP); ~NOResolver() override = default; - void preCall(const llvm::Instruction *Inst) override; - - void handlePossibleTargets(const llvm::CallBase *CallSite, - FunctionSetTy &PossibleTargets) override; - - void postCall(const llvm::Instruction *Inst) override; - FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite) override; - void otherInst(const llvm::Instruction *Inst) override; - [[nodiscard]] std::string str() const override; + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index e278cf786..ebe8fb660 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -26,7 +26,6 @@ #include namespace llvm { -class Instruction; class CallBase; class Function; class Type; @@ -38,22 +37,15 @@ namespace psr { class LLVMTypeHierarchy; class OTFResolver : public Resolver { -protected: - LLVMAliasInfoRef PT; - public: OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT); ~OTFResolver() override = default; - void preCall(const llvm::Instruction *Inst) override; - void handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &CalleeTargets) override; - void postCall(const llvm::Instruction *Inst) override; - FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) override; FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite) override; @@ -66,6 +58,14 @@ class OTFResolver : public Resolver { const llvm::Function *CalleeTarget); [[nodiscard]] std::string str() const override; + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return true; + } + +protected: + LLVMAliasInfoRef PT; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index 6010443f0..1535fd0f8 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -24,8 +24,6 @@ namespace llvm { class CallBase; class StructType; -class Function; -class StructType; } // namespace llvm namespace psr { @@ -40,6 +38,11 @@ class RTAResolver : public CHAResolver { [[nodiscard]] std::string str() const override; + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + private: void resolveAllocatedStructTypes(); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index e8f997f17..156543886 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -49,13 +49,14 @@ getReceiverType(const llvm::CallBase *CallSite); [[nodiscard]] bool isConsistentCall(const llvm::CallBase *CallSite, const llvm::Function *DestFun); +[[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, + const LLVMVFTableProvider &VTP); + class Resolver { protected: const LLVMProjectIRDB *IRDB; const LLVMVFTableProvider *VTP; - Resolver(const LLVMProjectIRDB *IRDB); - const llvm::Function * getNonPureVirtualVFTEntry(const llvm::StructType *T, unsigned Idx, const llvm::CallBase *CallSite); @@ -74,14 +75,23 @@ class Resolver { virtual void postCall(const llvm::Instruction *Inst); - virtual FunctionSetTy resolveVirtualCall(const llvm::CallBase *CallSite) = 0; + [[nodiscard]] FunctionSetTy + resolveIndirectCall(const llvm::CallBase *CallSite); + + [[nodiscard]] virtual FunctionSetTy + resolveVirtualCall(const llvm::CallBase *CallSite) = 0; - virtual FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite); + [[nodiscard]] virtual FunctionSetTy + resolveFunctionPointer(const llvm::CallBase *CallSite); virtual void otherInst(const llvm::Instruction *Inst); [[nodiscard]] virtual std::string str() const = 0; + [[nodiscard]] virtual bool mutatesHelperAnalysisInformation() const noexcept { + // Conservatively returns true. Override if possible + return true; + } static std::unique_ptr create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 46f1b05a9..b0d8bccf9 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -19,8 +19,6 @@ #include "phasar/Utils/Utilities.h" -#include "llvm/ADT/DenseMap.h" - #include #include @@ -249,7 +247,7 @@ llvm::StringRef getVarAnnotationIntrinsicName(const llvm::CallInst *CallInst); class ModulesToSlotTracker { friend class LLVMProjectIRDB; - friend class LLVMBasedICFG; + friend class GlobalCtorsDtorsModel; friend class LLVMZeroValue; private: diff --git a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp new file mode 100644 index 000000000..06986f9c2 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp @@ -0,0 +1,68 @@ +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/Utils/Logger.h" + +std::vector +psr::getEntryFunctions(const LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints) { + std::vector UserEntryPointFns; + if (EntryPoints.size() == 1 && EntryPoints.front() == "__ALL__") { + UserEntryPointFns.reserve(IRDB.getNumFunctions()); + // Handle the special case in which a user wishes to treat all functions as + // entry points. + for (const auto *Fun : IRDB.getAllFunctions()) { + // Only functions with external linkage (or 'main') can be called from the + // outside! + if (!Fun->isDeclaration() && Fun->hasName() && + (Fun->hasExternalLinkage() || Fun->getName() == "main")) { + UserEntryPointFns.push_back(Fun); + } + } + } else { + UserEntryPointFns.reserve(EntryPoints.size()); + for (const auto &EntryPoint : EntryPoints) { + const auto *F = IRDB.getFunctionDefinition(EntryPoint); + if (F == nullptr) { + PHASAR_LOG_LEVEL(WARNING, + "Could not retrieve function for entry point '" + << EntryPoint << "'"); + continue; + } + UserEntryPointFns.push_back(F); + } + } + return UserEntryPointFns; +} + +[[nodiscard]] std::vector +psr::getEntryFunctionsMut(LLVMProjectIRDB &IRDB, + llvm::ArrayRef EntryPoints) { + std::vector UserEntryPointFns; + if (EntryPoints.size() == 1 && EntryPoints.front() == "__ALL__") { + UserEntryPointFns.reserve(IRDB.getNumFunctions()); + // Handle the special case in which a user wishes to treat all functions as + // entry points. + for (const auto *Fun : IRDB.getAllFunctions()) { + // Only functions with external linkage (or 'main') can be called from the + // outside! + if (!Fun->isDeclaration() && Fun->hasName() && + (Fun->hasExternalLinkage() || Fun->getName() == "main")) { + UserEntryPointFns.push_back(IRDB.getFunction(Fun->getName())); + } + } + } else { + UserEntryPointFns.reserve(EntryPoints.size()); + for (const auto &EntryPoint : EntryPoints) { + auto *F = IRDB.getFunctionDefinition(EntryPoint); + if (F == nullptr) { + PHASAR_LOG_LEVEL(WARNING, + "Could not retrieve function for entry point '" + << EntryPoint << "'"); + continue; + } + UserEntryPointFns.push_back(F); + } + } + return UserEntryPointFns; +} diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp similarity index 86% rename from lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp rename to lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp index eeb0b2393..e6a53249b 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp +++ b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp @@ -7,7 +7,9 @@ * Philipp Schubert, Fabian Schiebel and others *****************************************************************************/ -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" + +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" @@ -124,10 +126,10 @@ static llvm::Function *createDtorCallerForModule( &RegisteredDtors) { auto *PhasarDtorCaller = llvm::cast( - Mod.getOrInsertFunction( - LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + '.' + - getReducedModuleName(Mod), - llvm::Type::getVoidTy(Mod.getContext())) + Mod.getOrInsertFunction((GlobalCtorsDtorsModel::DtorsCallerName + + llvm::Twine('.') + getReducedModuleName(Mod)) + .str(), + llvm::Type::getVoidTy(Mod.getContext())) .getCallee()); auto *BB = @@ -196,7 +198,7 @@ static std::pair buildCRuntimeGlobalDtorsModel( auto &CTX = M.getContext(); auto *Cleanup = llvm::cast( - M.getOrInsertFunction(LLVMBasedICFG::GlobalCRuntimeDtorModelName, + M.getOrInsertFunction(GlobalCtorsDtorsModel::DtorModelName, llvm::Type::getVoidTy(CTX)) .getCallee()); @@ -217,24 +219,25 @@ static std::pair buildCRuntimeGlobalDtorsModel( return {Cleanup, true}; } -llvm::Function *LLVMBasedICFG::buildCRuntimeGlobalCtorsDtorsModel( - llvm::Module &M, llvm::ArrayRef UserEntryPoints) { +llvm::Function *GlobalCtorsDtorsModel::buildModel( + LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints) { + auto &M = *IRDB.getModule(); auto GlobalCtors = collectGlobalCtors(M); auto GlobalDtors = collectGlobalDtors(M); auto *RegisteredDtorCaller = collectRegisteredDtors(GlobalDtors, M); if (RegisteredDtorCaller) { - IRDB->insertFunction(RegisteredDtorCaller); + IRDB.insertFunction(RegisteredDtorCaller); } auto [GlobalCleanupFn, Inserted] = buildCRuntimeGlobalDtorsModel(M, GlobalDtors); if (Inserted) { - IRDB->insertFunction(GlobalCleanupFn); + IRDB.insertFunction(GlobalCleanupFn); } auto &CTX = M.getContext(); auto *GlobModel = llvm::cast( - M.getOrInsertFunction(GlobalCRuntimeModelName, + M.getOrInsertFunction(ModelName, /*retTy*/ llvm::Type::getVoidTy(CTX), /*argc*/ @@ -301,8 +304,8 @@ llvm::Function *LLVMBasedICFG::buildCRuntimeGlobalCtorsDtorsModel( IRB.CreateRetVoid(); } else { - auto UEntrySelectorFn = M.getOrInsertFunction( - GlobalCRuntimeUserEntrySelectorName, llvm::Type::getInt32Ty(CTX)); + auto UEntrySelectorFn = M.getOrInsertFunction(UserEntrySelectorName, + llvm::Type::getInt32Ty(CTX)); auto *UEntrySelector = IRB.CreateCall(UEntrySelectorFn); @@ -335,9 +338,30 @@ llvm::Function *LLVMBasedICFG::buildCRuntimeGlobalCtorsDtorsModel( IRB.CreateRetVoid(); } - IRDB->insertFunction(GlobModel); + IRDB.insertFunction(GlobModel); ModulesToSlotTracker::updateMSTForModule(&M); return GlobModel; } + +llvm::Function * +GlobalCtorsDtorsModel::buildModel(LLVMProjectIRDB &IRDB, + llvm::ArrayRef UserEntryPoints) { + auto UserEntryPointFns = getEntryFunctionsMut(IRDB, UserEntryPoints); + return buildModel(IRDB, UserEntryPointFns); +} + +bool GlobalCtorsDtorsModel::isPhasarGenerated( + const llvm::Function &F) noexcept { + if (F.hasName()) { + llvm::StringRef FunctionName = F.getName(); + return llvm::StringSwitch(FunctionName) + .Cases(ModelName, DtorModelName, DtorsCallerName, UserEntrySelectorName, + true) + .Default(false); + } + + return false; +} + } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp new file mode 100644 index 000000000..74e8a9716 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -0,0 +1,306 @@ +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" + +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/PAMMMacros.h" +#include "phasar/Utils/Soundness.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +namespace { +using namespace psr; +struct Builder { + const LLVMProjectIRDB *IRDB = nullptr; + Resolver *Res = nullptr; + CallGraphBuilder + CGBuilder{}; + llvm::DenseSet VisitedFunctions{}; + + llvm::Function *GlobalCleanupFn = nullptr; + + llvm::SmallDenseMap + GlobalRegisteredDtorsCaller{}; + + // The worklist for direct callee resolution. + llvm::SmallVector FunctionWL{}; + + // Map indirect calls to the number of possible targets found for it. Fixpoint + // is not reached when more targets are found. + llvm::DenseMap IndirectCalls{}; + + void initWorkList(llvm::ArrayRef EntryPointFns); + + [[nodiscard]] CallGraph + buildCallGraph(Soundness S); + + /// \returns FixPointReached + bool processFunction(/*bidigraph_t &Callgraph,*/ const llvm::Function *F); + /// \returns FoundNewTargets + bool constructDynamicCall(const llvm::Instruction *CS); +}; + +void Builder::initWorkList( + llvm::ArrayRef EntryPointFns) { + FunctionWL.reserve(IRDB->getNumFunctions()); + FunctionWL.append(EntryPointFns.begin(), EntryPointFns.end()); + + CGBuilder.reserve(IRDB->getNumFunctions()); +} + +auto Builder::buildCallGraph(Soundness S) -> LLVMBasedCallGraph { + PHASAR_LOG_LEVEL_CAT(INFO, "LLVMBasedICFG", + "Starting CallGraphAnalysisType: " << Res->str()); + VisitedFunctions.reserve(IRDB->getNumFunctions()); + + bool RequiresIndirectCallsFixpoint = + S != psr::Soundness::Unsound && Res->mutatesHelperAnalysisInformation(); + + bool FixpointReached; + + do { + FixpointReached = true; + while (!FunctionWL.empty()) { + const llvm::Function *F = FunctionWL.pop_back_val(); + FixpointReached &= processFunction(F); + } + + if (RequiresIndirectCallsFixpoint) { + /// XXX This can probably be done more efficiently. + /// However, we cannot just work on the IndirectCalls-delta as we are + /// mutating the points-to-info on the fly + for (auto [CS, _] : IndirectCalls) { + FixpointReached &= !constructDynamicCall(CS); + } + } + } while (!FixpointReached); + for (const auto &[IndirectCall, Targets] : IndirectCalls) { + if (Targets == 0) { + PHASAR_LOG_LEVEL(WARNING, "No callees found for callsite " + << llvmIRToString(IndirectCall)); + } + } + + PAMM_GET_INSTANCE; + REG_COUNTER("CG Functions", CGBuilder.viewCallGraph().getNumVertexFunctions(), + Full); + REG_COUNTER("CG CallSites", CGBuilder.viewCallGraph().getNumVertexCallSites(), + Full); + PHASAR_LOG_LEVEL_CAT(INFO, "LLVMBasedICFG", + "Call graph has been constructed"); + return CGBuilder.consumeCallGraph(); +} + +static bool fillPossibleTargets( + Resolver::FunctionSetTy &PossibleTargets, Resolver &Res, + const llvm::CallBase *CS, + llvm::DenseMap &IndirectCalls) { + if (const auto *StaticCallee = CS->getCalledFunction()) { + PossibleTargets.insert(StaticCallee); + + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Found static call-site: " + << " " << llvmIRToString(CS)); + return true; + } + + // still try to resolve the called function statically + const llvm::Value *SV = CS->getCalledOperand()->stripPointerCastsAndAliases(); + if (const auto *ValueFunction = llvm::dyn_cast(SV)) { + PossibleTargets.insert(ValueFunction); + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Found static call-site: " << llvmIRToString(CS)); + return true; + } + + if (llvm::isa(SV)) { + return true; + } + + // the function call must be resolved dynamically + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Found dynamic call-site: " + << " " << llvmIRToString(CS)); + + PossibleTargets = Res.resolveIndirectCall(CS); + + IndirectCalls[CS] = PossibleTargets.size(); + return false; +} + +bool Builder::processFunction(const llvm::Function *F) { + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Walking in function: " << F->getName()); + if (F->isDeclaration() || !VisitedFunctions.insert(F).second) { + PHASAR_LOG_LEVEL_CAT( + DEBUG, "LLVMBasedICFG", + "Function already visited or only declaration: " << F->getName()); + return true; + } + + assert(Res != nullptr); + + // add a node for function F to the call graph (if not present already) + std::ignore = CGBuilder.addFunctionVertex(F); + + bool FixpointReached = true; + + // iterate all instructions of the current function + Resolver::FunctionSetTy PossibleTargets; + for (const auto &I : llvm::instructions(F)) { + const auto *CS = llvm::dyn_cast(&I); + if (!CS) { + Res->otherInst(&I); + continue; + } + + Res->preCall(&I); + scope_exit PostCall = [&] { Res->postCall(&I); }; + + FixpointReached &= + fillPossibleTargets(PossibleTargets, *Res, CS, IndirectCalls); + + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Found " << PossibleTargets.size() + << " possible target(s)"); + + Res->handlePossibleTargets(CS, PossibleTargets); + + auto *CallSiteId = CGBuilder.addInstructionVertex(CS); + + // Insert possible target inside the graph and add the link with + // the current function + for (const auto *PossibleTarget : PossibleTargets) { + CGBuilder.addCallEdge(CS, CallSiteId, PossibleTarget); + FunctionWL.push_back(PossibleTarget); + } + PossibleTargets.clear(); + } + + return FixpointReached; +} + +bool Builder::constructDynamicCall(const llvm::Instruction *CS) { + const auto *CallSite = llvm::dyn_cast(CS); + if (!CallSite) { + llvm::report_fatal_error("[constructDynamicCall]: No call: " + + llvm::Twine(llvmIRToString(CS))); + } + + // Find vertex of callsite. + auto *Callees = CGBuilder.getInstVertexOrNull(CS); + if (!Callees) { + llvm::report_fatal_error( + "[constructDynamicCall]: Did not find vertex of callsite " + + llvm::Twine(llvmIRToString(CS))); + } + + // the function call must be resolved dynamically + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Looking into dynamic call-site: "); + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", " " << llvmIRToString(CS)); + + Res->preCall(CallSite); + scope_exit PostCall = [&] { Res->postCall(CallSite); }; + + // call the resolve routine + + auto PossibleTargets = Res->resolveIndirectCall(CallSite); + + assert(IndirectCalls.count(CallSite)); + auto &NumIndCalls = IndirectCalls[CallSite]; + + if (NumIndCalls >= PossibleTargets.size()) { + // No new targets found + return false; + } + + PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", + "Found " << PossibleTargets.size() - NumIndCalls + << " new possible target(s)"); + NumIndCalls = PossibleTargets.size(); + + // Throw out already found targets + for (const auto *Tgt : *Callees) { + PossibleTargets.erase(Tgt); + } + + Res->handlePossibleTargets(CallSite, PossibleTargets); + // Insert possible target inside the graph and add the link with + // the current function + for (const auto *PossibleTarget : PossibleTargets) { + CGBuilder.addCallEdge(CallSite, Callees, PossibleTarget); + FunctionWL.push_back(PossibleTarget); + } + + return true; +} +} // namespace + +auto psr::buildLLVMBasedCallGraph( + const LLVMProjectIRDB &IRDB, Resolver &CGResolver, + llvm::ArrayRef EntryPoints, Soundness S) + -> LLVMBasedCallGraph { + Builder B{&IRDB, &CGResolver}; + + B.initWorkList(EntryPoints); + + PHASAR_LOG_LEVEL_CAT( + INFO, "LLVMBasedICFG", + "Starting ICFG construction " + << std::chrono::steady_clock::now().time_since_epoch().count()); + + scope_exit FinishTiming = [] { + PHASAR_LOG_LEVEL_CAT( + INFO, "LLVMBasedICFG", + "Finished ICFG construction " + << std::chrono::steady_clock::now().time_since_epoch().count()); + }; + + return B.buildCallGraph(S); +} + +auto psr::buildLLVMBasedCallGraph( + LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, LLVMTypeHierarchy &TH, + LLVMVFTableProvider &VTP, LLVMAliasInfoRef PT, Soundness S) + -> LLVMBasedCallGraph { + + LLVMAliasInfo PTOwn; + if (!PT && CGType == CallGraphAnalysisType::OTF) { + PTOwn = std::make_unique(&IRDB); + PT = PTOwn.asRef(); + } + + auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH); + return buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints, S); +} + +auto psr::buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, + CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, + LLVMTypeHierarchy &TH, + LLVMVFTableProvider &VTP, LLVMAliasInfoRef PT, + Soundness S) -> LLVMBasedCallGraph { + auto EntryPointFns = getEntryFunctions(IRDB, EntryPoints); + return buildLLVMBasedCallGraph(IRDB, CGType, EntryPointFns, TH, VTP, PT, S); +} + +auto psr::buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, + Resolver &CGResolver, + llvm::ArrayRef EntryPoints, + Soundness S) -> LLVMBasedCallGraph { + auto EntryPointFns = getEntryFunctions(IRDB, EntryPoints); + return buildLLVMBasedCallGraph(IRDB, CGResolver, EntryPointFns, S); +} diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 92cc52460..2605397da 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -13,6 +13,8 @@ #include "phasar/ControlFlow/CallGraphAnalysisType.h" #include "phasar/ControlFlow/CallGraphData.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -21,13 +23,9 @@ #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/MaybeUniquePtr.h" -#include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Soundness.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/ErrorHandling.h" @@ -35,305 +33,16 @@ #include namespace psr { -struct LLVMBasedICFG::Builder { - LLVMProjectIRDB *IRDB = nullptr; - const LLVMVFTableProvider *VTP{}; - Resolver *Res = nullptr; - CallGraphBuilder - CGBuilder{}; - llvm::DenseSet VisitedFunctions{}; - llvm::SmallVector UserEntryPoints{}; - llvm::Function *GlobalCleanupFn = nullptr; - - llvm::SmallDenseMap - GlobalRegisteredDtorsCaller{}; - - // The worklist for direct callee resolution. - llvm::SmallVector FunctionWL{}; - - // Map indirect calls to the number of possible targets found for it. Fixpoint - // is not reached when more targets are found. - llvm::DenseMap IndirectCalls{}; - - void initEntryPoints(llvm::ArrayRef EntryPoints); - void initGlobalsAndWorkList(LLVMBasedICFG *ICFG, bool IncludeGlobals); - [[nodiscard]] CallGraph - buildCallGraph(Soundness S); - - /// \returns FixPointReached - bool processFunction(/*bidigraph_t &Callgraph,*/ const llvm::Function *F); - /// \returns FoundNewTargets - bool constructDynamicCall(const llvm::Instruction *CS); -}; - -void LLVMBasedICFG::Builder::initEntryPoints( - llvm::ArrayRef EntryPoints) { - if (EntryPoints.size() == 1 && EntryPoints.front() == "__ALL__") { - UserEntryPoints.reserve(IRDB->getNumFunctions()); - // Handle the special case in which a user wishes to treat all functions as - // entry points. - for (const auto *Fun : IRDB->getAllFunctions()) { - // Only functions with external linkage (or 'main') can be called from the - // outside! - if (!Fun->isDeclaration() && Fun->hasName() && - (Fun->hasExternalLinkage() || Fun->getName() == "main")) { - UserEntryPoints.push_back(IRDB->getFunction(Fun->getName())); - } - } - } else { - UserEntryPoints.reserve(EntryPoints.size()); - for (const auto &EntryPoint : EntryPoints) { - auto *F = IRDB->getFunctionDefinition(EntryPoint); - if (F == nullptr) { - PHASAR_LOG_LEVEL(WARNING, - "Could not retrieve function for entry point '" - << EntryPoint << "'"); - continue; - } - UserEntryPoints.push_back(F); - } - } -} - -void LLVMBasedICFG::Builder::initGlobalsAndWorkList(LLVMBasedICFG *ICFG, - bool IncludeGlobals) { - FunctionWL.reserve(IRDB->getNumFunctions()); +void LLVMBasedICFG::initialize(LLVMProjectIRDB *IRDB, Resolver &CGResolver, + llvm::ArrayRef EntryPoints, + Soundness S, bool IncludeGlobals) { if (IncludeGlobals) { - const auto *GlobCtor = ICFG->buildCRuntimeGlobalCtorsDtorsModel( - *IRDB->getModule(), UserEntryPoints); - FunctionWL.push_back(GlobCtor); + auto *EntryFun = GlobalCtorsDtorsModel::buildModel(*IRDB, EntryPoints); + this->CG = buildLLVMBasedCallGraph(*IRDB, CGResolver, {EntryFun}, S); } else { - FunctionWL.insert(FunctionWL.end(), UserEntryPoints.begin(), - UserEntryPoints.end()); + this->CG = buildLLVMBasedCallGraph(*IRDB, CGResolver, EntryPoints, S); } - // Note: Pre-allocate the call-graph builder *after* adding the - // CRuntimeGlobalCtorsDtorsModel - CGBuilder.reserve(IRDB->getNumFunctions()); -} - -auto LLVMBasedICFG::Builder::buildCallGraph(Soundness /*S*/) - -> CallGraph { - PHASAR_LOG_LEVEL_CAT(INFO, "LLVMBasedICFG", - "Starting CallGraphAnalysisType: " << Res->str()); - VisitedFunctions.reserve(IRDB->getNumFunctions()); - - bool FixpointReached; - - do { - FixpointReached = true; - while (!FunctionWL.empty()) { - const llvm::Function *F = FunctionWL.pop_back_val(); - FixpointReached &= processFunction(F); - } - - /// XXX This can probably be done more efficiently. - /// However, we cannot just work on the IndirectCalls-delta as we are - /// mutating the points-to-info on the fly - for (auto [CS, _] : IndirectCalls) { - FixpointReached &= !constructDynamicCall(CS); - } - - } while (!FixpointReached); - for (const auto &[IndirectCall, Targets] : IndirectCalls) { - if (Targets == 0) { - PHASAR_LOG_LEVEL(WARNING, "No callees found for callsite " - << llvmIRToString(IndirectCall)); - } - } - - PAMM_GET_INSTANCE; - REG_COUNTER("CG Functions", CGBuilder.viewCallGraph().getNumVertexFunctions(), - Full); - REG_COUNTER("CG CallSites", CGBuilder.viewCallGraph().getNumVertexCallSites(), - Full); - PHASAR_LOG_LEVEL_CAT(INFO, "LLVMBasedICFG", - "Call graph has been constructed"); - return CGBuilder.consumeCallGraph(); -} - -bool LLVMBasedICFG::Builder::processFunction(const llvm::Function *F) { - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Walking in function: " << F->getName()); - if (F->isDeclaration() || !VisitedFunctions.insert(F).second) { - PHASAR_LOG_LEVEL_CAT( - DEBUG, "LLVMBasedICFG", - "Function already visited or only declaration: " << F->getName()); - return true; - } - - assert(Res != nullptr); - - // add a node for function F to the call graph (if not present already) - std::ignore = CGBuilder.addFunctionVertex(F); - - bool FixpointReached = true; - - // iterate all instructions of the current function - Resolver::FunctionSetTy PossibleTargets; - for (const auto &I : llvm::instructions(F)) { - if (const auto *CS = llvm::dyn_cast(&I)) { - Res->preCall(&I); - - // check if function call can be resolved statically - if (CS->getCalledFunction() != nullptr) { - PossibleTargets.insert(CS->getCalledFunction()); - - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found static call-site: " - << " " << llvmIRToString(CS)); - } else { - // still try to resolve the called function statically - const llvm::Value *SV = CS->getCalledOperand()->stripPointerCasts(); - const llvm::Function *ValueFunction = - !SV->hasName() ? nullptr : IRDB->getFunction(SV->getName()); - if (ValueFunction) { - PossibleTargets.insert(ValueFunction); - PHASAR_LOG_LEVEL_CAT( - DEBUG, "LLVMBasedICFG", - "Found static call-site: " << llvmIRToString(CS)); - } else { - if (llvm::isa(SV)) { - continue; - } - // the function call must be resolved dynamically - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found dynamic call-site: " - << " " << llvmIRToString(CS)); - IndirectCalls[CS] = 0; - std::ignore = CGBuilder.addInstructionVertex(CS); - - FixpointReached = false; - continue; - } - } - - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found " << PossibleTargets.size() - << " possible target(s)"); - - Res->handlePossibleTargets(CS, PossibleTargets); - - auto *CallSiteId = CGBuilder.addInstructionVertex(CS); - - // Insert possible target inside the graph and add the link with - // the current function - for (const auto *PossibleTarget : PossibleTargets) { - CGBuilder.addCallEdge(CS, CallSiteId, PossibleTarget); - - FunctionWL.push_back(PossibleTarget); - } - - Res->postCall(&I); - } else { - Res->otherInst(&I); - } - PossibleTargets.clear(); - } - - return FixpointReached; -} - -static bool internalIsVirtualFunctionCall(const llvm::Instruction *Inst, - const LLVMVFTableProvider &VTP) { - assert(Inst != nullptr); - const auto *CallSite = llvm::dyn_cast(Inst); - if (!CallSite) { - return false; - } - // check potential receiver type - const auto *RecType = getReceiverType(CallSite); - if (!RecType) { - return false; - } - if (!VTP.hasVFTable(RecType)) { - return false; - } - return getVFTIndex(CallSite) >= 0; -} - -bool LLVMBasedICFG::Builder::constructDynamicCall(const llvm::Instruction *CS) { - bool NewTargetsFound = false; - // Find vertex of calling function. - - auto *Callees = CGBuilder.getInstVertexOrNull(CS); - - if (!Callees) { - llvm::report_fatal_error( - "constructDynamicCall: Did not find vertex of calling function " + - CS->getFunction()->getName() + " at callsite " + llvmIRToString(CS)); - } - - if (const auto *CallSite = llvm::dyn_cast(CS)) { - Res->preCall(CallSite); - - // the function call must be resolved dynamically - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Looking into dynamic call-site: "); - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", " " << llvmIRToString(CS)); - // call the resolve routine - - assert(VTP != nullptr); - auto PossibleTargets = internalIsVirtualFunctionCall(CallSite, *VTP) - ? Res->resolveVirtualCall(CallSite) - : Res->resolveFunctionPointer(CallSite); - - assert(IndirectCalls.count(CallSite)); - - auto &NumIndCalls = IndirectCalls[CallSite]; - - if (NumIndCalls < PossibleTargets.size()) { - PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found " << PossibleTargets.size() - NumIndCalls - << " new possible target(s)"); - NumIndCalls = PossibleTargets.size(); - NewTargetsFound = true; - } - if (!NewTargetsFound) { - return NewTargetsFound; - } - - // Throw out already found targets - for (const auto *Tgt : *Callees) { - PossibleTargets.erase(Tgt); - } - - Res->handlePossibleTargets(CallSite, PossibleTargets); - // Insert possible target inside the graph and add the link with - // the current function - for (const auto *PossibleTarget : PossibleTargets) { - CGBuilder.addCallEdge(CallSite, Callees, PossibleTarget); - FunctionWL.push_back(PossibleTarget); - } - - Res->postCall(CallSite); - } else { - Res->otherInst(CS); - } - - return NewTargetsFound; -} - -void LLVMBasedICFG::initialize(LLVMProjectIRDB *IRDB, Resolver &CGResolver, - llvm::ArrayRef EntryPoints, - const LLVMVFTableProvider &VTP, Soundness S, - bool IncludeGlobals) { - Builder B{IRDB, &VTP, &CGResolver}; - - B.initEntryPoints(EntryPoints); - B.initGlobalsAndWorkList(this, IncludeGlobals); - - PHASAR_LOG_LEVEL_CAT( - INFO, "LLVMBasedICFG", - "Starting ICFG construction " - << std::chrono::steady_clock::now().time_since_epoch().count()); - - this->CG = B.buildCallGraph(S); - - PHASAR_LOG_LEVEL_CAT( - INFO, "LLVMBasedICFG", - "Finished ICFG construction " - << std::chrono::steady_clock::now().time_since_epoch().count()); } LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, @@ -352,7 +61,7 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, } auto CGRes = Resolver::create(CGType, IRDB, &VTP, TH, PT); - initialize(IRDB, *CGRes, EntryPoints, VTP, S, IncludeGlobals); + initialize(IRDB, *CGRes, EntryPoints, S, IncludeGlobals); } LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, Resolver &CGResolver, @@ -360,7 +69,8 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, Resolver &CGResolver, Soundness S, bool IncludeGlobals) : IRDB(IRDB), VTP(*IRDB) { assert(IRDB != nullptr); - initialize(IRDB, CGResolver, EntryPoints, VTP, S, IncludeGlobals); + + initialize(IRDB, CGResolver, EntryPoints, S, IncludeGlobals); } LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, Resolver &CGResolver, @@ -369,13 +79,14 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, Resolver &CGResolver, Soundness S, bool IncludeGlobals) : IRDB(IRDB), VTP(std::move(VTP)) { assert(IRDB != nullptr); - initialize(IRDB, CGResolver, EntryPoints, this->VTP, S, IncludeGlobals); + initialize(IRDB, CGResolver, EntryPoints, S, IncludeGlobals); } -LLVMBasedICFG::LLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB) +LLVMBasedICFG::LLVMBasedICFG(CallGraph CG, + const LLVMProjectIRDB *IRDB) : CG(std::move(CG)), IRDB(IRDB), VTP(*IRDB) {} -LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, +LLVMBasedICFG::LLVMBasedICFG(const LLVMProjectIRDB *IRDB, const CallGraphData &SerializedCG) : CG(CallGraph::deserialize( SerializedCG, @@ -385,19 +96,6 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, LLVMBasedICFG::~LLVMBasedICFG() = default; -bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { - if (F.hasName()) { - llvm::StringRef FunctionName = F.getName(); - return llvm::StringSwitch(FunctionName) - .Cases(GlobalCRuntimeModelName, GlobalCRuntimeDtorModelName, - GlobalCRuntimeDtorsCallerName, - GlobalCRuntimeUserEntrySelectorName, true) - .Default(false); - } - - return false; -} - [[nodiscard]] FunctionRange LLVMBasedICFG::getAllFunctionsImpl() const { return IRDB->getAllFunctions(); } @@ -413,7 +111,7 @@ bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { } [[nodiscard]] bool LLVMBasedICFG::isVirtualFunctionCallImpl(n_t Inst) const { - return internalIsVirtualFunctionCall(Inst, VTP); + return psr::isVirtualCall(Inst, VTP); } [[nodiscard]] auto LLVMBasedICFG::allNonCallStartNodesImpl() const diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp index 80b99a71a..f825f5254 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp @@ -22,14 +22,9 @@ using namespace psr; namespace psr { -NOResolver::NOResolver(const LLVMProjectIRDB *IRDB) : Resolver(IRDB) {} - -void NOResolver::preCall(const llvm::Instruction *Inst) {} - -void NOResolver::handlePossibleTargets(const llvm::CallBase *CallSite, - FunctionSetTy &PossibleTargets) {} - -void NOResolver::postCall(const llvm::Instruction *Inst) {} +NOResolver::NOResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP) + : Resolver(IRDB, VTP) {} auto NOResolver::resolveVirtualCall(const llvm::CallBase * /*CallSite*/) -> FunctionSetTy { @@ -41,8 +36,6 @@ auto NOResolver::resolveFunctionPointer(const llvm::CallBase * /*CallSite*/) return {}; } -void NOResolver::otherInst(const llvm::Instruction *Inst) {} - std::string NOResolver::str() const { return "NOResolver"; } } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index c1f783d54..fb2d8a896 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -34,8 +34,6 @@ OTFResolver::OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT) : Resolver(IRDB, VTP), PT(PT) {} -void OTFResolver::preCall(const llvm::Instruction *Inst) {} - void OTFResolver::handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &CalleeTargets) { // if we have no inter-procedural points-to information, use call-graph @@ -67,8 +65,6 @@ void OTFResolver::handlePossibleTargets(const llvm::CallBase *CallSite, } } -void OTFResolver::postCall(const llvm::Instruction *Inst) {} - auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) -> FunctionSetTy { FunctionSetTy PossibleCallTargets; diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 745a4e866..e55edefce 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -108,14 +108,31 @@ bool psr::isConsistentCall(const llvm::CallBase *CallSite, return true; } -namespace psr { +bool psr::isVirtualCall(const llvm::Instruction *Inst, + const LLVMVFTableProvider &VTP) { + assert(Inst != nullptr); + const auto *CallSite = llvm::dyn_cast(Inst); + if (!CallSite) { + return false; + } + // check potential receiver type + const auto *RecType = getReceiverType(CallSite); + if (!RecType) { + return false; + } -Resolver::Resolver(const LLVMProjectIRDB *IRDB) : IRDB(IRDB), VTP(nullptr) { - assert(IRDB != nullptr); + if (!VTP.hasVFTable(RecType)) { + return false; + } + return getVFTIndex(CallSite) >= 0; } +namespace psr { + Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) - : IRDB(IRDB), VTP(VTP) {} + : IRDB(IRDB), VTP(VTP) { + assert(VTP != nullptr); +} const llvm::Function * Resolver::getNonPureVirtualVFTEntry(const llvm::StructType *T, unsigned Idx, @@ -140,6 +157,14 @@ void Resolver::handlePossibleTargets(const llvm::CallBase *CallSite, void Resolver::postCall(const llvm::Instruction *Inst) {} +auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) + -> FunctionSetTy { + if (VTP && isVirtualCall(CallSite, *VTP)) { + return resolveVirtualCall(CallSite); + } + return resolveFunctionPointer(CallSite); +} + auto Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) -> FunctionSetTy { // we may wish to optimise this function @@ -170,7 +195,7 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, switch (Ty) { case CallGraphAnalysisType::NORESOLVE: - return std::make_unique(IRDB); + return std::make_unique(IRDB, VTP); case CallGraphAnalysisType::CHA: assert(TH != nullptr); return std::make_unique(IRDB, VTP, TH); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp index 0977c8189..5c1a295fc 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp @@ -9,6 +9,7 @@ #include "phasar/Config/Configuration.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -93,10 +94,9 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest) { // GlobalCtor->print(llvm::outs()); - ensureFunctionOrdering( - GlobalCtor, ICFG, - {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, - {"main", LLVMBasedICFG::GlobalCRuntimeDtorModelName}}); + ensureFunctionOrdering(GlobalCtor, ICFG, + {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, + {"main", GlobalCtorsDtorsModel::DtorModelName}}); } TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest2) { @@ -145,12 +145,11 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, DtorTest1) { ensureFunctionOrdering( GlobalCtor, ICFG, {{"_GLOBAL__sub_I_globals_dtor_1.cpp", "main"}, - {"main", LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + + {"main", GlobalCtorsDtorsModel::DtorsCallerName.str() + ".globals_dtor_1_cpp.ll"}}); - auto *GlobalDtor = - IRDB.getFunction(LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + - ".globals_dtor_1_cpp.ll"); + auto *GlobalDtor = IRDB.getFunction( + GlobalCtorsDtorsModel::DtorsCallerName.str() + ".globals_dtor_1_cpp.ll"); ASSERT_NE(nullptr, GlobalDtor); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp index 249283fde..9b001834a 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp @@ -2,6 +2,7 @@ #include "phasar/Config/Configuration.h" #include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -85,7 +86,7 @@ TEST(LLVMBasedICFGTest, StaticCallSite_2b) { const llvm::Function *CTOR = IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeModelName); const llvm::Function *DTOR = - IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeDtorModelName); + IRDB.getFunctionDefinition(GlobalCtorsDtorsModel::DtorModelName); ASSERT_TRUE(F); ASSERT_TRUE(FOO); ASSERT_TRUE(BAR);