From 0c6d0c03356f780c7b16e673bd03bf47daa18162 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 29 Mar 2024 10:54:39 +0100 Subject: [PATCH 01/17] Start with sparse ICFG --- include/phasar/ControlFlow/CFGBase.h | 2 +- include/phasar/ControlFlow/SparseCFGBase.h | 43 +++++++++ .../phasar/ControlFlow/SparseCFGProvider.h | 39 ++++++++ .../ControlFlow/SparseLLVMBasedCFG.h | 47 ++++++++++ .../ControlFlow/SparseLLVMBasedCFGProvider.h | 33 +++++++ .../ControlFlow/SparseLLVMBasedICFG.h | 53 +++++++++++ .../ControlFlow/SparseLLVMBasedICFG.cpp | 89 +++++++++++++++++++ 7 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 include/phasar/ControlFlow/SparseCFGBase.h create mode 100644 include/phasar/ControlFlow/SparseCFGProvider.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h create mode 100644 lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp diff --git a/include/phasar/ControlFlow/CFGBase.h b/include/phasar/ControlFlow/CFGBase.h index 57e358225..9d990f76d 100644 --- a/include/phasar/ControlFlow/CFGBase.h +++ b/include/phasar/ControlFlow/CFGBase.h @@ -135,7 +135,7 @@ template class CFGBase { return self().getAsJsonImpl(Fun); } -private: +protected: Derived &self() noexcept { return static_cast(*this); } const Derived &self() const noexcept { return static_cast(*this); diff --git a/include/phasar/ControlFlow/SparseCFGBase.h b/include/phasar/ControlFlow/SparseCFGBase.h new file mode 100644 index 000000000..3d5f531c2 --- /dev/null +++ b/include/phasar/ControlFlow/SparseCFGBase.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_CONTROLFLOW_SPARSECFGBASE_H +#define PHASAR_CONTROLFLOW_SPARSECFGBASE_H + +#include "phasar/ControlFlow/CFGBase.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Nullable.h" + +namespace psr { +template class SparseCFGBase : public CFGBase { +public: + using typename CFGBase::n_t; + using typename CFGBase::f_t; + + /// Gets the next instruction in control-flow order, starting from + /// FromInstruction, that may use or define Val. + /// If the next user is ambiguous, returns null. + [[nodiscard]] Nullable + nextUserOrNull(ByConstRef FromInstruction) const { + return self().nextUserOrNullImpl(FromInstruction); + } + +protected: + using CFGBase::self; +}; + +template +// NOLINTNEXTLINE(readability-identifier-naming) +constexpr bool is_sparse_cfg_v = is_crtp_base_of_v + &&std::is_same_v + &&std::is_same_v; + +} // namespace psr + +#endif // PHASAR_CONTROLFLOW_SPARSECFGBASE_H diff --git a/include/phasar/ControlFlow/SparseCFGProvider.h b/include/phasar/ControlFlow/SparseCFGProvider.h new file mode 100644 index 000000000..c667e4f23 --- /dev/null +++ b/include/phasar/ControlFlow/SparseCFGProvider.h @@ -0,0 +1,39 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H +#define PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H + +#include "phasar/Utils/ByRef.h" + +#include + +namespace psr { +template class SparseCFGProvider { +public: + using f_t = F; + using v_t = V; + + template + [[nodiscard]] decltype(auto) getSparseCFG(ByConstRef Fun, + const D &Val) const { + static_assert(std::is_convertible_v); + return self().getSparseCFGImpl(Fun, valueOf(Val)); + } + +private: + Derived &self() noexcept { return static_cast(*this); } + const Derived &self() const noexcept { + return static_cast(*this); + } +}; + +} // namespace psr + +#endif // PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h new file mode 100644 index 000000000..c9f811935 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H + +#include "phasar/ControlFlow/SparseCFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" + +#include "llvm/ADT/DenseMap.h" + +namespace psr { + +class SparseLLVMBasedCFG; + +template <> struct CFGTraits : CFGTraits { + using v_t = const llvm::Value *; +}; + +class SparseLLVMBasedCFG : public LLVMBasedCFG, + public SparseCFGBase { + friend class SparseLLVMBasedICFG; + +public: + SparseLLVMBasedCFG() noexcept = default; + SparseLLVMBasedCFG( + llvm::SmallDenseMap + &&VGraph) noexcept + : VGraph(std::move(VGraph)) {} + +private: + [[nodiscard]] n_t nextUserOrNullImpl(n_t FromInstruction) const { + return VGraph.lookup(FromInstruction); + } + + llvm::SmallDenseMap + VGraph; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h new file mode 100644 index 000000000..30e56628d --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h @@ -0,0 +1,33 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H + +#include "phasar/ControlFlow/SparseCFGProvider.h" + +namespace llvm { +class Function; +class Value; +} // namespace llvm + +namespace psr { + +template +using SparseLLVMBasedCFGProvider = + SparseCFGProvider; + +[[nodiscard]] constexpr const llvm::Value * +valueOf(const llvm::Value *V) noexcept { + return V; +} + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h new file mode 100644 index 000000000..80da66e18 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -0,0 +1,53 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" + +#include + +namespace psr { +class SparseLLVMBasedCFG; + +class SparseLLVMBasedICFG + : public LLVMBasedICFG, + public SparseLLVMBasedCFGProvider { +public: + explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints = {}, + LLVMTypeHierarchy *TH = nullptr, + LLVMAliasInfoRef PT = nullptr, + Soundness S = Soundness::Soundy, + bool IncludeGlobals = true); + + /// Creates an ICFG with an already given call-graph + explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, + LLVMTypeHierarchy *TH = nullptr); + + explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + const nlohmann::json &SerializedCG, + LLVMTypeHierarchy *TH = nullptr); + + ~SparseLLVMBasedICFG(); + +private: + [[nodiscard]] const SparseLLVMBasedCFG & + getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; + + struct CacheData; + + std::unique_ptr SparseCFGCache; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp new file mode 100644 index 000000000..5698b1f91 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -0,0 +1,89 @@ +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" + +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" + +#include "llvm/IR/IntrinsicInst.h" + +#include +#include +#include + +using namespace psr; + +struct FVHasher { + auto operator()(std::pair FV) + const noexcept { + return llvm::hash_value(FV); + } +}; + +struct 
SparseLLVMBasedICFG::CacheData { + std::unordered_map, SparseLLVMBasedCFG, FVHasher> Cache{}; +}; + +SparseLLVMBasedICFG::~SparseLLVMBasedICFG() = default; + +SparseLLVMBasedICFG::SparseLLVMBasedICFG( + LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, LLVMTypeHierarchy *TH, + LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) + : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), + SparseCFGCache(new CacheData{}) {} + +SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, + LLVMProjectIRDB *IRDB, + LLVMTypeHierarchy *TH) + : LLVMBasedICFG(std::move(CG), IRDB, TH), SparseCFGCache(new CacheData{}) {} + +SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + const nlohmann::json &SerializedCG, + LLVMTypeHierarchy *TH) + : LLVMBasedICFG(IRDB, SerializedCG, TH), SparseCFGCache(new CacheData{}) {} + +static bool shouldKeepInst(const llvm::Instruction *Inst, + const llvm::Value *Val) { + // TODO + return true; +} + +static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG &SCFG, + const llvm::Function *Fun, const llvm::Value *Val) { + llvm::SmallVector< + std::pair> + WL; + + // -- Initialization + + const auto *Entry = &Fun->getEntryBlock().front(); + if (llvm::isa(Entry)) { + Entry = Entry->getNextNonDebugInstruction(); + } + + for (const auto *Succ : CFG.getSuccsOf(Entry)) { + WL.emplace_back(Entry, Succ); + } + + // -- Fixpoint Iteration + + while (!WL.empty()) { + auto [From, To] = WL.pop_back_val(); + + // TODO + } +} + +const SparseLLVMBasedCFG & +SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, + const llvm::Value *Val) const { + assert(SparseCFGCache != nullptr); + + // TODO: Make thread-safe + + auto [It, Inserted] = + SparseCFGCache->Cache.try_emplace(std::make_pair(Fun, Val)); + if (Inserted) { + buildSparseCFG(*this, It->second, Fun, Val); + } + + return It->second; +} From 58d4f671f05ae243c8833e00481bdcdb71c6919b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel 
Date: Mon, 1 Apr 2024 15:32:38 +0200 Subject: [PATCH 02/17] Build sparse CFG (not tested yet) --- .../ControlFlow/SparseLLVMBasedCFG.h | 6 +- .../ControlFlow/SparseLLVMBasedICFG.cpp | 100 +++++++++++++++++- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index c9f811935..066f2fa22 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -28,6 +28,9 @@ class SparseLLVMBasedCFG : public LLVMBasedCFG, friend class SparseLLVMBasedICFG; public: + using vgraph_t = + llvm::SmallDenseMap; + SparseLLVMBasedCFG() noexcept = default; SparseLLVMBasedCFG( llvm::SmallDenseMap @@ -39,8 +42,7 @@ class SparseLLVMBasedCFG : public LLVMBasedCFG, return VGraph.lookup(FromInstruction); } - llvm::SmallDenseMap - VGraph; + vgraph_t VGraph; }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index 5698b1f91..adda90d9f 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -2,7 +2,13 @@ #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Casting.h" #include #include @@ -40,13 +46,88 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, LLVMTypeHierarchy *TH) : LLVMBasedICFG(IRDB, SerializedCG, TH), SparseCFGCache(new CacheData{}) {} +static const llvm::Type *getPointeeTypeOrNull(const llvm::Value *V) { + // TODO + if (const auto *Alloca = llvm::dyn_cast(V)) { + return Alloca->getAllocatedType(); + } + if (const auto *Arg = llvm::dyn_cast(V)) { + if (const auto *ByValTy = 
Arg->getParamByValType()) { + return ByValTy; + } + if (const auto *ByValTy = Arg->getParamStructRetType()) { + return ByValTy; + } + } + + // TODO: Handle more cases + + return nullptr; +} + +static bool fuzzyMayAlias(const llvm::Value * /*Ptr1*/, + const llvm::Type *PointeeTy1, + const llvm::Value * /*Ptr2*/, + const llvm::Type *PointeeTy2) { + // Pointers to pointers may alias with any pointer, because the analysis may + // not be field-sensitive. + // If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume + // anything. + + if (!PointeeTy1 || PointeeTy1->isPointerTy()) { + return true; + } + + if (!PointeeTy2 || PointeeTy2->isPointerTy()) { + return true; + } + + return PointeeTy1 == PointeeTy2; +} + static bool shouldKeepInst(const llvm::Instruction *Inst, const llvm::Value *Val) { + if (Inst == Val || llvm::pred_size(Inst->getParent()) != 1) { + // First in BB always stays for now + return true; + } + + const auto *ValTy = Val->getType(); + bool ValPtr = ValTy->isPointerTy(); + const auto *PointeeTy = ValPtr ? 
getPointeeTypeOrNull(Val) : nullptr; + + if (const auto *Call = llvm::dyn_cast(Inst)) { + if (llvm::isa(Val)) { + return true; + } + } + + for (const auto *Op : Inst->operand_values()) { + if (Op == Val) { + return true; + } + const auto *OpTy = Op->getType(); + bool OpPtr = OpTy->isPointerTy(); + + if (ValPtr != OpPtr) { + // Pointers cannot influence non-pointers + continue; + } + if (!ValPtr) { + continue; + } + + if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { + return true; + } + } + // TODO - return true; + return false; } -static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG &SCFG, +static void buildSparseCFG(const LLVMBasedCFG &CFG, + SparseLLVMBasedCFG::vgraph_t &SCFG, const llvm::Function *Fun, const llvm::Value *Val) { llvm::SmallVector< std::pair> @@ -68,7 +149,18 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG &SCFG, while (!WL.empty()) { auto [From, To] = WL.pop_back_val(); - // TODO + const auto *Curr = From; + if (shouldKeepInst(To, Val)) { + Curr = To; + auto Inserted = SCFG.try_emplace(From, To).second; + if (!Inserted) { + continue; + } + } + + for (const auto *Succ : CFG.getSuccsOf(To)) { + WL.emplace_back(Curr, Succ); + } } } @@ -82,7 +174,7 @@ SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, auto [It, Inserted] = SparseCFGCache->Cache.try_emplace(std::make_pair(Fun, Val)); if (Inserted) { - buildSparseCFG(*this, It->second, Fun, Val); + buildSparseCFG(*this, It->second.VGraph, Fun, Val); } return It->second; From acc467daa2d2cba28aabc4b8894924466721daa6 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 4 Apr 2024 19:35:25 +0200 Subject: [PATCH 03/17] Integrate Sparse CFG into IDESolver + add test (WIP) --- .../phasar/ControlFlow/SparseCFGProvider.h | 15 ++ .../DataFlow/IfdsIde/Solver/IDESolver.h | 80 ++++++++-- .../ControlFlow/SparseLLVMBasedCFG.h | 1 + .../ControlFlow/SparseLLVMBasedICFG.h | 2 + include/phasar/Utils/Nullable.h | 22 +++ 
.../ControlFlow/SparseLLVMBasedICFG.cpp | 22 ++- .../DataFlow/IfdsIde/CMakeLists.txt | 1 + .../DataFlow/IfdsIde/SparseIDESolverTest.cpp | 144 ++++++++++++++++++ 8 files changed, 277 insertions(+), 10 deletions(-) create mode 100644 unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp diff --git a/include/phasar/ControlFlow/SparseCFGProvider.h b/include/phasar/ControlFlow/SparseCFGProvider.h index c667e4f23..1ef2882f6 100644 --- a/include/phasar/ControlFlow/SparseCFGProvider.h +++ b/include/phasar/ControlFlow/SparseCFGProvider.h @@ -15,6 +15,8 @@ #include namespace psr { +template T valueOf(T Val) { return Val; } + template class SparseCFGProvider { public: using f_t = F; @@ -23,6 +25,7 @@ template class SparseCFGProvider { template [[nodiscard]] decltype(auto) getSparseCFG(ByConstRef Fun, const D &Val) const { + using psr::valueOf; static_assert(std::is_convertible_v); return self().getSparseCFGImpl(Fun, valueOf(Val)); } @@ -34,6 +37,18 @@ template class SparseCFGProvider { } }; +template +struct has_getSparseCFG : std::false_type {}; // NOLINT +template +struct has_getSparseCFG< + T, D, + std::void_t().getSparseCFG( + std::declval(), std::declval()))>> + : std::true_type {}; + +template +// NOLINTNEXTLINE +static constexpr bool has_getSparseCFG_v = has_getSparseCFG::value; } // namespace psr #endif // PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index 3274ced1a..f771cbe3c 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -18,6 +18,7 @@ #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVER_H #include "phasar/Config/Configuration.h" +#include "phasar/ControlFlow/SparseCFGProvider.h" #include "phasar/DB/ProjectIRDBBase.h" #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionStats.h" @@ -35,15 +36,18 @@ #include 
"phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/Average.h" +#include "phasar/Utils/ByRef.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/Nullable.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Table.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" #include "nlohmann/json.hpp" @@ -81,14 +85,23 @@ class IDESolver using t_t = typename AnalysisDomainTy::t_t; using v_t = typename AnalysisDomainTy::v_t; + template IDESolver(IDETabulationProblem &Problem, - const i_t *ICF) + const I *ICF) : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), JumpFn(std::make_shared>()), Seeds(Problem.initialSeeds()) { assert(ICF != nullptr); + + if constexpr (has_getSparseCFG_v) { + NextUserOrNullCB = [](const i_t *ICF, ByConstRef Fun, + ByConstRef d3, ByConstRef n) { + auto &&SCFG = static_cast(*ICF).getSparseCFG(Fun, d3); + return SCFG.nextUserOrNull(n); + }; + } } IDESolver(const IDESolver &) = delete; @@ -335,6 +348,15 @@ class IDESolver } protected: + Nullable getNextUserOrNull(ByConstRef Fun, ByConstRef d3, + ByConstRef n) { + if (!NextUserOrNullCB || IDEProblem.isZeroValue(d3)) { + return {}; + } + + return NextUserOrNullCB(ICF, Fun, d3, n); + } + /// Lines 13-20 of the algorithm; processing a call site in the caller's /// context. 
/// @@ -378,6 +400,15 @@ class IDESolver bool HasNoCalleeInformation = true; + auto &&Fun = ICF->getFunctionOf(n); + auto GetNextUse = [this, &Fun, &n](n_t nPrime, ByConstRef d3) { + if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { + return psr::unwrapNullable(std::forward(NextUser)); + } + + return nPrime; + }; + // for each possible callee for (f_t SCalledProcN : Callees) { // still line 14 // check if a special summary for the called procedure exists @@ -405,7 +436,9 @@ class IDESolver "Queried Summary Edge Function: " << SumEdgFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n'); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), + + auto DestN = GetNextUse(ReturnSiteN, d3); + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), f.composeWith(SumEdgFnE)); } } @@ -504,8 +537,10 @@ class IDESolver d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); + + auto DestN = GetNextUse(RetSiteN, d5_restoredCtx); WorkList.emplace_back( - PathEdge(d1, RetSiteN, std::move(d5_restoredCtx)), + PathEdge(d1, DestN, std::move(d5_restoredCtx)), f.composeWith(fPrime)); } } @@ -541,7 +576,8 @@ class IDESolver auto fPrime = f.composeWith(EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), + auto DestN = GetNextUse(ReturnSiteN, d3); + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), std::move(fPrime)); } } @@ -559,6 +595,8 @@ class IDESolver EdgeFunction f = jumpFunction(Edge); auto [d1, n, d2] = Edge.consume(); + const auto &Fun = ICF->getFunctionOf(n); + for (const auto nPrime : ICF->getSuccsOf(n)) { FlowFunctionPtrType FlowFunc = CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); @@ -571,14 +609,24 @@ class IDESolver CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); 
PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); EdgeFunction fPrime = f.composeWith(g); + + auto DestN = [&, &n = n] { + if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { + return psr::unwrapNullable( + std::forward(NextUser)); + } + + return nPrime; + }(); + if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] + IntermediateEdgeFunctions[std::make_tuple(n, d2, DestN, d3)] .push_back(g); } PHASAR_LOG_LEVEL(DEBUG, "Compose: " << g << " * " << f << " = " << fPrime); INC_COUNTER("EF Queries", 1, Full); - WorkList.emplace_back(PathEdge(d1, nPrime, std::move(d3)), + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), std::move(fPrime)); } } @@ -911,6 +959,7 @@ class IDESolver for (const auto &Entry : Inc) { // line 22 n_t c = Entry.first; + auto &&Fun = ICF->getFunctionOf(c); // for each return site for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { // compute return-flow function @@ -963,9 +1012,20 @@ class IDESolver d_t d3 = ValAndFunc.first; d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - WorkList.emplace_back(PathEdge(std::move(d3), RetSiteC, - std::move(d5_restoredCtx)), - f3.composeWith(fPrime)); + + auto DestN = [&] { + if (auto &&NextUser = + getNextUserOrNull(Fun, d5_restoredCtx, c)) { + return psr::unwrapNullable( + std::forward(NextUser)); + } + + return RetSiteC; + }(); + + WorkList.emplace_back( + PathEdge(std::move(d3), DestN, std::move(d5_restoredCtx)), + f3.composeWith(fPrime)); } } } @@ -1805,6 +1865,8 @@ class IDESolver d_t ZeroValue; const i_t *ICF; IFDSIDESolverConfig &SolverConfig; + Nullable (*NextUserOrNullCB)(const i_t *, ByConstRef, + ByConstRef, ByConstRef) = nullptr; std::vector, EdgeFunction>> WorkList; std::vector> ValuePropWL; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index 066f2fa22..e32d5aa4c 100644 
--- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -26,6 +26,7 @@ template <> struct CFGTraits : CFGTraits { class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { friend class SparseLLVMBasedICFG; + friend SparseCFGBase; public: using vgraph_t = diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 80da66e18..03a7db3dc 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -21,6 +21,8 @@ class SparseLLVMBasedCFG; class SparseLLVMBasedICFG : public LLVMBasedICFG, public SparseLLVMBasedCFGProvider { + friend SparseLLVMBasedCFGProvider; + public: explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, diff --git a/include/phasar/Utils/Nullable.h b/include/phasar/Utils/Nullable.h index 5bb3b2a9d..829db85ea 100644 --- a/include/phasar/Utils/Nullable.h +++ b/include/phasar/Utils/Nullable.h @@ -12,12 +12,34 @@ #include #include +#include namespace psr { template using Nullable = std::conditional_t, T, std::optional>; + +template +std::enable_if_t, T &&> +unwrapNullable(T &&Val) noexcept { + return std::forward(Val); +} +template +std::enable_if_t, T> +unwrapNullable(std::optional &&Val) noexcept { + return *std::move(Val); +} +template +std::enable_if_t, const T &> +unwrapNullable(const std::optional &Val) noexcept { + return *Val; +} +template +std::enable_if_t, T &> +unwrapNullable(std::optional &Val) noexcept { + return *Val; +} } // namespace psr #endif // PHASAR_UTILS_NULLABLE_H diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index adda90d9f..c2c12aa6f 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -1,11 +1,13 @@ #include 
"phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "llvm/IR/Argument.h" #include "llvm/IR/CFG.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" @@ -85,10 +87,21 @@ static bool fuzzyMayAlias(const llvm::Value * /*Ptr1*/, return PointeeTy1 == PointeeTy2; } +static bool isFirstInBB(const llvm::Instruction *Inst) { + return !Inst->getPrevNode(); +} + +static bool isLastInst(const llvm::Instruction *Inst) { + return !Inst->getNextNode() && llvm::succ_empty(Inst); +} + static bool shouldKeepInst(const llvm::Instruction *Inst, const llvm::Value *Val) { - if (Inst == Val || llvm::pred_size(Inst->getParent()) != 1) { + if (Inst == Val || isFirstInBB(Inst) || isLastInst(Inst)) { // First in BB always stays for now + + // llvm::errs() << "[shouldKeepInst]: 1: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } @@ -98,6 +111,8 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, if (const auto *Call = llvm::dyn_cast(Inst)) { if (llvm::isa(Val)) { + // llvm::errs() << "[shouldKeepInst]: 2: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } } @@ -118,6 +133,8 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, } if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { + // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } } @@ -129,6 +146,9 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG::vgraph_t &SCFG, const llvm::Function *Fun, const llvm::Value *Val) { + + // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " 
+ // << llvmIRToString(Val) << '\n'; llvm::SmallVector< std::pair> WL; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index 9ab94b5db..d70ebc4fe 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -4,6 +4,7 @@ set(IfdsIdeSources EdgeFunctionComposerTest.cpp EdgeFunctionSingletonCacheTest.cpp InteractiveIDESolverTest.cpp + SparseIDESolverTest.cpp ) foreach(TEST_SRC ${IfdsIdeSources}) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp new file mode 100644 index 000000000..41bac4f39 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp @@ -0,0 +1,144 @@ +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/ControlFlow/SparseCFGProvider.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDELinearConstantAnalysis.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/Utils/Soundness.h" +#include "phasar/Utils/TypeTraits.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace psr; +namespace { +/* ============== TEST FIXTURE ============== */ +class LinearConstant : public ::testing::TestWithParam { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("linear_constant/"); + const std::vector EntryPoints = {"main"}; + +}; // Test Fixture + 
+TEST_P(LinearConstant, SparseResultsEquivalent) { + LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); + LLVMTypeHierarchy TH(IRDB); + LLVMAliasSet PT(&IRDB); + + LLVMBasedICFG ICF(&IRDB, CallGraphAnalysisType::OTF, EntryPoints, &TH, &PT); + auto HasGlobalCtor = IRDB.getFunctionDefinition( + LLVMBasedICFG::GlobalCRuntimeModelName) != nullptr; + std::vector Entry = { + HasGlobalCtor ? LLVMBasedICFG::GlobalCRuntimeModelName.str() : "main"}; + SparseLLVMBasedICFG SICF(&IRDB, CallGraphAnalysisType::OTF, Entry, &TH, &PT, + psr::Soundness::Soundy, false); + + static_assert(has_getSparseCFG_v); + + IDELinearConstantAnalysis LCAProblem(&IRDB, &ICF, Entry); + IDELinearConstantAnalysis SLCAProblem(&IRDB, &SICF, Entry); + + auto DenseResults = IDESolver(LCAProblem, &ICF).solve(); + auto SparseResults = IDESolver(SLCAProblem, &SICF).solve(); + + DenseResults.dumpResults(ICF, llvm::outs() << "DenseResults:"); + SparseResults.dumpResults(SICF, llvm::outs() << "SparseResults:"); + + for (auto &&Cell : SparseResults.getAllResultEntries()) { + auto DenseRes = + DenseResults.resultAt(Cell.getRowKey(), Cell.getColumnKey()); + EXPECT_EQ(DenseRes, Cell.getValue()); + } + + // TODO: Check for existing results +} + +static constexpr std::string_view LCATestFiles[] = { + "basic_01_cpp_dbg.ll", + "basic_02_cpp_dbg.ll", + "basic_03_cpp_dbg.ll", + "basic_04_cpp_dbg.ll", + "basic_05_cpp_dbg.ll", + "basic_06_cpp_dbg.ll", + "basic_07_cpp_dbg.ll", + "basic_08_cpp_dbg.ll", + "basic_09_cpp_dbg.ll", + "basic_10_cpp_dbg.ll", + "basic_11_cpp_dbg.ll", + "basic_12_cpp_dbg.ll", + + "branch_01_cpp_dbg.ll", + "branch_02_cpp_dbg.ll", + "branch_03_cpp_dbg.ll", + "branch_04_cpp_dbg.ll", + "branch_05_cpp_dbg.ll", + "branch_06_cpp_dbg.ll", + "branch_07_cpp_dbg.ll", + + "while_01_cpp_dbg.ll", + "while_02_cpp_dbg.ll", + "while_03_cpp_dbg.ll", + "while_04_cpp_dbg.ll", + "while_05_cpp_dbg.ll", + "for_01_cpp_dbg.ll", + + "call_01_cpp_dbg.ll", + "call_02_cpp_dbg.ll", + "call_03_cpp_dbg.ll", + "call_04_cpp_dbg.ll", + 
"call_05_cpp_dbg.ll", + "call_06_cpp_dbg.ll", + "call_07_cpp_dbg.ll", + "call_08_cpp_dbg.ll", + "call_09_cpp_dbg.ll", + "call_10_cpp_dbg.ll", + "call_11_cpp_dbg.ll", + + "recursion_01_cpp_dbg.ll", + "recursion_02_cpp_dbg.ll", + "recursion_03_cpp_dbg.ll", + + "global_01_cpp_dbg.ll", + "global_02_cpp_dbg.ll", + "global_03_cpp_dbg.ll", + "global_04_cpp_dbg.ll", + "global_05_cpp_dbg.ll", + "global_06_cpp_dbg.ll", + "global_07_cpp_dbg.ll", + "global_08_cpp_dbg.ll", + "global_09_cpp_dbg.ll", + "global_10_cpp_dbg.ll", + "global_11_cpp_dbg.ll", + "global_12_cpp_dbg.ll", + "global_13_cpp_dbg.ll", + "global_14_cpp_dbg.ll", + "global_15_cpp_dbg.ll", + "global_16_cpp_dbg.ll", + + "overflow_add_cpp_dbg.ll", + "overflow_sub_cpp_dbg.ll", + "overflow_mul_cpp_dbg.ll", + "overflow_div_min_by_neg_one_cpp_dbg.ll", + + "ub_division_by_zero_cpp_dbg.ll", + "ub_modulo_by_zero_cpp_dbg.ll", +}; + +INSTANTIATE_TEST_SUITE_P(InteractiveIDESolverTest, LinearConstant, + ::testing::ValuesIn(LCATestFiles)); +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} From fd00f1fdcc00dd2e5463e8b1e8acb28266a72652 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Apr 2024 12:35:46 +0200 Subject: [PATCH 04/17] Make sparse IDE work for linear constant analysis --- .../ControlFlow/SparseLLVMBasedICFG.cpp | 104 ++++++++++++++++-- .../DataFlow/IfdsIde/SparseIDESolverTest.cpp | 4 +- 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index c2c12aa6f..5eae35b06 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -4,12 +4,15 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include 
"llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include @@ -67,9 +70,50 @@ static const llvm::Type *getPointeeTypeOrNull(const llvm::Value *V) { return nullptr; } -static bool fuzzyMayAlias(const llvm::Value * /*Ptr1*/, - const llvm::Type *PointeeTy1, - const llvm::Value * /*Ptr2*/, +static bool isNonPointerType(const llvm::Type *Ty) { + if (const auto *Struct = llvm::dyn_cast(Ty)) { + for (const auto *ElemTy : Struct->elements()) { + // TODO: Go into nested structs recursively + if (!ElemTy->isSingleValueType() || ElemTy->isVectorTy()) { + return false; + } + } + return true; + } + if (const auto *Vec = llvm::dyn_cast(Ty)) { + return !Vec->getElementType()->isPointerTy(); + } + return Ty->isSingleValueType(); +} + +static bool isNonAddressTakenVariable(const llvm::Value *Val) { + const auto *Alloca = llvm::dyn_cast(Val); + if (!Alloca) { + return false; + } + for (const auto &Use : Alloca->uses()) { + if (const auto *Store = llvm::dyn_cast(Use.getUser())) { + if (Use == Store->getValueOperand()) { + return false; + } + } else if (const auto *Call = + llvm::dyn_cast(Use.getUser())) { + auto ArgNo = Use.getOperandNo(); + if (Call->paramHasAttr(ArgNo, llvm::Attribute::StructRet)) { + continue; + } + if (Call->paramHasAttr(ArgNo, llvm::Attribute::NoCapture) && + isNonPointerType(Call->getType())) { + continue; + } + return false; + } + } + return true; +} + +static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Type *PointeeTy1, + const llvm::Value *Ptr2, const llvm::Type *PointeeTy2) { // Pointers to pointers may alias with any pointer, because the analysis may // not be field-sensitive. 
@@ -84,6 +128,10 @@ static bool fuzzyMayAlias(const llvm::Value * /*Ptr1*/, return true; } + if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { + return false; + } + return PointeeTy1 == PointeeTy2; } @@ -91,13 +139,28 @@ static bool isFirstInBB(const llvm::Instruction *Inst) { return !Inst->getPrevNode(); } -static bool isLastInst(const llvm::Instruction *Inst) { - return !Inst->getNextNode() && llvm::succ_empty(Inst); +static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { + if (Inst->getNextNode()) { + return false; + } + + if (Val->getType()->isPointerTy()) { + return true; + } + + const auto *InstBB = Inst->getParent(); + for (const auto *User : Val->users()) { + const auto *UserInst = llvm::dyn_cast(User); + if (!UserInst || UserInst->getParent() != InstBB) { + return true; + } + } + return llvm::succ_empty(Inst); } static bool shouldKeepInst(const llvm::Instruction *Inst, const llvm::Value *Val) { - if (Inst == Val || isFirstInBB(Inst) || isLastInst(Inst)) { + if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { // First in BB always stays for now // llvm::errs() << "[shouldKeepInst]: 1: " << llvmIRToString(Inst) @@ -119,18 +182,20 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, for (const auto *Op : Inst->operand_values()) { if (Op == Val) { + // llvm::errs() << "[shouldKeepInst]: 3.1: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } + if (!ValPtr) { + continue; + } const auto *OpTy = Op->getType(); bool OpPtr = OpTy->isPointerTy(); - if (ValPtr != OpPtr) { + if (!OpPtr) { // Pointers cannot influence non-pointers continue; } - if (!ValPtr) { - continue; - } if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) @@ -139,6 +204,8 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, } } + // llvm::errs() << "[shouldKeepInst]: FALSE: " << llvmIRToString(Inst) 
+ // << " :: " << llvmIRToShortString(Val) << '\n'; // TODO return false; } @@ -166,18 +233,30 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, // -- Fixpoint Iteration + llvm::SmallDenseSet Handled; + while (!WL.empty()) { auto [From, To] = WL.pop_back_val(); const auto *Curr = From; if (shouldKeepInst(To, Val)) { Curr = To; - auto Inserted = SCFG.try_emplace(From, To).second; + auto [It, Inserted] = SCFG.try_emplace(From, To); if (!Inserted) { - continue; + if (It->second != To) { + // llvm::errs() << "[buildSparseCFG]: Ambiguity at " + // << llvmIRToString(From) << " ::> " + // << llvmIRToShortString(It->second) << " VS " + // << llvmIRToShortString(To) << '\n'; + It->second = nullptr; + } } } + if (!Handled.insert(To).second) { + continue; + } + for (const auto *Succ : CFG.getSuccsOf(To)) { WL.emplace_back(Curr, Succ); } @@ -195,6 +274,7 @@ SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, SparseCFGCache->Cache.try_emplace(std::make_pair(Fun, Val)); if (Inserted) { buildSparseCFG(*this, It->second.VGraph, Fun, Val); + // llvm::errs() << "\n"; } return It->second; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp index 41bac4f39..d26b56088 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp @@ -57,7 +57,9 @@ TEST_P(LinearConstant, SparseResultsEquivalent) { for (auto &&Cell : SparseResults.getAllResultEntries()) { auto DenseRes = DenseResults.resultAt(Cell.getRowKey(), Cell.getColumnKey()); - EXPECT_EQ(DenseRes, Cell.getValue()); + EXPECT_EQ(DenseRes, Cell.getValue()) + << "At " << llvmIRToString(Cell.getRowKey()) + << " :: " << llvmIRToShortString(Cell.getColumnKey()); } // TODO: Check for existing results From 833a139849182b1fb925716317a14da1da42a32d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Apr 2024 12:49:15 +0200 Subject: [PATCH 05/17] Add 
taint analysis to SparseIDETest (passes) -- NO SPARSITY!! --- .../DataFlow/IfdsIde/SparseIDESolverTest.cpp | 77 +++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp index d26b56088..902b5270e 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp @@ -6,10 +6,13 @@ #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDELinearConstantAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" #include "phasar/PhasarLLVM/HelperAnalyses.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/TypeTraits.h" @@ -24,13 +27,16 @@ namespace { /* ============== TEST FIXTURE ============== */ class LinearConstant : public ::testing::TestWithParam { protected: - static constexpr auto PathToLlFiles = - PHASAR_BUILD_SUBFOLDER("linear_constant/"); const std::vector EntryPoints = {"main"}; - -}; // Test Fixture +}; +class DoubleFreeTA : public ::testing::TestWithParam { +protected: + const std::vector EntryPoints = {"main"}; +}; TEST_P(LinearConstant, SparseResultsEquivalent) { + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("linear_constant/"); LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); LLVMTypeHierarchy TH(IRDB); LLVMAliasSet PT(&IRDB); @@ -65,6 +71,60 @@ TEST_P(LinearConstant, SparseResultsEquivalent) { // TODO: Check for existing results } +static LLVMTaintConfig getDoubleFreeConfig() { + 
auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); +} + +TEST_P(DoubleFreeTA, SparseLeaksEquivalent) { + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("taint_analysis/"); + LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); + LLVMTypeHierarchy TH(IRDB); + LLVMAliasSet PT(&IRDB); + + LLVMBasedICFG ICF(&IRDB, CallGraphAnalysisType::OTF, EntryPoints, &TH, &PT); + auto HasGlobalCtor = IRDB.getFunctionDefinition( + LLVMBasedICFG::GlobalCRuntimeModelName) != nullptr; + std::vector Entry = { + HasGlobalCtor ? LLVMBasedICFG::GlobalCRuntimeModelName.str() : "main"}; + SparseLLVMBasedICFG SICF(&IRDB, CallGraphAnalysisType::OTF, Entry, &TH, &PT, + psr::Soundness::Soundy, false); + + static_assert(has_getSparseCFG_v); + + auto Config = getDoubleFreeConfig(); + IFDSTaintAnalysis TaintProblem(&IRDB, &PT, &Config, Entry); + IFDSTaintAnalysis STaintProblem(&IRDB, &PT, &Config, Entry); + + auto DenseResults = IDESolver(TaintProblem, &ICF).solve(); + auto SparseResults = IDESolver(STaintProblem, &SICF).solve(); + + for (const auto &[LeakInst, Leaks] : TaintProblem.Leaks) { + auto LeakIt = STaintProblem.Leaks.find(LeakInst); + EXPECT_NE(LeakIt, STaintProblem.Leaks.end()) + << "SparseIDE did not find expected leak(s) at " + << llvmIRToString(LeakInst); + + if (LeakIt == STaintProblem.Leaks.end()) { + continue; + } + + const auto &SLeaks = LeakIt->second; + EXPECT_EQ(Leaks, SLeaks) + << "Leak sets at " << llvmIRToString(LeakInst) << " do not match"; + } +} + static constexpr std::string_view LCATestFiles[] = { "basic_01_cpp_dbg.ll", "basic_02_cpp_dbg.ll", @@ -136,8 +196,15 @@ static constexpr std::string_view LCATestFiles[] = { "ub_modulo_by_zero_cpp_dbg.ll", }; 
-INSTANTIATE_TEST_SUITE_P(InteractiveIDESolverTest, LinearConstant, +static constexpr std::string_view TaintTestFiles[] = { + "double_free_01_c.ll", + "double_free_02_c.ll", +}; + +INSTANTIATE_TEST_SUITE_P(SparseIDETest, LinearConstant, ::testing::ValuesIn(LCATestFiles)); +INSTANTIATE_TEST_SUITE_P(SparseIDETest, DoubleFreeTA, + ::testing::ValuesIn(TaintTestFiles)); } // namespace int main(int Argc, char **Argv) { From 93087b0f1dcc7a7b8982b8875fcdaecd81d203e7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Apr 2024 13:01:41 +0200 Subject: [PATCH 06/17] minor --- include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index 3a5f20d8f..14c2c0542 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -38,11 +38,11 @@ class IFDSSolver using n_t = typename AnalysisDomainTy::n_t; using i_t = typename AnalysisDomainTy::i_t; - template >> IFDSSolver(IFDSTabulationProblem &IFDSProblem, - const i_t *ICF) + const I *ICF) : IDESolver>(IFDSProblem, ICF) {} ~IFDSSolver() override = default; From d101dc7dccb4c3a41c792199901f98f67931e98e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 12:26:07 +0200 Subject: [PATCH 07/17] Select right opt tool for mem2reg --- cmake/phasar_macros.cmake | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index 8499a4905..27a105c02 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -118,10 +118,18 @@ function(generate_ll_file) endif() if(GEN_LL_MEM2REG) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + get_filename_component(COMPILER_PATH_STR ${CMAKE_CXX_COMPILER} DIRECTORY) + set(COMPILER_PATH PATHS ${COMPILER_PATH_STR}) + else() + set(COMPILER_PATH) + endif() + find_program(OPT_TOOL 
opt REQUIRED ${COMPILER_PATH}) + add_custom_command( OUTPUT ${test_code_ll_file} COMMAND ${GEN_CMD} ${test_code_file_path} -o ${test_code_ll_file} - COMMAND ${CMAKE_CXX_COMPILER_LAUNCHER} opt -mem2reg -S ${test_code_ll_file} -o ${test_code_ll_file} + COMMAND ${CMAKE_CXX_COMPILER_LAUNCHER} ${OPT_TOOL} -mem2reg -S -opaque-pointers=0 ${test_code_ll_file} -o ${test_code_ll_file} COMMENT ${GEN_CMD_COMMENT} DEPENDS ${GEN_LL_FILE} VERBATIM From a9dc76f37e750579b75c50e5adc76a1def69ae6a Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 12:33:01 +0200 Subject: [PATCH 08/17] Introduce PSR_FWD --- include/phasar/DataFlow/IfdsIde/FlowFunctions.h | 3 ++- .../phasar/DataFlow/IfdsIde/Solver/IDESolver.h | 9 ++++----- include/phasar/Utils/Macros.h | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 include/phasar/Utils/Macros.h diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 83520ba07..16a6995bf 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -17,6 +17,7 @@ #ifndef PHASAR_DATAFLOW_IFDSIDE_FLOWFUNCTIONS_H #define PHASAR_DATAFLOW_IFDSIDE_FLOWFUNCTIONS_H +#include "phasar/Utils/Macros.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/ArrayRef.h" @@ -131,7 +132,7 @@ Container makeContainer(Range &&Rng) { Container C; reserveIfPossible(C, Rng.size()); for (auto &&Fact : Rng) { - C.insert(std::forward(Fact)); + C.insert(PSR_FWD(Fact)); } return C; } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index f771cbe3c..f7199df2d 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -40,6 +40,7 @@ #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/Macros.h" #include 
"phasar/Utils/Nullable.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Table.h" @@ -403,7 +404,7 @@ class IDESolver auto &&Fun = ICF->getFunctionOf(n); auto GetNextUse = [this, &Fun, &n](n_t nPrime, ByConstRef d3) { if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { - return psr::unwrapNullable(std::forward(NextUser)); + return psr::unwrapNullable(PSR_FWD(NextUser)); } return nPrime; @@ -612,8 +613,7 @@ class IDESolver auto DestN = [&, &n = n] { if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { - return psr::unwrapNullable( - std::forward(NextUser)); + return psr::unwrapNullable(PSR_FWD(NextUser)); } return nPrime; @@ -1016,8 +1016,7 @@ class IDESolver auto DestN = [&] { if (auto &&NextUser = getNextUserOrNull(Fun, d5_restoredCtx, c)) { - return psr::unwrapNullable( - std::forward(NextUser)); + return psr::unwrapNullable(PSR_FWD(NextUser)); } return RetSiteC; diff --git a/include/phasar/Utils/Macros.h b/include/phasar/Utils/Macros.h new file mode 100644 index 000000000..bb083365d --- /dev/null +++ b/include/phasar/Utils/Macros.h @@ -0,0 +1,15 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_MACROS_H +#define PHASAR_UTILS_MACROS_H + +#define PSR_FWD(...) 
::std::forward(__VA_ARGS__) + +#endif // PHASAR_UTILS_MACROS_H From ca3b96885c860c32af4c613c4ef6b6237852c5c7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 14:31:32 +0200 Subject: [PATCH 09/17] Integrate sparse ifds taint analysis into phasar-cli --- .clang-tidy | 1 + CMakeLists.txt | 2 +- config/double-free-config.json | 28 ++ .../DataFlow/IfdsIde/Solver/IDESolver.h | 12 +- .../ControlFlow/SparseLLVMBasedCFG.h | 2 +- .../ControlFlow/SparseLLVMBasedICFG.h | 5 +- .../ControlFlow/SparseLLVMBasedICFGView.h | 72 +++++ .../PhasarLLVM/Utils/DataFlowAnalysisType.def | 1 + lib/Controller/AnalysisController.cpp | 3 + lib/Controller/AnalysisControllerInternal.h | 2 + .../AnalysisControllerInternalIDE.h | 42 ++- .../AnalysisControllerXSparseIFDSTaint.cpp | 20 ++ lib/PhasarLLVM/ControlFlow/SVFGCache.cpp | 233 ++++++++++++++++ lib/PhasarLLVM/ControlFlow/SVFGCache.h | 40 +++ .../ControlFlow/SparseLLVMBasedICFG.cpp | 250 +----------------- .../ControlFlow/SparseLLVMBasedICFGView.cpp | 67 +++++ 16 files changed, 521 insertions(+), 259 deletions(-) create mode 100644 config/double-free-config.json create mode 100644 include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h create mode 100644 lib/Controller/AnalysisControllerXSparseIFDSTaint.cpp create mode 100644 lib/PhasarLLVM/ControlFlow/SVFGCache.cpp create mode 100644 lib/PhasarLLVM/ControlFlow/SVFGCache.h create mode 100644 lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp diff --git a/.clang-tidy b/.clang-tidy index 54f04105c..5f0d1e9ef 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -17,6 +17,7 @@ Checks: '-*, -readability-convert-member-functions-to-static, -readability-isolate-declaration, -readability-identifier-length, + -readability-redundant-member-init, cppcoreguidelines-*, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, diff --git a/CMakeLists.txt b/CMakeLists.txt index eaf415b14..265874b92 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -71,7 +71,7 @@ set(RELEASE_CONFIGURATIONS RELWITHDEBINFO RELEASE CACHE INTERNAL "" FORCE) # https://reviews.llvm.org/D157613 string(APPEND CMAKE_CXX_FLAGS " -MP -fstack-protector-strong -ffunction-sections -fdata-sections -pipe") -string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Og -fno-omit-frame-pointer") +string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELEASE "") diff --git a/config/double-free-config.json b/config/double-free-config.json new file mode 100644 index 000000000..043b39228 --- /dev/null +++ b/config/double-free-config.json @@ -0,0 +1,28 @@ +{ + "name": "double-free", + "version": 1.0, + "functions": [ + { + "name": "free", + "params": { + "source": [ + 0 + ], + "sink": [ + 0 + ] + } + }, + { + "name": "_ZdlPv", + "params": { + "source": [ + 0 + ], + "sink": [ + 0 + ] + } + } + ] +} diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index f7199df2d..3e9669ca7 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -89,7 +89,8 @@ class IDESolver template IDESolver(IDETabulationProblem &Problem, const I *ICF) - : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), + : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), + ICF(&static_cast(*ICF)), SVFG(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), JumpFn(std::make_shared>()), @@ -97,9 +98,9 @@ class IDESolver assert(ICF != nullptr); if constexpr (has_getSparseCFG_v) { - NextUserOrNullCB = [](const i_t *ICF, ByConstRef Fun, + NextUserOrNullCB = [](const void *SVFG, ByConstRef Fun, ByConstRef d3, ByConstRef n) { - auto &&SCFG = static_cast(*ICF).getSparseCFG(Fun, d3); + auto &&SCFG = static_cast(SVFG)->getSparseCFG(Fun, d3); return SCFG.nextUserOrNull(n); 
}; } @@ -355,7 +356,7 @@ class IDESolver return {}; } - return NextUserOrNullCB(ICF, Fun, d3, n); + return NextUserOrNullCB(SVFG, Fun, d3, n); } /// Lines 13-20 of the algorithm; processing a call site in the caller's @@ -1863,8 +1864,9 @@ class IDESolver IDETabulationProblem &IDEProblem; d_t ZeroValue; const i_t *ICF; + const void *SVFG; IFDSIDESolverConfig &SolverConfig; - Nullable (*NextUserOrNullCB)(const i_t *, ByConstRef, + Nullable (*NextUserOrNullCB)(const void *, ByConstRef, ByConstRef, ByConstRef) = nullptr; std::vector, EdgeFunction>> WorkList; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index e32d5aa4c..8645f5c72 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -25,7 +25,7 @@ template <> struct CFGTraits : CFGTraits { class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { - friend class SparseLLVMBasedICFG; + friend struct SVFGCache; friend SparseCFGBase; public: diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 03a7db3dc..8233f00c8 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -17,6 +17,7 @@ namespace psr { class SparseLLVMBasedCFG; +struct SVFGCache; class SparseLLVMBasedICFG : public LLVMBasedICFG, @@ -46,9 +47,7 @@ class SparseLLVMBasedICFG [[nodiscard]] const SparseLLVMBasedCFG & getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; - struct CacheData; - - std::unique_ptr SparseCFGCache; + std::unique_ptr SparseCFGCache; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h new file mode 100644 index 000000000..11ecad668 --- /dev/null +++ 
b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -0,0 +1,72 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H + +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/ControlFlow/ICFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" +#include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" + +#include + +namespace psr { +class LLVMProjectIRDB; +class LLVMBasedICFG; +class SparseLLVMBasedCFG; +class SparseLLVMBasedICFGView; +struct SVFGCache; + +template <> +struct CFGTraits : CFGTraits {}; + +/// Similar to SparseLLVMBasedICFG; the only difference is that this one is not +/// itself an LLVMBasedICFG -- it holds a pointer to an already existing one. +/// It still owns the sparse value-flow graphs. +class SparseLLVMBasedICFGView + : public LLVMBasedCFG, + public ICFGBase, + public SparseLLVMBasedCFGProvider { + friend ICFGBase; + friend SparseLLVMBasedCFGProvider; + +public: + explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF); + + ~SparseLLVMBasedICFGView(); + + // To make the IDESolver happy... 
+ operator const LLVMBasedICFG &() const noexcept { return *ICF; } + +private: + [[nodiscard]] FunctionRange getAllFunctionsImpl() const; + [[nodiscard]] f_t getFunctionImpl(llvm::StringRef Fun) const; + + [[nodiscard]] bool isIndirectFunctionCallImpl(n_t Inst) const; + [[nodiscard]] bool isVirtualFunctionCallImpl(n_t Inst) const; + [[nodiscard]] std::vector allNonCallStartNodesImpl() const; + [[nodiscard]] llvm::SmallVector getCallsFromWithinImpl(f_t Fun) const; + [[nodiscard]] llvm::SmallVector + getReturnSitesOfCallAtImpl(n_t Inst) const; + void printImpl(llvm::raw_ostream &OS) const; + [[nodiscard]] nlohmann::json getAsJsonImpl() const; + [[nodiscard]] const CallGraph &getCallGraphImpl() const noexcept; + + [[nodiscard]] const SparseLLVMBasedCFG & + getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; + + const LLVMProjectIRDB *IRDB{}; + const LLVMBasedICFG *ICF{}; + std::unique_ptr SparseCFGCache; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index 08cf9e9e0..564fb245b 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -14,6 +14,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSUninitializedVariables, "ifds-uninit", "Find usages of uninitialized variables.") DATA_FLOW_ANALYSIS_TYPES(IFDSConstAnalysis, "ifds-const", "Find variables that are actually mutated through the program") DATA_FLOW_ANALYSIS_TYPES(IFDSTaintAnalysis, "ifds-taint", "Simple, alias-aware taint-analysis. Use with --analysis-config") +DATA_FLOW_ANALYSIS_TYPES(SparseIFDSTaintAnalysis, "sparse-ifds-taint", "Simple, alias-aware taint-analysis utilizing SparseIFDS. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IDEExtendedTaintAnalysis, "ide-xtaint", "More advanced alias-aware taint analysis that provides limited field-sensitivity. 
Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IFDSTypeAnalysis, "ifds-type", "Simple type analysis") DATA_FLOW_ANALYSIS_TYPES(IDECSTDIOTypeStateAnalysis, "ide-stdio-ts", "Find invalid usages of the libc file-io") diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index fd77d2ec5..387498a42 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -128,6 +128,9 @@ static void executeWholeProgram(AnalysisController::ControllerData &Data) { case DataFlowAnalysisType::IFDSTaintAnalysis: executeIFDSTaint(Data); continue; + case DataFlowAnalysisType::SparseIFDSTaintAnalysis: + executeSparseIFDSTaint(Data); + continue; case DataFlowAnalysisType::IDEExtendedTaintAnalysis: executeIDEXTaint(Data); continue; diff --git a/lib/Controller/AnalysisControllerInternal.h b/lib/Controller/AnalysisControllerInternal.h index 8a736e3a1..d6f88ba4f 100644 --- a/lib/Controller/AnalysisControllerInternal.h +++ b/lib/Controller/AnalysisControllerInternal.h @@ -35,6 +35,8 @@ executeIFDSConst(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSTaint(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void +executeSparseIFDSTaint(AnalysisController::ControllerData &Data); +LLVM_LIBRARY_VISIBILITY void executeIFDSType(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSSolverTest(AnalysisController::ControllerData &Data); diff --git a/lib/Controller/AnalysisControllerInternalIDE.h b/lib/Controller/AnalysisControllerInternalIDE.h index fa758bbb6..ede0b8fc3 100644 --- a/lib/Controller/AnalysisControllerInternalIDE.h +++ b/lib/Controller/AnalysisControllerInternalIDE.h @@ -12,6 +12,8 @@ #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" #include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h" #include 
"AnalysisControllerInternal.h" @@ -23,12 +25,13 @@ static void statsEmitter(llvm::raw_ostream &OS, const IDESolver &Solver) { Solver.printEdgeFunctionStatistics(OS); } -template -static void executeIfdsIdeAnalysis(AnalysisController::ControllerData &Data, - ArgTys &&...Args) { +template +static void executeIfdsIdeAnalysisImpl(AnalysisController::ControllerData &Data, + const ICFGTy &ICF, ArgTys &&...Args) { auto Problem = createAnalysisProblem(*Data.HA, std::forward(Args)...); - SolverTy Solver(Problem, &Data.HA->getICFG()); + SolverTy Solver(Problem, &ICF); { std::optional MeasureTime; if (Data.EmitterOptions & @@ -43,6 +46,23 @@ static void executeIfdsIdeAnalysis(AnalysisController::ControllerData &Data, emitRequestedDataFlowResults(Data, Solver); } +template +static void executeIfdsIdeAnalysis(AnalysisController::ControllerData &Data, + ArgTys &&...Args) { + executeIfdsIdeAnalysisImpl( + Data, Data.HA->getICFG(), std::forward(Args)...); +} + +template +static void +executeSparseIfdsIdeAnalysis(AnalysisController::ControllerData &Data, + ArgTys &&...Args) { + + SparseLLVMBasedICFGView SVFG(&Data.HA->getICFG()); + executeIfdsIdeAnalysisImpl( + Data, SVFG, std::forward(Args)...); +} + template static void executeIFDSAnalysis(AnalysisController::ControllerData &Data, ArgTys &&...Args) { @@ -50,6 +70,13 @@ static void executeIFDSAnalysis(AnalysisController::ControllerData &Data, Data, std::forward(Args)...); } +template +static void executeSparseIFDSAnalysis(AnalysisController::ControllerData &Data, + ArgTys &&...Args) { + executeSparseIfdsIdeAnalysis, ProblemTy>( + Data, std::forward(Args)...); +} + template static void executeIDEAnalysis(AnalysisController::ControllerData &Data, ArgTys &&...Args) { @@ -57,6 +84,13 @@ static void executeIDEAnalysis(AnalysisController::ControllerData &Data, Data, std::forward(Args)...); } +template +static void executeSparseIDEAnalysis(AnalysisController::ControllerData &Data, + ArgTys &&...Args) { + executeSparseIfdsIdeAnalysis, 
ProblemTy>( + Data, std::forward(Args)...); +} + } // namespace psr::controller #endif // PHASAR_CONTROLLER_ANALYSISCONTROLLERINTERNALMONO_H diff --git a/lib/Controller/AnalysisControllerXSparseIFDSTaint.cpp b/lib/Controller/AnalysisControllerXSparseIFDSTaint.cpp new file mode 100644 index 000000000..7efcb8614 --- /dev/null +++ b/lib/Controller/AnalysisControllerXSparseIFDSTaint.cpp @@ -0,0 +1,20 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" + +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeSparseIFDSTaint( + AnalysisController::ControllerData &Data) { + auto Config = makeTaintConfig(Data); + executeSparseIFDSAnalysis(Data, &Config, Data.EntryPoints); +} diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp new file mode 100644 index 000000000..2741c260d --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp @@ -0,0 +1,233 @@ +#include "SVFGCache.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" + +#include "llvm/IR/IntrinsicInst.h" + +using namespace psr; + +static const llvm::Type *getPointeeTypeOrNull(const llvm::Value *V) { + // TODO + if (const auto *Alloca = llvm::dyn_cast(V)) { + return Alloca->getAllocatedType(); + } + if (const auto *Arg = llvm::dyn_cast(V)) { + if (const auto *ByValTy = Arg->getParamByValType()) { + return ByValTy; + } + if (const auto *ByValTy = Arg->getParamStructRetType()) { + return ByValTy; + } + } + + // TODO: Handle more cases + + return nullptr; +} + 
+static bool isNonPointerType(const llvm::Type *Ty) { + if (const auto *Struct = llvm::dyn_cast(Ty)) { + for (const auto *ElemTy : Struct->elements()) { + // TODO: Go into nested structs recursively + if (!ElemTy->isSingleValueType() || ElemTy->isVectorTy()) { + return false; + } + } + return true; + } + if (const auto *Vec = llvm::dyn_cast(Ty)) { + return !Vec->getElementType()->isPointerTy(); + } + return Ty->isSingleValueType(); +} + +static bool isNonAddressTakenVariable(const llvm::Value *Val) { + const auto *Alloca = llvm::dyn_cast(Val); + if (!Alloca) { + return false; + } + for (const auto &Use : Alloca->uses()) { + if (const auto *Store = llvm::dyn_cast(Use.getUser())) { + if (Use == Store->getValueOperand()) { + return false; + } + } else if (const auto *Call = + llvm::dyn_cast(Use.getUser())) { + auto ArgNo = Use.getOperandNo(); + if (Call->paramHasAttr(ArgNo, llvm::Attribute::StructRet)) { + continue; + } + if (Call->paramHasAttr(ArgNo, llvm::Attribute::NoCapture) && + isNonPointerType(Call->getType())) { + continue; + } + return false; + } + } + return true; +} + +static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Type *PointeeTy1, + const llvm::Value *Ptr2, + const llvm::Type *PointeeTy2) { + // Pointers to pointers may alias with any pointer, because the analysis may + // not be field-sensitive. + // If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume + // anything. 
+ + if (!PointeeTy1 || PointeeTy1->isPointerTy()) { + return true; + } + + if (!PointeeTy2 || PointeeTy2->isPointerTy()) { + return true; + } + + if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { + return false; + } + + return PointeeTy1 == PointeeTy2; +} + +static bool isFirstInBB(const llvm::Instruction *Inst) { + return !Inst->getPrevNode(); +} + +static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { + if (Inst->getNextNode()) { + return false; + } + + if (Val->getType()->isPointerTy()) { + return true; + } + + const auto *InstBB = Inst->getParent(); + for (const auto *User : Val->users()) { + const auto *UserInst = llvm::dyn_cast(User); + if (!UserInst || UserInst->getParent() != InstBB) { + return true; + } + } + return llvm::succ_empty(Inst); +} + +static bool shouldKeepInst(const llvm::Instruction *Inst, + const llvm::Value *Val) { + if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { + // First in BB always stays for now + + // llvm::errs() << "[shouldKeepInst]: 1: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; + return true; + } + + const auto *ValTy = Val->getType(); + bool ValPtr = ValTy->isPointerTy(); + const auto *PointeeTy = ValPtr ? 
getPointeeTypeOrNull(Val) : nullptr; + + if (const auto *Call = llvm::dyn_cast(Inst)) { + if (llvm::isa(Val)) { + // llvm::errs() << "[shouldKeepInst]: 2: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; + return true; + } + } + + for (const auto *Op : Inst->operand_values()) { + if (Op == Val) { + // llvm::errs() << "[shouldKeepInst]: 3.1: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; + return true; + } + if (!ValPtr) { + continue; + } + const auto *OpTy = Op->getType(); + bool OpPtr = OpTy->isPointerTy(); + + if (!OpPtr) { + // Pointers cannot influence non-pointers + continue; + } + + if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { + // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; + return true; + } + } + + // llvm::errs() << "[shouldKeepInst]: FALSE: " << llvmIRToString(Inst) + // << " :: " << llvmIRToShortString(Val) << '\n'; + // TODO + return false; +} + +static void buildSparseCFG(const LLVMBasedCFG &CFG, + SparseLLVMBasedCFG::vgraph_t &SCFG, + const llvm::Function *Fun, const llvm::Value *Val) { + + // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " + // << llvmIRToString(Val) << '\n'; + llvm::SmallVector< + std::pair> + WL; + + // -- Initialization + + const auto *Entry = &Fun->getEntryBlock().front(); + if (llvm::isa(Entry)) { + Entry = Entry->getNextNonDebugInstruction(); + } + + for (const auto *Succ : CFG.getSuccsOf(Entry)) { + WL.emplace_back(Entry, Succ); + } + + // -- Fixpoint Iteration + + llvm::SmallDenseSet Handled; + + while (!WL.empty()) { + auto [From, To] = WL.pop_back_val(); + + const auto *Curr = From; + if (shouldKeepInst(To, Val)) { + Curr = To; + auto [It, Inserted] = SCFG.try_emplace(From, To); + if (!Inserted) { + if (It->second != To) { + // llvm::errs() << "[buildSparseCFG]: Ambiguity at " + // << llvmIRToString(From) << " ::> " + // << 
llvmIRToShortString(It->second) << " VS " + // << llvmIRToShortString(To) << '\n'; + It->second = nullptr; + } + } + } + + if (!Handled.insert(To).second) { + continue; + } + + for (const auto *Succ : CFG.getSuccsOf(To)) { + WL.emplace_back(Curr, Succ); + } + } +} + +const SparseLLVMBasedCFG &SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, + const llvm::Function *Fun, + const llvm::Value *Val) { + // TODO: Make thread-safe + + auto [It, Inserted] = Cache.try_emplace(std::make_pair(Fun, Val)); + if (Inserted) { + buildSparseCFG(CFG, It->second.VGraph, Fun, Val); + } + + return It->second; +} diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.h b/lib/PhasarLLVM/ControlFlow/SVFGCache.h new file mode 100644 index 000000000..6270f185a --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H + +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" + +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" + +#include + +namespace psr { +struct FVHasher { + auto operator()(std::pair FV) + const noexcept { + return llvm::hash_value(FV); + } +}; + +struct SVFGCache { + using f_t = const llvm::Function *; + using v_t = const llvm::Value *; + std::unordered_map, SparseLLVMBasedCFG, FVHasher> Cache{}; + + LLVM_LIBRARY_VISIBILITY const SparseLLVMBasedCFG & + getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, + const llvm::Value *Val); +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSECFGCACHE_H diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index 5eae35b06..f677aeae6 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -1,22 +1,8 @@ #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" -#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" -#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" - -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" +#include "SVFGCache.h" #include -#include #include using namespace psr; @@ -28,10 +14,6 @@ struct FVHasher { } }; -struct SparseLLVMBasedICFG::CacheData { - std::unordered_map, SparseLLVMBasedCFG, FVHasher> Cache{}; -}; - SparseLLVMBasedICFG::~SparseLLVMBasedICFG() = 
default; SparseLLVMBasedICFG::SparseLLVMBasedICFG( @@ -39,243 +21,21 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG( llvm::ArrayRef EntryPoints, LLVMTypeHierarchy *TH, LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), - SparseCFGCache(new CacheData{}) {} + SparseCFGCache(new SVFGCache{}) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, LLVMTypeHierarchy *TH) - : LLVMBasedICFG(std::move(CG), IRDB, TH), SparseCFGCache(new CacheData{}) {} + : LLVMBasedICFG(std::move(CG), IRDB, TH), SparseCFGCache(new SVFGCache{}) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedCG, LLVMTypeHierarchy *TH) - : LLVMBasedICFG(IRDB, SerializedCG, TH), SparseCFGCache(new CacheData{}) {} - -static const llvm::Type *getPointeeTypeOrNull(const llvm::Value *V) { - // TODO - if (const auto *Alloca = llvm::dyn_cast(V)) { - return Alloca->getAllocatedType(); - } - if (const auto *Arg = llvm::dyn_cast(V)) { - if (const auto *ByValTy = Arg->getParamByValType()) { - return ByValTy; - } - if (const auto *ByValTy = Arg->getParamStructRetType()) { - return ByValTy; - } - } - - // TODO: Handle more cases - - return nullptr; -} - -static bool isNonPointerType(const llvm::Type *Ty) { - if (const auto *Struct = llvm::dyn_cast(Ty)) { - for (const auto *ElemTy : Struct->elements()) { - // TODO: Go into nested structs recursively - if (!ElemTy->isSingleValueType() || ElemTy->isVectorTy()) { - return false; - } - } - return true; - } - if (const auto *Vec = llvm::dyn_cast(Ty)) { - return !Vec->getElementType()->isPointerTy(); - } - return Ty->isSingleValueType(); -} - -static bool isNonAddressTakenVariable(const llvm::Value *Val) { - const auto *Alloca = llvm::dyn_cast(Val); - if (!Alloca) { - return false; - } - for (const auto &Use : Alloca->uses()) { - if (const auto *Store = llvm::dyn_cast(Use.getUser())) { - if (Use == Store->getValueOperand()) { - 
return false; - } - } else if (const auto *Call = - llvm::dyn_cast(Use.getUser())) { - auto ArgNo = Use.getOperandNo(); - if (Call->paramHasAttr(ArgNo, llvm::Attribute::StructRet)) { - continue; - } - if (Call->paramHasAttr(ArgNo, llvm::Attribute::NoCapture) && - isNonPointerType(Call->getType())) { - continue; - } - return false; - } - } - return true; -} - -static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Type *PointeeTy1, - const llvm::Value *Ptr2, - const llvm::Type *PointeeTy2) { - // Pointers to pointers may alias with any pointer, because the analysis may - // not be field-sensitive. - // If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume - // anything. - - if (!PointeeTy1 || PointeeTy1->isPointerTy()) { - return true; - } - - if (!PointeeTy2 || PointeeTy2->isPointerTy()) { - return true; - } - - if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { - return false; - } - - return PointeeTy1 == PointeeTy2; -} - -static bool isFirstInBB(const llvm::Instruction *Inst) { - return !Inst->getPrevNode(); -} - -static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { - if (Inst->getNextNode()) { - return false; - } - - if (Val->getType()->isPointerTy()) { - return true; - } - - const auto *InstBB = Inst->getParent(); - for (const auto *User : Val->users()) { - const auto *UserInst = llvm::dyn_cast(User); - if (!UserInst || UserInst->getParent() != InstBB) { - return true; - } - } - return llvm::succ_empty(Inst); -} - -static bool shouldKeepInst(const llvm::Instruction *Inst, - const llvm::Value *Val) { - if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { - // First in BB always stays for now - - // llvm::errs() << "[shouldKeepInst]: 1: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - return true; - } - - const auto *ValTy = Val->getType(); - bool ValPtr = ValTy->isPointerTy(); - const auto *PointeeTy = ValPtr ? 
getPointeeTypeOrNull(Val) : nullptr; - - if (const auto *Call = llvm::dyn_cast(Inst)) { - if (llvm::isa(Val)) { - // llvm::errs() << "[shouldKeepInst]: 2: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - return true; - } - } - - for (const auto *Op : Inst->operand_values()) { - if (Op == Val) { - // llvm::errs() << "[shouldKeepInst]: 3.1: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - return true; - } - if (!ValPtr) { - continue; - } - const auto *OpTy = Op->getType(); - bool OpPtr = OpTy->isPointerTy(); - - if (!OpPtr) { - // Pointers cannot influence non-pointers - continue; - } - - if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { - // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - return true; - } - } - - // llvm::errs() << "[shouldKeepInst]: FALSE: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - // TODO - return false; -} - -static void buildSparseCFG(const LLVMBasedCFG &CFG, - SparseLLVMBasedCFG::vgraph_t &SCFG, - const llvm::Function *Fun, const llvm::Value *Val) { - - // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " - // << llvmIRToString(Val) << '\n'; - llvm::SmallVector< - std::pair> - WL; - - // -- Initialization - - const auto *Entry = &Fun->getEntryBlock().front(); - if (llvm::isa(Entry)) { - Entry = Entry->getNextNonDebugInstruction(); - } - - for (const auto *Succ : CFG.getSuccsOf(Entry)) { - WL.emplace_back(Entry, Succ); - } - - // -- Fixpoint Iteration - - llvm::SmallDenseSet Handled; - - while (!WL.empty()) { - auto [From, To] = WL.pop_back_val(); - - const auto *Curr = From; - if (shouldKeepInst(To, Val)) { - Curr = To; - auto [It, Inserted] = SCFG.try_emplace(From, To); - if (!Inserted) { - if (It->second != To) { - // llvm::errs() << "[buildSparseCFG]: Ambiguity at " - // << llvmIRToString(From) << " ::> " - // << 
llvmIRToShortString(It->second) << " VS " - // << llvmIRToShortString(To) << '\n'; - It->second = nullptr; - } - } - } - - if (!Handled.insert(To).second) { - continue; - } - - for (const auto *Succ : CFG.getSuccsOf(To)) { - WL.emplace_back(Curr, Succ); - } - } -} + : LLVMBasedICFG(IRDB, SerializedCG, TH), SparseCFGCache(new SVFGCache{}) {} const SparseLLVMBasedCFG & SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const { assert(SparseCFGCache != nullptr); - - // TODO: Make thread-safe - - auto [It, Inserted] = - SparseCFGCache->Cache.try_emplace(std::make_pair(Fun, Val)); - if (Inserted) { - buildSparseCFG(*this, It->second.VGraph, Fun, Val); - // llvm::errs() << "\n"; - } - - return It->second; + return SparseCFGCache->getOrCreate(*this, Fun, Val); } diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp new file mode 100644 index 000000000..69c8cc827 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp @@ -0,0 +1,67 @@ +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" + +#include "SVFGCache.h" + +using namespace psr; + +SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF) + : IRDB(ICF->getIRDB()), ICF(ICF), SparseCFGCache(new SVFGCache{}) { + // +} + +SparseLLVMBasedICFGView::~SparseLLVMBasedICFGView() = default; + +FunctionRange SparseLLVMBasedICFGView::getAllFunctionsImpl() const { + return IRDB->getAllFunctions(); +} + +auto SparseLLVMBasedICFGView::getFunctionImpl(llvm::StringRef Fun) const + -> f_t { + return IRDB->getFunction(Fun); +}; + +bool SparseLLVMBasedICFGView::isIndirectFunctionCallImpl(n_t Inst) const { + return ICF->isIndirectFunctionCall(Inst); +} + +bool SparseLLVMBasedICFGView::isVirtualFunctionCallImpl(n_t Inst) const { + return ICF->isVirtualFunctionCall(Inst); 
+} + +auto SparseLLVMBasedICFGView::allNonCallStartNodesImpl() const + -> std::vector { + return ICF->allNonCallStartNodes(); +} + +auto SparseLLVMBasedICFGView::getCallsFromWithinImpl(f_t Fun) const + -> llvm::SmallVector { + return ICF->getCallsFromWithin(Fun); +} + +auto SparseLLVMBasedICFGView::getReturnSitesOfCallAtImpl(n_t Inst) const + -> llvm::SmallVector { + return ICF->getReturnSitesOfCallAt(Inst); +} + +void SparseLLVMBasedICFGView::printImpl(llvm::raw_ostream &OS) const { + ICF->print(OS); +} + +nlohmann::json SparseLLVMBasedICFGView::getAsJsonImpl() const { + return ICF->getAsJson(); +} + +auto SparseLLVMBasedICFGView::getCallGraphImpl() const noexcept + -> const CallGraph & { + return ICF->getCallGraph(); +} + +const SparseLLVMBasedCFG & +SparseLLVMBasedICFGView::getSparseCFGImpl(const llvm::Function *Fun, + const llvm::Value *Val) const { + assert(SparseCFGCache != nullptr); + return SparseCFGCache->getOrCreate(*this, Fun, Val); +} From 9523fe5541c765786891f7590706522d9b716e5a Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 14:38:51 +0200 Subject: [PATCH 10/17] Fix build after merge from dev +relax opt path requirement --- cmake/phasar_macros.cmake | 2 +- .../PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h | 6 ++---- lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp | 10 ++++------ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index ecf0be9ca..750c95b9c 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -131,7 +131,7 @@ function(generate_ll_file) else() set(COMPILER_PATH) endif() - find_program(OPT_TOOL opt REQUIRED ${COMPILER_PATH}) + find_program(OPT_TOOL opt REQUIRED HINTS ${COMPILER_PATH}) add_custom_command( OUTPUT ${test_code_ll_file} diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 8233f00c8..bf5184350 100644 --- 
a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -34,12 +34,10 @@ class SparseLLVMBasedICFG bool IncludeGlobals = true); /// Creates an ICFG with an already given call-graph - explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, - LLVMTypeHierarchy *TH = nullptr); + explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB); explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, - const nlohmann::json &SerializedCG, - LLVMTypeHierarchy *TH = nullptr); + const nlohmann::json &SerializedCG); ~SparseLLVMBasedICFG(); diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index f677aeae6..6d4e9db47 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -24,14 +24,12 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG( SparseCFGCache(new SVFGCache{}) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, - LLVMProjectIRDB *IRDB, - LLVMTypeHierarchy *TH) - : LLVMBasedICFG(std::move(CG), IRDB, TH), SparseCFGCache(new SVFGCache{}) {} + LLVMProjectIRDB *IRDB) + : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, - const nlohmann::json &SerializedCG, - LLVMTypeHierarchy *TH) - : LLVMBasedICFG(IRDB, SerializedCG, TH), SparseCFGCache(new SVFGCache{}) {} + const nlohmann::json &SerializedCG) + : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}) {} const SparseLLVMBasedCFG & SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, From c4994e03c575742931f43b7179d479684e114750 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 14:45:03 +0200 Subject: [PATCH 11/17] Fix opt-tool selection --- cmake/phasar_macros.cmake | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmake/phasar_macros.cmake 
b/cmake/phasar_macros.cmake index 750c95b9c..45edaf252 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -127,11 +127,10 @@ function(generate_ll_file) if(GEN_LL_MEM2REG) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") get_filename_component(COMPILER_PATH_STR ${CMAKE_CXX_COMPILER} DIRECTORY) - set(COMPILER_PATH PATHS ${COMPILER_PATH_STR}) + find_program(OPT_TOOL opt REQUIRED HINTS ${COMPILER_PATH_STR}) else() - set(COMPILER_PATH) + find_program(OPT_TOOL opt REQUIRED) endif() - find_program(OPT_TOOL opt REQUIRED HINTS ${COMPILER_PATH}) add_custom_command( OUTPUT ${test_code_ll_file} From cf14f78c0ef46d2bf5a5bf3404221077d1d87afe Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 9 Jun 2024 14:47:25 +0200 Subject: [PATCH 12/17] fallback --- cmake/phasar_macros.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index 45edaf252..e44a61e00 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -127,9 +127,13 @@ function(generate_ll_file) if(GEN_LL_MEM2REG) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") get_filename_component(COMPILER_PATH_STR ${CMAKE_CXX_COMPILER} DIRECTORY) - find_program(OPT_TOOL opt REQUIRED HINTS ${COMPILER_PATH_STR}) + find_program(OPT_TOOL opt HINTS ${COMPILER_PATH_STR}) else() - find_program(OPT_TOOL opt REQUIRED) + find_program(OPT_TOOL opt) + endif() + + if(NOT OPT_TOOL) + set(OPT_TOOL opt) endif() add_custom_command( From 0d0b59250f8d7cbb1dd9496d1a04be5a4180f7ac Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 18 Oct 2024 11:32:18 +0200 Subject: [PATCH 13/17] Fixed seqfault if find() = end() --- .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index 1b54e5b16..85490d7f5 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -262,9 +262,8 @@ transferAndKillTwoFlows(d_t To, d_t From1, d_t From2) { }); } -auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, - [[maybe_unused]] n_t Succ) - -> FlowFunctionPtrType { +auto IFDSTaintAnalysis::getNormalFlowFunction( + n_t Curr, [[maybe_unused]] n_t Succ) -> FlowFunctionPtrType { // If a tainted value is stored, the store location must be tainted too if (const auto *Store = llvm::dyn_cast(Curr)) { container_type Gen; @@ -329,10 +328,9 @@ auto IFDSTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) return mapFactsToCallee(CS, DestFun); } -auto IFDSTaintAnalysis::getRetFlowFunction(n_t CallSite, f_t /*CalleeFun*/, - n_t ExitStmt, - [[maybe_unused]] n_t RetSite) - -> FlowFunctionPtrType { +auto IFDSTaintAnalysis::getRetFlowFunction( + n_t CallSite, f_t /*CalleeFun*/, n_t ExitStmt, + [[maybe_unused]] n_t RetSite) -> FlowFunctionPtrType { // We must check if the return value and formal parameter are tainted, if so // we must taint all user's of the function call. We are only interested in // formal parameters of pointer/reference type. 
@@ -349,10 +347,9 @@ auto IFDSTaintAnalysis::getRetFlowFunction(n_t CallSite, f_t /*CalleeFun*/, // All other stuff is killed at this point } -auto IFDSTaintAnalysis::getCallToRetFlowFunction(n_t CallSite, - [[maybe_unused]] n_t RetSite, - llvm::ArrayRef Callees) - -> FlowFunctionPtrType { +auto IFDSTaintAnalysis::getCallToRetFlowFunction( + n_t CallSite, [[maybe_unused]] n_t RetSite, + llvm::ArrayRef Callees) -> FlowFunctionPtrType { const auto *CS = llvm::cast(CallSite); @@ -416,12 +413,14 @@ auto IFDSTaintAnalysis::getSummaryFlowFunction([[maybe_unused]] n_t CallSite, llvm::zip(CS->args(), DestFun->args())) { if (Source == Arg.get()) { auto VecFacts = DestFunFacts.find(DestParam.getArgNo()); - for (const auto &VecFact : VecFacts->second) { - if (const auto *Param = - std::get_if(&VecFact.Fact)) { - Facts.insert(CS->getArgOperand(Param->Index)); - } else { - Facts.insert(CallSite); + if (VecFacts != DestFunFacts.end()) { + for (const auto &VecFact : VecFacts->second) { + if (const auto *Param = std::get_if( + &VecFact.Fact)) { + Facts.insert(CS->getArgOperand(Param->Index)); + } else { + Facts.insert(CallSite); + } } } } From 8f820b301828b1e09020c8dcea9e5f356a2a10d9 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Tue, 29 Oct 2024 14:59:24 +0100 Subject: [PATCH 14/17] AliasAnalysis for fuzzyMayAlias --- .../ControlFlow/SparseLLVMBasedICFG.h | 3 ++ .../ControlFlow/SparseLLVMBasedICFGView.h | 2 ++ lib/PhasarLLVM/ControlFlow/SVFGCache.cpp | 36 +++++++++---------- lib/PhasarLLVM/ControlFlow/SVFGCache.h | 4 ++- .../ControlFlow/SparseLLVMBasedICFG.cpp | 13 ++++--- .../ControlFlow/SparseLLVMBasedICFGView.cpp | 6 ++-- 6 files changed, 37 insertions(+), 27 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index bf5184350..a5ca13887 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -12,6 
+12,7 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include @@ -46,6 +47,8 @@ class SparseLLVMBasedICFG getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; std::unique_ptr SparseCFGCache; + LLVMProjectIRDB *IRDB{}; + std::shared_ptr AliasAnalysis; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index 11ecad668..d1fadd751 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -14,6 +14,7 @@ #include "phasar/ControlFlow/ICFGBase.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" #include @@ -66,6 +67,7 @@ class SparseLLVMBasedICFGView const LLVMProjectIRDB *IRDB{}; const LLVMBasedICFG *ICF{}; std::unique_ptr SparseCFGCache; + std::shared_ptr AliasAnalysis; }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp index 2741c260d..c9dcc1c3b 100644 --- a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp @@ -2,8 +2,10 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/Pointer/AliasAnalysisType.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Casting.h" using namespace psr; @@ -68,27 +70,18 @@ static bool isNonAddressTakenVariable(const llvm::Value *Val) { return true; } -static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Type *PointeeTy1, - const llvm::Value *Ptr2, - const llvm::Type *PointeeTy2) { +static 
bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, + const std::shared_ptr &AliasAnalysis) { // Pointers to pointers may alias with any pointer, because the analysis may // not be field-sensitive. // If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume // anything. - if (!PointeeTy1 || PointeeTy1->isPointerTy()) { - return true; - } - - if (!PointeeTy2 || PointeeTy2->isPointerTy()) { - return true; - } - if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { return false; } - return PointeeTy1 == PointeeTy2; + return AliasAnalysis->alias(Ptr1, Ptr2) == AliasResult::MayAlias; } static bool isFirstInBB(const llvm::Instruction *Inst) { @@ -115,7 +108,8 @@ static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { } static bool shouldKeepInst(const llvm::Instruction *Inst, - const llvm::Value *Val) { + const llvm::Value *Val, + const std::shared_ptr &AliasAnalysis) { if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { // First in BB always stays for now @@ -153,7 +147,7 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, continue; } - if (fuzzyMayAlias(Val, PointeeTy, Op, getPointeeTypeOrNull(Op))) { + if (fuzzyMayAlias(Val, Op, AliasAnalysis)) { // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) // << " :: " << llvmIRToShortString(Val) << '\n'; return true; @@ -168,7 +162,8 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG::vgraph_t &SCFG, - const llvm::Function *Fun, const llvm::Value *Val) { + const llvm::Function *Fun, const llvm::Value *Val, + const std::shared_ptr &AliasAnalysis) { // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " // << llvmIRToString(Val) << '\n'; @@ -195,7 +190,7 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, auto [From, To] = WL.pop_back_val(); const auto *Curr = From; - if (shouldKeepInst(To, Val)) { + if 
(shouldKeepInst(To, Val, AliasAnalysis)) { Curr = To; auto [It, Inserted] = SCFG.try_emplace(From, To); if (!Inserted) { @@ -219,14 +214,15 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, } } -const SparseLLVMBasedCFG &SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, - const llvm::Function *Fun, - const llvm::Value *Val) { +const SparseLLVMBasedCFG & +SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, + const llvm::Value *Val, + const std::shared_ptr &AliasAnalysis) { // TODO: Make thread-safe auto [It, Inserted] = Cache.try_emplace(std::make_pair(Fun, Val)); if (Inserted) { - buildSparseCFG(CFG, It->second.VGraph, Fun, Val); + buildSparseCFG(CFG, It->second.VGraph, Fun, Val, AliasAnalysis); } return It->second; diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.h b/lib/PhasarLLVM/ControlFlow/SVFGCache.h index 6270f185a..e981e344f 100644 --- a/lib/PhasarLLVM/ControlFlow/SVFGCache.h +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "llvm/IR/Function.h" #include "llvm/Support/Compiler.h" @@ -32,7 +33,8 @@ struct SVFGCache { LLVM_LIBRARY_VISIBILITY const SparseLLVMBasedCFG & getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, - const llvm::Value *Val); + const llvm::Value *Val, + const std::shared_ptr &AliasAnalysis); }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index 6d4e9db47..b8a73abf4 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -1,5 +1,7 @@ #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + #include "SVFGCache.h" #include @@ -21,19 +23,22 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG( llvm::ArrayRef 
EntryPoints, LLVMTypeHierarchy *TH, LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), - SparseCFGCache(new SVFGCache{}) {} + SparseCFGCache(new SVFGCache{}), IRDB(IRDB), + AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB) - : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}) {} + : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}), + IRDB(IRDB), AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedCG) - : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}) {} + : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}), + IRDB(IRDB), AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} const SparseLLVMBasedCFG & SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const { assert(SparseCFGCache != nullptr); - return SparseCFGCache->getOrCreate(*this, Fun, Val); + return SparseCFGCache->getOrCreate(*this, Fun, Val, AliasAnalysis); } diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp index 69c8cc827..854dd230d 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp @@ -2,13 +2,15 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "SVFGCache.h" using namespace psr; SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF) - : IRDB(ICF->getIRDB()), ICF(ICF), SparseCFGCache(new SVFGCache{}) { + : IRDB(ICF->getIRDB()), ICF(ICF), SparseCFGCache(new SVFGCache{}), + AliasAnalysis(new LLVMAliasSet(ICF->getIRDB(), false)) { // } @@ -63,5 +65,5 @@ const 
SparseLLVMBasedCFG & SparseLLVMBasedICFGView::getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const { assert(SparseCFGCache != nullptr); - return SparseCFGCache->getOrCreate(*this, Fun, Val); + return SparseCFGCache->getOrCreate(*this, Fun, Val, AliasAnalysis); } From 3dbc53002ae5e4bf1bb6ad5ebf74770365e47269 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 30 Oct 2024 10:32:29 +0100 Subject: [PATCH 15/17] changed to LLVMAliasInfoRef --- .../PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h | 8 +++++--- .../ControlFlow/SparseLLVMBasedICFGView.h | 7 ++++--- lib/PhasarLLVM/ControlFlow/SVFGCache.cpp | 13 ++++++------- lib/PhasarLLVM/ControlFlow/SVFGCache.h | 5 ++--- lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp | 13 +++++++------ .../ControlFlow/SparseLLVMBasedICFGView.cpp | 5 +++-- .../Controller/AnalysisControllerInternalIDE.h | 2 +- 7 files changed, 28 insertions(+), 25 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index a5ca13887..937e3540e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -35,10 +35,12 @@ class SparseLLVMBasedICFG bool IncludeGlobals = true); /// Creates an ICFG with an already given call-graph - explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB); + explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT); explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, - const nlohmann::json &SerializedCG); + const nlohmann::json &SerializedCG, + LLVMAliasInfoRef PT); ~SparseLLVMBasedICFG(); @@ -48,7 +50,7 @@ class SparseLLVMBasedICFG std::unique_ptr SparseCFGCache; LLVMProjectIRDB *IRDB{}; - std::shared_ptr AliasAnalysis; + LLVMAliasInfoRef AliasAnalysis; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h 
b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index d1fadd751..f439d7058 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -14,7 +14,7 @@ #include "phasar/ControlFlow/ICFGBase.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" #include @@ -40,7 +40,8 @@ class SparseLLVMBasedICFGView friend SparseLLVMBasedCFGProvider; public: - explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF); + explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, + LLVMAliasInfoRef PT); ~SparseLLVMBasedICFGView(); @@ -67,7 +68,7 @@ class SparseLLVMBasedICFGView const LLVMProjectIRDB *IRDB{}; const LLVMBasedICFG *ICF{}; std::unique_ptr SparseCFGCache; - std::shared_ptr AliasAnalysis; + LLVMAliasInfoRef AliasAnalysis; }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp index c9dcc1c3b..e0ecbe767 100644 --- a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp @@ -2,6 +2,7 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Pointer/AliasAnalysisType.h" #include "llvm/IR/IntrinsicInst.h" @@ -71,7 +72,7 @@ static bool isNonAddressTakenVariable(const llvm::Value *Val) { } static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, - const std::shared_ptr &AliasAnalysis) { + LLVMAliasInfoRef AliasAnalysis) { // Pointers to pointers may alias with any pointer, because the analysis may // not be field-sensitive. 
// If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume @@ -81,7 +82,7 @@ static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, return false; } - return AliasAnalysis->alias(Ptr1, Ptr2) == AliasResult::MayAlias; + return AliasAnalysis.alias(Ptr1, Ptr2) != AliasResult::NoAlias; } static bool isFirstInBB(const llvm::Instruction *Inst) { @@ -109,7 +110,7 @@ static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { static bool shouldKeepInst(const llvm::Instruction *Inst, const llvm::Value *Val, - const std::shared_ptr &AliasAnalysis) { + LLVMAliasInfoRef AliasAnalysis) { if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { // First in BB always stays for now @@ -120,7 +121,6 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, const auto *ValTy = Val->getType(); bool ValPtr = ValTy->isPointerTy(); - const auto *PointeeTy = ValPtr ? getPointeeTypeOrNull(Val) : nullptr; if (const auto *Call = llvm::dyn_cast(Inst)) { if (llvm::isa(Val)) { @@ -163,7 +163,7 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG::vgraph_t &SCFG, const llvm::Function *Fun, const llvm::Value *Val, - const std::shared_ptr &AliasAnalysis) { + LLVMAliasInfoRef AliasAnalysis) { // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " // << llvmIRToString(Val) << '\n'; @@ -216,8 +216,7 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, const SparseLLVMBasedCFG & SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, - const llvm::Value *Val, - const std::shared_ptr &AliasAnalysis) { + const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis) { // TODO: Make thread-safe auto [It, Inserted] = Cache.try_emplace(std::make_pair(Fun, Val)); diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.h b/lib/PhasarLLVM/ControlFlow/SVFGCache.h index e981e344f..c4adc6f96 100644 --- 
a/lib/PhasarLLVM/ControlFlow/SVFGCache.h +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.h @@ -11,7 +11,7 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/Compiler.h" @@ -33,8 +33,7 @@ struct SVFGCache { LLVM_LIBRARY_VISIBILITY const SparseLLVMBasedCFG & getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, - const llvm::Value *Val, - const std::shared_ptr &AliasAnalysis); + const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis); }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index b8a73abf4..c7fed7de0 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -23,18 +23,19 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG( llvm::ArrayRef EntryPoints, LLVMTypeHierarchy *TH, LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), - SparseCFGCache(new SVFGCache{}), IRDB(IRDB), - AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} + SparseCFGCache(new SVFGCache{}), IRDB(IRDB), AliasAnalysis(PT) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, - LLVMProjectIRDB *IRDB) + LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT) : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}), - IRDB(IRDB), AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} + IRDB(IRDB), AliasAnalysis(PT) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, - const nlohmann::json &SerializedCG) + const nlohmann::json &SerializedCG, + LLVMAliasInfoRef PT) : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}), - IRDB(IRDB), AliasAnalysis(new LLVMAliasSet(IRDB, false)) {} + IRDB(IRDB), AliasAnalysis(PT) {} const 
SparseLLVMBasedCFG & SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp index 854dd230d..fbfa0e42a 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp @@ -8,9 +8,10 @@ using namespace psr; -SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF) +SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, + LLVMAliasInfoRef PT) : IRDB(ICF->getIRDB()), ICF(ICF), SparseCFGCache(new SVFGCache{}), - AliasAnalysis(new LLVMAliasSet(ICF->getIRDB(), false)) { + AliasAnalysis(PT) { // } diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h b/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h index f668b786e..789157a75 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h @@ -56,7 +56,7 @@ template static void executeSparseIfdsIdeAnalysis(AnalysisController &Data, ArgTys &&...Args) { - SparseLLVMBasedICFGView SVFG(&Data.HA->getICFG()); + SparseLLVMBasedICFGView SVFG(&Data.HA->getICFG(), &Data.HA->getAliasInfo()); executeIfdsIdeAnalysisImpl( Data, SVFG, std::forward(Args)...); } From dcb239d78a974c33ac9298291039fb3cdcca25c0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 28 Nov 2024 19:51:30 +0100 Subject: [PATCH 16/17] some cleanup --- .clang-tidy | 1 + .../ControlFlow/SparseLLVMBasedICFG.h | 1 - .../ControlFlow/SparseLLVMBasedICFGView.h | 1 - lib/PhasarLLVM/ControlFlow/SVFGCache.cpp | 54 ++----------------- .../ControlFlow/SparseLLVMBasedICFG.cpp | 6 +-- .../ControlFlow/SparseLLVMBasedICFGView.cpp | 10 ++-- 6 files changed, 12 insertions(+), 61 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5f0d1e9ef..4b361e855 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,6 +18,7 @@ Checks: '-*, 
-readability-isolate-declaration, -readability-identifier-length, -readability-redundant-member-init, + -readability-use-anyofallof, cppcoreguidelines-*, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 937e3540e..6ba3e8963 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -49,7 +49,6 @@ class SparseLLVMBasedICFG getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; std::unique_ptr SparseCFGCache; - LLVMProjectIRDB *IRDB{}; LLVMAliasInfoRef AliasAnalysis; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index f439d7058..37c5dc1f2 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -65,7 +65,6 @@ class SparseLLVMBasedICFGView [[nodiscard]] const SparseLLVMBasedCFG & getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; - const LLVMProjectIRDB *IRDB{}; const LLVMBasedICFG *ICF{}; std::unique_ptr SparseCFGCache; LLVMAliasInfoRef AliasAnalysis; diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp index e0ecbe767..c7a49c7ac 100644 --- a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp @@ -3,36 +3,16 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/Pointer/AliasAnalysisType.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" using namespace psr; -static const llvm::Type 
*getPointeeTypeOrNull(const llvm::Value *V) { - // TODO - if (const auto *Alloca = llvm::dyn_cast(V)) { - return Alloca->getAllocatedType(); - } - if (const auto *Arg = llvm::dyn_cast(V)) { - if (const auto *ByValTy = Arg->getParamByValType()) { - return ByValTy; - } - if (const auto *ByValTy = Arg->getParamStructRetType()) { - return ByValTy; - } - } - - // TODO: Handle more cases - - return nullptr; -} - static bool isNonPointerType(const llvm::Type *Ty) { if (const auto *Struct = llvm::dyn_cast(Ty)) { for (const auto *ElemTy : Struct->elements()) { - // TODO: Go into nested structs recursively + // XXX: Go into nested structs recursively if (!ElemTy->isSingleValueType() || ElemTy->isVectorTy()) { return false; } @@ -71,13 +51,8 @@ static bool isNonAddressTakenVariable(const llvm::Value *Val) { return true; } -static bool fuzzyMayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, - LLVMAliasInfoRef AliasAnalysis) { - // Pointers to pointers may alias with any pointer, because the analysis may - // not be field-sensitive. - // If we don't know the pointee-type (PointeeTyN == nullptr), we cannot assume - // anything. 
- +static bool mayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, + LLVMAliasInfoRef AliasAnalysis) { if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { return false; } @@ -113,9 +88,6 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, LLVMAliasInfoRef AliasAnalysis) { if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { // First in BB always stays for now - - // llvm::errs() << "[shouldKeepInst]: 1: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } @@ -124,16 +96,12 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, if (const auto *Call = llvm::dyn_cast(Inst)) { if (llvm::isa(Val)) { - // llvm::errs() << "[shouldKeepInst]: 2: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } } for (const auto *Op : Inst->operand_values()) { if (Op == Val) { - // llvm::errs() << "[shouldKeepInst]: 3.1: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; return true; } if (!ValPtr) { @@ -147,16 +115,11 @@ static bool shouldKeepInst(const llvm::Instruction *Inst, continue; } - if (fuzzyMayAlias(Val, Op, AliasAnalysis)) { - // llvm::errs() << "[shouldKeepInst]: 3: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; + if (mayAlias(Val, Op, AliasAnalysis)) { return true; } } - // llvm::errs() << "[shouldKeepInst]: FALSE: " << llvmIRToString(Inst) - // << " :: " << llvmIRToShortString(Val) << '\n'; - // TODO return false; } @@ -164,9 +127,6 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, SparseLLVMBasedCFG::vgraph_t &SCFG, const llvm::Function *Fun, const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis) { - - // llvm::errs() << "Build SCFG for '" << Fun->getName() << "' and value " - // << llvmIRToString(Val) << '\n'; llvm::SmallVector< std::pair> WL; @@ -195,10 +155,6 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, auto [It, Inserted] = SCFG.try_emplace(From, To); if 
(!Inserted) { if (It->second != To) { - // llvm::errs() << "[buildSparseCFG]: Ambiguity at " - // << llvmIRToString(From) << " ::> " - // << llvmIRToShortString(It->second) << " VS " - // << llvmIRToShortString(To) << '\n'; It->second = nullptr; } } @@ -217,7 +173,7 @@ static void buildSparseCFG(const LLVMBasedCFG &CFG, const SparseLLVMBasedCFG & SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis) { - // TODO: Make thread-safe + // XXX: Make thread-safe auto [It, Inserted] = Cache.try_emplace(std::make_pair(Fun, Val)); if (Inserted) { diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp index c7fed7de0..660f8953f 100644 --- a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -23,19 +23,19 @@ SparseLLVMBasedICFG::SparseLLVMBasedICFG( llvm::ArrayRef EntryPoints, LLVMTypeHierarchy *TH, LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), - SparseCFGCache(new SVFGCache{}), IRDB(IRDB), AliasAnalysis(PT) {} + SparseCFGCache(new SVFGCache{}), AliasAnalysis(PT) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT) : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}), - IRDB(IRDB), AliasAnalysis(PT) {} + AliasAnalysis(PT) {} SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedCG, LLVMAliasInfoRef PT) : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}), - IRDB(IRDB), AliasAnalysis(PT) {} + AliasAnalysis(PT) {} const SparseLLVMBasedCFG & SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp index fbfa0e42a..a61b147cc 100644 --- 
a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp @@ -1,7 +1,6 @@ #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "SVFGCache.h" @@ -10,20 +9,17 @@ using namespace psr; SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, LLVMAliasInfoRef PT) - : IRDB(ICF->getIRDB()), ICF(ICF), SparseCFGCache(new SVFGCache{}), - AliasAnalysis(PT) { - // -} + : ICF(ICF), SparseCFGCache(new SVFGCache{}), AliasAnalysis(PT) {} SparseLLVMBasedICFGView::~SparseLLVMBasedICFGView() = default; FunctionRange SparseLLVMBasedICFGView::getAllFunctionsImpl() const { - return IRDB->getAllFunctions(); + return ICF->getAllFunctions(); } auto SparseLLVMBasedICFGView::getFunctionImpl(llvm::StringRef Fun) const -> f_t { - return IRDB->getFunction(Fun); + return ICF->getFunction(Fun); }; bool SparseLLVMBasedICFGView::isIndirectFunctionCallImpl(n_t Inst) const { From c9d9ba1362386f3f5197649b1c03960866ef2991 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 15 Dec 2024 13:12:00 +0100 Subject: [PATCH 17/17] minor --- .../PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp index 902b5270e..e9dc31c7d 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp @@ -14,14 +14,10 @@ #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Soundness.h" -#include "phasar/Utils/TypeTraits.h" #include "TestConfig.h" #include "gtest/gtest.h" -#include -#include - using namespace 
psr; namespace { /* ============== TEST FIXTURE ============== */ @@ -67,8 +63,8 @@ TEST_P(LinearConstant, SparseResultsEquivalent) { << "At " << llvmIRToString(Cell.getRowKey()) << " :: " << llvmIRToShortString(Cell.getColumnKey()); } - - // TODO: Check for existing results + // Note: Do not check for equivalence, because SparseIDE is *expected* to + // compute fewer (N, D) results than vanilla IDE. } static LLVMTaintConfig getDoubleFreeConfig() {