From 78ee1e1835e73734bf6aed9b1eec3add0da51b73 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Fri, 10 May 2024 10:17:43 -0400 Subject: [PATCH 01/10] Reorder members to prepare for adding CGSCC --- src/BoundedTerminationPass.cpp | 160 ++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 73 deletions(-) diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index bee0ebb..4b9dbf7 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -22,7 +22,7 @@ #include //------------------------------------------------------------------------------ -// New PM interface +// Type definitions //------------------------------------------------------------------------------ // Key result for the bounded termination pass: @@ -62,6 +62,57 @@ enum class DoesThisTerminate { Unknown, }; +// Complete result for a termination evaluation: +// an enum result, plus an explanation of reasoning. +struct TerminationPassResult { + DoesThisTerminate elt = DoesThisTerminate::Unevaluated; + std::string explanation = "unevaluated"; +}; + +// Pass over functions: does this terminate: +struct FunctionTerminationPass + : public llvm::AnalysisInfoMixin { + using Result = TerminationPassResult; + Result run(llvm::Function &F, llvm::FunctionAnalysisManager &); + // Part of the official API: + // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes + static bool isRequired() { return true; } + +private: + // A special type used by analysis passes to provide an address that + // identifies that particular analysis pass type. 
+ static llvm::AnalysisKey Key; + friend llvm::AnalysisInfoMixin; +}; + +// Printer pass for the function termination checker +class BoundedTerminationPrinter + : public llvm::PassInfoMixin { +public: + explicit BoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &FAM); + // Part of the official API: + // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes + static bool isRequired() { return true; } + +private: + llvm::raw_ostream &OS; +}; + +//------------------------------------------------------------------------------ +// Free functions +//------------------------------------------------------------------------------ + +bool operator<(const TerminationPassResult &a, + const TerminationPassResult &b) { + if (a.elt == b.elt) { + return a.explanation < b.explanation; + } else { + return a.elt < b.elt; + } +} + llvm::StringRef to_string(DoesThisTerminate t) { switch (t) { case DoesThisTerminate::Unevaluated: @@ -98,53 +149,7 @@ std::string friendly_name_block(llvm::StringRef unfriendly) { return result; } -struct BoundedTerminationPassResult { - DoesThisTerminate elt = DoesThisTerminate::Unevaluated; - std::string explanation = "unevaluated"; -}; - -bool operator<(const BoundedTerminationPassResult &a, - const BoundedTerminationPassResult &b) { - if (a.elt == b.elt) { - return a.explanation < b.explanation; - } else { - return a.elt < b.elt; - } -} - -struct BoundedTerminationPass - : public llvm::AnalysisInfoMixin { - using Result = BoundedTerminationPassResult; - Result run(llvm::Function &F, llvm::FunctionAnalysisManager &); - // Part of the official API: - // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes - static bool isRequired() { return true; } - -private: - // A special type used by analysis passes to provide an address that - // identifies that particular analysis pass type. 
- static llvm::AnalysisKey Key; - friend llvm::AnalysisInfoMixin; -}; - -//------------------------------------------------------------------------------ -// New PM interface for the printer pass -//------------------------------------------------------------------------------ -class BoundedTerminationPrinter - : public llvm::PassInfoMixin { -public: - explicit BoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} - llvm::PreservedAnalyses run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM); - // Part of the official API: - // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes - static bool isRequired() { return true; } - -private: - llvm::raw_ostream &OS; -}; - -BoundedTerminationPassResult +TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) { for (const auto &I : block) { // Classify instructions based on whether we need to look at their metadata @@ -160,21 +165,21 @@ basicBlockClassifier(const llvm::BasicBlock &block) { } else { callee_name = "Indirect"; } - return BoundedTerminationPassResult{ + return TerminationPassResult{ .elt = DoesThisTerminate::Unknown, .explanation = "Calls function with unknown properties: " + callee_name}; } } - return BoundedTerminationPassResult{.elt = DoesThisTerminate::Bounded, + return TerminationPassResult{.elt = DoesThisTerminate::Bounded, .explanation = "no calls"}; } -BoundedTerminationPassResult join(BoundedTerminationPassResult res1, - BoundedTerminationPassResult res2) { - BoundedTerminationPassResult minResult = std::min(res1, res2); - BoundedTerminationPassResult maxResult = std::max(res1, res2); +TerminationPassResult join(TerminationPassResult res1, + TerminationPassResult res2) { + TerminationPassResult minResult = std::min(res1, res2); + TerminationPassResult maxResult = std::max(res1, res2); if (minResult.elt == DoesThisTerminate::Unevaluated) { return maxResult; @@ -182,7 +187,7 @@ BoundedTerminationPassResult join(BoundedTerminationPassResult res1, if 
(minResult.elt == DoesThisTerminate::Bounded) { if (maxResult.elt == DoesThisTerminate::Unbounded) { - return BoundedTerminationPassResult{ + return TerminationPassResult{ .elt = DoesThisTerminate::Unknown, .explanation = "Joined with Unbounded branch: " + maxResult.explanation, @@ -194,7 +199,7 @@ BoundedTerminationPassResult join(BoundedTerminationPassResult res1, if (minResult.elt == DoesThisTerminate::Unbounded) { if (maxResult.elt == DoesThisTerminate::Unbounded) { - return BoundedTerminationPassResult{ + return TerminationPassResult{ .elt = DoesThisTerminate::Unbounded, .explanation = "Joined two Unbounded branches: (" + minResult.explanation + "), (" + @@ -206,11 +211,11 @@ BoundedTerminationPassResult join(BoundedTerminationPassResult res1, return maxResult; } -BoundedTerminationPassResult -update(BoundedTerminationPassResult result, - std::vector pred_results) { +TerminationPassResult +update(TerminationPassResult result, + std::vector pred_results) { - BoundedTerminationPassResult predecessor_result; + TerminationPassResult predecessor_result; for (const auto &predecessor : pred_results) { predecessor_result = join(predecessor_result, predecessor); } @@ -231,15 +236,15 @@ update(BoundedTerminationPassResult result, } } -BoundedTerminationPassResult loopClassifier(const llvm::Loop &loop, +TerminationPassResult loopClassifier(const llvm::Loop &loop, llvm::ScalarEvolution &SE) { std::optional bounds = loop.getBounds(SE); if (!bounds.has_value()) { - return BoundedTerminationPassResult{ + return TerminationPassResult{ .elt = DoesThisTerminate::Unknown, .explanation = "includes loop with indeterminate bounds"}; } - return BoundedTerminationPassResult{ + return TerminationPassResult{ .elt = DoesThisTerminate::Bounded, .explanation = "includes a loop, but it has a fixed bound"}; } @@ -255,18 +260,20 @@ bool isExitingBlock(const llvm::BasicBlock &B) { return terminator->willReturn(); } -llvm::AnalysisKey BoundedTerminationPass::Key; 
+//------------------------------------------------------------------------------ +// Pass bodies +//------------------------------------------------------------------------------ -BoundedTerminationPass::Result -BoundedTerminationPass::run(llvm::Function &F, +FunctionTerminationPass::Result +FunctionTerminationPass::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { - std::map blocks_to_results; + std::map blocks_to_results; // SetVector preserves insertion order - which is nice because it makes this deterministic. llvm::SetVector outstanding_nodes; // Step 1 : do local basic block analysis for (auto &basic_block : F) { - BoundedTerminationPassResult result = basicBlockClassifier(basic_block); + TerminationPassResult result = basicBlockClassifier(basic_block); blocks_to_results.insert_or_assign(&basic_block, result); outstanding_nodes.insert(&basic_block); } @@ -289,7 +296,7 @@ BoundedTerminationPass::run(llvm::Function &F, while (!outstanding_nodes.empty()) { llvm::BasicBlock *block = outstanding_nodes.pop_back_val(); auto original = blocks_to_results.at(block); - std::vector results; + std::vector results; for(llvm::BasicBlock *predecessor : llvm::predecessors(block)) { results.emplace_back(blocks_to_results.at(predecessor)); } @@ -303,7 +310,7 @@ BoundedTerminationPass::run(llvm::Function &F, } // Step 4 : join results of exiting blocks - BoundedTerminationPassResult aggregate_result{ + TerminationPassResult aggregate_result{ .elt = DoesThisTerminate::Unevaluated, .explanation = ""}; for (auto const &[key, value] : blocks_to_results) { @@ -318,8 +325,8 @@ BoundedTerminationPass::run(llvm::Function &F, llvm::PreservedAnalyses BoundedTerminationPrinter::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { - BoundedTerminationPass::Result &result = - FAM.getResult(F); + FunctionTerminationPass::Result &result = + FAM.getResult(F); OS << "Function name: " << llvm::demangle(F.getName()) << "\n"; OS << "Result: " << result.elt << "\n"; @@ -328,6 
+335,12 @@ BoundedTerminationPrinter::run(llvm::Function &F, return llvm::PreservedAnalyses::all(); } +//------------------------------------------------------------------------------ +// Static / wiring +//------------------------------------------------------------------------------ + +llvm::AnalysisKey FunctionTerminationPass::Key; + llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { using namespace ::llvm; return {LLVM_PLUGIN_API_VERSION, "bounded-termination", LLVM_VERSION_STRING, @@ -345,11 +358,12 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { // #2 REGISTRATION FOR "MAM.getResult(Module)" PB.registerAnalysisRegistrationCallback( [](FunctionAnalysisManager &FAM) { - FAM.registerPass([&] { return BoundedTerminationPass(); }); + FAM.registerPass([&] { return FunctionTerminationPass(); }); }); }}; }; + extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getBoundedTerminationPassPluginInfo(); From cc76008e7af385798aeae8c66504faf92d009729 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Mon, 13 May 2024 14:54:44 -0400 Subject: [PATCH 02/10] Add backstop: timeout on bounds checker --- loops/default.loops.do | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loops/default.loops.do b/loops/default.loops.do index d6149f0..bd74568 100644 --- a/loops/default.loops.do +++ b/loops/default.loops.do @@ -24,9 +24,11 @@ FLAGS="$(cat ../compile_flags.txt)" TEMP="$(mktemp)" +timeout 30s \ "$LLVM_DIR"/bin/opt -load-pass-plugin \ "$PASS_TARGET" \ -passes="print" \ -disable-output \ "$ANALYSIS_FILE" \ 2>"$3" + From 93a0082793b7da1e72c860a4975b77b5833d9f50 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Mon, 13 May 2024 15:49:53 -0400 Subject: [PATCH 03/10] Add CGSCC pass, use it first / in preference --- loops/all.do | 5 -- src/BoundedTerminationPass.cpp | 117 ++++++++++++++++++++++----------- 2 files changed, 80 insertions(+), 42 deletions(-) diff --git a/loops/all.do b/loops/all.do index 
e63f711..01d6be7 100644 --- a/loops/all.do +++ b/loops/all.do @@ -3,11 +3,6 @@ redo-ifchange $( # Alt: stripped="${string%"$suffix"}" for F in $(find ../testdata -type f -name '*.cpp' -or -name '*.c') do - # TODO: Fix these: - if echo "$F" | grep -P '(factorial|collatz)' - then - continue - fi echo $(basename "$F" | sed 's/c\(pp\)\?$/loops/') done ) diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 4b9dbf7..1ef623f 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -1,5 +1,7 @@ #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -85,13 +87,32 @@ struct FunctionTerminationPass friend llvm::AnalysisInfoMixin; }; +struct CallGraphTerminationPass + : public llvm::AnalysisInfoMixin { + using Result = TerminationPassResult; + Result run(llvm::LazyCallGraph::SCC &C, llvm::CGSCCAnalysisManager &AM, + llvm::LazyCallGraph &CG); + + // Part of the official API: + // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes + static bool isRequired() { return true; } + +private: + // A special type used by analysis passes to provide an address that + // identifies that particular analysis pass type. 
+ static llvm::AnalysisKey Key; + friend llvm::AnalysisInfoMixin; +}; + // Printer pass for the function termination checker class BoundedTerminationPrinter : public llvm::PassInfoMixin { public: explicit BoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} - llvm::PreservedAnalyses run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM); + llvm::PreservedAnalyses run(llvm::LazyCallGraph::SCC &SCC, + llvm::CGSCCAnalysisManager &AM, + llvm::LazyCallGraph &CG, + llvm::CGSCCUpdateResult &); // Part of the official API: // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes static bool isRequired() { return true; } @@ -104,8 +125,7 @@ class BoundedTerminationPrinter // Free functions //------------------------------------------------------------------------------ -bool operator<(const TerminationPassResult &a, - const TerminationPassResult &b) { +bool operator<(const TerminationPassResult &a, const TerminationPassResult &b) { if (a.elt == b.elt) { return a.explanation < b.explanation; } else { @@ -149,8 +169,7 @@ std::string friendly_name_block(llvm::StringRef unfriendly) { return result; } -TerminationPassResult -basicBlockClassifier(const llvm::BasicBlock &block) { +TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) { for (const auto &I : block) { // Classify instructions based on whether we need to look at their metadata // In particular: call, invoke, callbr (these might have unbounded behavior) @@ -173,11 +192,11 @@ basicBlockClassifier(const llvm::BasicBlock &block) { } return TerminationPassResult{.elt = DoesThisTerminate::Bounded, - .explanation = "no calls"}; + .explanation = "no calls"}; } TerminationPassResult join(TerminationPassResult res1, - TerminationPassResult res2) { + TerminationPassResult res2) { TerminationPassResult minResult = std::min(res1, res2); TerminationPassResult maxResult = std::max(res1, res2); @@ -211,9 +230,8 @@ TerminationPassResult join(TerminationPassResult res1, return 
maxResult; } -TerminationPassResult -update(TerminationPassResult result, - std::vector pred_results) { +TerminationPassResult update(TerminationPassResult result, + std::vector pred_results) { TerminationPassResult predecessor_result; for (const auto &predecessor : pred_results) { @@ -237,16 +255,16 @@ update(TerminationPassResult result, } TerminationPassResult loopClassifier(const llvm::Loop &loop, - llvm::ScalarEvolution &SE) { + llvm::ScalarEvolution &SE) { std::optional bounds = loop.getBounds(SE); if (!bounds.has_value()) { - return TerminationPassResult{ - .elt = DoesThisTerminate::Unknown, - .explanation = "includes loop with indeterminate bounds"}; + return TerminationPassResult{.elt = DoesThisTerminate::Unknown, + .explanation = + "includes loop with indeterminate bounds"}; } - return TerminationPassResult{ - .elt = DoesThisTerminate::Bounded, - .explanation = "includes a loop, but it has a fixed bound"}; + return TerminationPassResult{.elt = DoesThisTerminate::Bounded, + .explanation = + "includes a loop, but it has a fixed bound"}; } bool isExitingBlock(const llvm::BasicBlock &B) { @@ -263,13 +281,34 @@ bool isExitingBlock(const llvm::BasicBlock &B) { //------------------------------------------------------------------------------ // Pass bodies //------------------------------------------------------------------------------ +TerminationPassResult +CallGraphTerminationPass::run(llvm::LazyCallGraph::SCC &C, + llvm::CGSCCAnalysisManager &AM, + llvm::LazyCallGraph &CG) { + llvm::LazyCallGraph::Node &N = *C.begin(); + if (C.size() > 1 || (N->lookup(N) != nullptr)) { + // Recursive SCC: + // either >1 node, + // or 1 node with a self-edge. + return TerminationPassResult{ + .elt = DoesThisTerminate::Unknown, + .explanation = "part of a set of mutually recursive functions", + }; + } + // Process the function. 
+ + auto &FAM = + AM.getResult(C, CG).getManager(); + return FAM.getResult(N.getFunction()); +} FunctionTerminationPass::Result FunctionTerminationPass::run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM) { + llvm::FunctionAnalysisManager &FAM) { std::map blocks_to_results; - // SetVector preserves insertion order - which is nice because it makes this deterministic. - llvm::SetVector outstanding_nodes; + // SetVector preserves insertion order - which is nice because it makes this + // deterministic. + llvm::SetVector outstanding_nodes; // Step 1 : do local basic block analysis for (auto &basic_block : F) { @@ -297,21 +336,21 @@ FunctionTerminationPass::run(llvm::Function &F, llvm::BasicBlock *block = outstanding_nodes.pop_back_val(); auto original = blocks_to_results.at(block); std::vector results; - for(llvm::BasicBlock *predecessor : llvm::predecessors(block)) { + for (llvm::BasicBlock *predecessor : llvm::predecessors(block)) { results.emplace_back(blocks_to_results.at(predecessor)); } auto altered = update(original, std::move(results)); blocks_to_results.insert_or_assign(block, altered); - if(altered.elt != original.elt) { - for(auto *successor : llvm::successors(block)) { + if (altered.elt != original.elt) { + for (auto *successor : llvm::successors(block)) { outstanding_nodes.insert(successor); } } } // Step 4 : join results of exiting blocks - TerminationPassResult aggregate_result{ - .elt = DoesThisTerminate::Unevaluated, .explanation = ""}; + TerminationPassResult aggregate_result{.elt = DoesThisTerminate::Unevaluated, + .explanation = ""}; for (auto const &[key, value] : blocks_to_results) { if (isExitingBlock(*key)) { @@ -322,13 +361,15 @@ FunctionTerminationPass::run(llvm::Function &F, return aggregate_result; } -llvm::PreservedAnalyses -BoundedTerminationPrinter::run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM) { - FunctionTerminationPass::Result &result = - FAM.getResult(F); +llvm::PreservedAnalyses BoundedTerminationPrinter::run( 
+ llvm::LazyCallGraph::SCC &SCC, llvm::CGSCCAnalysisManager &AM, + llvm::LazyCallGraph &CG, llvm::CGSCCUpdateResult &) { + TerminationPassResult &result = + AM.getResult(SCC, CG); - OS << "Function name: " << llvm::demangle(F.getName()) << "\n"; + // SCC names appear to be "names of the functions, comma-separated, in parens" + // which demangle doesn't know how to handle. + OS << "CGSCC name: " << llvm::demangle(SCC.getName()) << "\n"; OS << "Result: " << result.elt << "\n"; OS << "Explanation: " << result.explanation << "\n\n"; @@ -340,22 +381,25 @@ BoundedTerminationPrinter::run(llvm::Function &F, //------------------------------------------------------------------------------ llvm::AnalysisKey FunctionTerminationPass::Key; +llvm::AnalysisKey CallGraphTerminationPass::Key; llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { using namespace ::llvm; return {LLVM_PLUGIN_API_VERSION, "bounded-termination", LLVM_VERSION_STRING, [](PassBuilder &PB) { - // #1 REGISTRATION FOR "opt -passes=print" PB.registerPipelineParsingCallback( - [&](StringRef Name, FunctionPassManager &FPM, + [&](StringRef Name, CGSCCPassManager &PM, ArrayRef) { if (Name == "print") { - FPM.addPass(BoundedTerminationPrinter(llvm::errs())); + PM.addPass(BoundedTerminationPrinter(llvm::errs())); return true; } return false; }); - // #2 REGISTRATION FOR "MAM.getResult(Module)" + PB.registerAnalysisRegistrationCallback( + [](CGSCCAnalysisManager &CAM) { + CAM.registerPass([&] { return CallGraphTerminationPass(); }); + }); PB.registerAnalysisRegistrationCallback( [](FunctionAnalysisManager &FAM) { FAM.registerPass([&] { return FunctionTerminationPass(); }); @@ -363,7 +407,6 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { }}; }; - extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getBoundedTerminationPassPluginInfo(); From ad2586eb9402720fbcfeee28bf687598b27052d2 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Wed, 15 
May 2024 10:14:26 -0400 Subject: [PATCH 04/10] WIP on getting CGSCC results in function pass --- loops/default.loops.do | 2 +- notes/notes.md | 9 +++ src/BoundedTerminationPass.cpp | 119 +++++++++++++++++++-------------- 3 files changed, 78 insertions(+), 52 deletions(-) diff --git a/loops/default.loops.do b/loops/default.loops.do index bd74568..9afe65c 100644 --- a/loops/default.loops.do +++ b/loops/default.loops.do @@ -24,7 +24,7 @@ FLAGS="$(cat ../compile_flags.txt)" TEMP="$(mktemp)" -timeout 30s \ +timeout 10s \ "$LLVM_DIR"/bin/opt -load-pass-plugin \ "$PASS_TARGET" \ -passes="print" \ diff --git a/notes/notes.md b/notes/notes.md index 0f09902..c0bc160 100644 --- a/notes/notes.md +++ b/notes/notes.md @@ -307,6 +307,15 @@ Write a CallGraphSCC pass: The CallGraphSCC pass doesn't have to label anything, but when we analyze each function, we can get the cached result from its parent CGSCC. + +## Dependencies + +Results should implement [invalidate] to represent dependencies with other analyses. + +e.g.: our function pass depends on the loop pass + +[invalidate]: https://llvm.org/doxygen/classllvm_1_1AnalysisManager_1_1Invalidator.html + ## Future: symbolic execution At some point, break out KLEE... diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 1ef623f..3cb3f39 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -66,6 +66,8 @@ enum class DoesThisTerminate { // Complete result for a termination evaluation: // an enum result, plus an explanation of reasoning. 
+// +// TODO: This may be invalidated by loop transforms - implement `invalidate` struct TerminationPassResult { DoesThisTerminate elt = DoesThisTerminate::Unevaluated; std::string explanation = "unevaluated"; @@ -87,32 +89,13 @@ struct FunctionTerminationPass friend llvm::AnalysisInfoMixin; }; -struct CallGraphTerminationPass - : public llvm::AnalysisInfoMixin { - using Result = TerminationPassResult; - Result run(llvm::LazyCallGraph::SCC &C, llvm::CGSCCAnalysisManager &AM, - llvm::LazyCallGraph &CG); - - // Part of the official API: - // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes - static bool isRequired() { return true; } - -private: - // A special type used by analysis passes to provide an address that - // identifies that particular analysis pass type. - static llvm::AnalysisKey Key; - friend llvm::AnalysisInfoMixin; -}; - // Printer pass for the function termination checker class BoundedTerminationPrinter : public llvm::PassInfoMixin { public: explicit BoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} - llvm::PreservedAnalyses run(llvm::LazyCallGraph::SCC &SCC, - llvm::CGSCCAnalysisManager &AM, - llvm::LazyCallGraph &CG, - llvm::CGSCCUpdateResult &); + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &FAM); // Part of the official API: // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes static bool isRequired() { return true; } @@ -184,6 +167,11 @@ TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) { } else { callee_name = "Indirect"; } + // Our target may be: + // - Unknown (indirect), and therefore DoesThisTerminate::Unknown + // - Part of a recursive CGSCC + // - Known, but internally unbounded (e.g. 
contains an unbounded loop) + // - Bounded return TerminationPassResult{ .elt = DoesThisTerminate::Unknown, .explanation = @@ -278,33 +266,70 @@ bool isExitingBlock(const llvm::BasicBlock &B) { return terminator->willReturn(); } -//------------------------------------------------------------------------------ -// Pass bodies -//------------------------------------------------------------------------------ TerminationPassResult -CallGraphTerminationPass::run(llvm::LazyCallGraph::SCC &C, - llvm::CGSCCAnalysisManager &AM, - llvm::LazyCallGraph &CG) { - llvm::LazyCallGraph::Node &N = *C.begin(); - if (C.size() > 1 || (N->lookup(N) != nullptr)) { +detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { + auto &MAM = FAM.getResult(F); + const llvm::LazyCallGraph *CG = + MAM.getCachedResult(*F.getParent()); + if (CG == nullptr) { + return TerminationPassResult{ + .elt = DoesThisTerminate::Unknown, + .explanation = "no LazyCallGraph for " + llvm::demangle(F.getName()), + }; + } + llvm::LazyCallGraph::Node *cg_node = CG->lookup(F); + if (cg_node == nullptr) { + return TerminationPassResult{ + .elt = DoesThisTerminate::Unknown, + .explanation = + "no LazyCallGraph::Node for " + llvm::demangle(F.getName()), + }; + } + llvm::LazyCallGraph::SCC *C = CG->lookupSCC(*cg_node); + if (C == nullptr) { + return TerminationPassResult{ + .elt = DoesThisTerminate::Unknown, + .explanation = + "no LazyCallGraph::SCC for " + llvm::demangle(F.getName()), + }; + } + + auto N = C->begin(); + if (C->size() > 1 || ((*N)->lookup(*N) != nullptr)) { // Recursive SCC: // either >1 node, // or 1 node with a self-edge. return TerminationPassResult{ .elt = DoesThisTerminate::Unknown, - .explanation = "part of a set of mutually recursive functions", + .explanation = + "function " + llvm::demangle(F.getName()) + + " is one of a set of mutually recursive functions: " + C->getName(), }; } - // Process the function. 
- - auto &FAM = - AM.getResult(C, CG).getManager(); - return FAM.getResult(N.getFunction()); + // Otherwise, this is...unevaluated? + return TerminationPassResult{ + .elt = DoesThisTerminate::Bounded, + .explanation = "", + }; } +//------------------------------------------------------------------------------ +// Pass bodies +//------------------------------------------------------------------------------ + FunctionTerminationPass::Result FunctionTerminationPass::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { + const auto &outer_result = detect_cgscc_recursion(F, FAM); + + // If this function is part of a recursive call-graph group + // (a non-trivial CGSCC), then we don't do any more analysis. + // This way, we avoid recursing in getResult + // for the callees of this function. + if (outer_result.elt >= DoesThisTerminate::Unbounded) { + return outer_result; + } + std::map blocks_to_results; // SetVector preserves insertion order - which is nice because it makes this // deterministic. 
@@ -349,8 +374,7 @@ FunctionTerminationPass::run(llvm::Function &F, } // Step 4 : join results of exiting blocks - TerminationPassResult aggregate_result{.elt = DoesThisTerminate::Unevaluated, - .explanation = ""}; + TerminationPassResult aggregate_result = outer_result; for (auto const &[key, value] : blocks_to_results) { if (isExitingBlock(*key)) { @@ -361,15 +385,13 @@ FunctionTerminationPass::run(llvm::Function &F, return aggregate_result; } -llvm::PreservedAnalyses BoundedTerminationPrinter::run( - llvm::LazyCallGraph::SCC &SCC, llvm::CGSCCAnalysisManager &AM, - llvm::LazyCallGraph &CG, llvm::CGSCCUpdateResult &) { - TerminationPassResult &result = - AM.getResult(SCC, CG); +llvm::PreservedAnalyses +BoundedTerminationPrinter::run(llvm::Function &F, + llvm::FunctionAnalysisManager &FAM) { + FunctionTerminationPass::Result &result = + FAM.getResult(F); - // SCC names appear to be "names of the functions, comma-separated, in parens" - // which demangle doesn't know how to handle. - OS << "CGSCC name: " << llvm::demangle(SCC.getName()) << "\n"; + OS << "Function name: " << llvm::demangle(F.getName()) << "\n"; OS << "Result: " << result.elt << "\n"; OS << "Explanation: " << result.explanation << "\n\n"; @@ -381,14 +403,13 @@ llvm::PreservedAnalyses BoundedTerminationPrinter::run( //------------------------------------------------------------------------------ llvm::AnalysisKey FunctionTerminationPass::Key; -llvm::AnalysisKey CallGraphTerminationPass::Key; llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { using namespace ::llvm; return {LLVM_PLUGIN_API_VERSION, "bounded-termination", LLVM_VERSION_STRING, [](PassBuilder &PB) { PB.registerPipelineParsingCallback( - [&](StringRef Name, CGSCCPassManager &PM, + [&](StringRef Name, FunctionPassManager &PM, ArrayRef) { if (Name == "print") { PM.addPass(BoundedTerminationPrinter(llvm::errs())); @@ -396,10 +417,6 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { } return false; }); - 
PB.registerAnalysisRegistrationCallback( - [](CGSCCAnalysisManager &CAM) { - CAM.registerPass([&] { return CallGraphTerminationPass(); }); - }); PB.registerAnalysisRegistrationCallback( [](FunctionAnalysisManager &FAM) { FAM.registerPass([&] { return FunctionTerminationPass(); }); From 3bfb0fefe88aef3b41c586464668a5362f0376bf Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Wed, 15 May 2024 10:36:06 -0400 Subject: [PATCH 05/10] Start to understand relationship between pass levels - Need to implement invalidation - Need to work out a better way to trigger the upper-level pass from the lower-level one / put it in the pipeline --- loops/default.loops.do | 8 +++++--- src/BoundedTerminationPass.cpp | 13 +++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/loops/default.loops.do b/loops/default.loops.do index 9afe65c..edbf7c6 100644 --- a/loops/default.loops.do +++ b/loops/default.loops.do @@ -1,4 +1,5 @@ - +#!/bin/bash +# set -eux if uname -a | grep -q Linux @@ -24,11 +25,12 @@ FLAGS="$(cat ../compile_flags.txt)" TEMP="$(mktemp)" +set -o pipefail timeout 10s \ "$LLVM_DIR"/bin/opt -load-pass-plugin \ "$PASS_TARGET" \ - -passes="print" \ + -passes="module(do_lazy_cg),function(print)" \ -disable-output \ "$ANALYSIS_FILE" \ - 2>"$3" + 2>&1 | tee "$3" >&2 diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 3cb3f39..2ecc5f5 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -408,6 +408,16 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { using namespace ::llvm; return {LLVM_PLUGIN_API_VERSION, "bounded-termination", LLVM_VERSION_STRING, [](PassBuilder &PB) { + // TODO: This is some hacks + PB.registerPipelineParsingCallback( + [&](StringRef Name, ModulePassManager &PM, + ArrayRef) { + if (Name == "do_lazy_cg") { + PM.addPass(llvm::LazyCallGraphPrinterPass(llvm::errs())); + return true; + } + return false; + }); PB.registerPipelineParsingCallback( [&](StringRef Name, 
FunctionPassManager &PM, ArrayRef) { @@ -417,6 +427,9 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { } return false; }); + // TODO: Can we register a dependency in the new pass manager, + // that `bounded-termination` requires `LazyCallGraph` before + // entering the function layer? PB.registerAnalysisRegistrationCallback( [](FunctionAnalysisManager &FAM) { FAM.registerPass([&] { return FunctionTerminationPass(); }); From ea5dde36e4ba4bb152a9bde8bcc03871f3d44adc Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Fri, 24 May 2024 16:43:33 -0400 Subject: [PATCH 06/10] WIP: Make the printer pass a module pass For some reason, this is getting bogged down in the _loop analysis_ part of the per-function evaluation...eh? Weird. --- loops/default.loops.do | 2 +- notes/notes.md | 22 +++++ src/BoundedTerminationPass.cpp | 164 +++++++++++++++++++++------------ 3 files changed, 130 insertions(+), 58 deletions(-) diff --git a/loops/default.loops.do b/loops/default.loops.do index edbf7c6..2fb5bb1 100644 --- a/loops/default.loops.do +++ b/loops/default.loops.do @@ -29,7 +29,7 @@ set -o pipefail timeout 10s \ "$LLVM_DIR"/bin/opt -load-pass-plugin \ "$PASS_TARGET" \ - -passes="module(do_lazy_cg),function(print)" \ + -passes="module(print)" \ -disable-output \ "$ANALYSIS_FILE" \ 2>&1 | tee "$3" >&2 diff --git a/notes/notes.md b/notes/notes.md index c0bc160..65bf358 100644 --- a/notes/notes.md +++ b/notes/notes.md @@ -307,6 +307,25 @@ Write a CallGraphSCC pass: The CallGraphSCC pass doesn't have to label anything, but when we analyze each function, we can get the cached result from its parent CGSCC. +...no, we can't. We can only get a result from an outer analysis if the pass is +"cached and immutable", and the LazyCallGraph is not. + +We could do a module pass, or a CGSCC pass; if we are careful to invalidate the +outer pass when the inner object is transformed. Maybe that's OK? 
+ +Or: +- Function-level analysis produces a "contingent" result; either: + - I have this result, regardless of anything + - I am Terminates _if_ all of these functions are Terminates + + So the Function analysis is strictly local; can be handled by an inner proxy. + And it doesn't depend on the CGSCC analysis. + +- The outer result (e.g. Module) gets the Function-local results, + and the CG results, + and "completes" the analysis for each function. + Invalidation naturally falls out by referring to the inner analyses + (CG analysis and Function analysis). ## Dependencies @@ -316,6 +335,9 @@ e.g.: our function pass depends on the loop pass [invalidate]: https://llvm.org/doxygen/classllvm_1_1AnalysisManager_1_1Invalidator.html + + + ## Future: symbolic execution At some point, break out KLEE... diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 2ecc5f5..67f1327 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -67,12 +67,31 @@ enum class DoesThisTerminate { // Complete result for a termination evaluation: // an enum result, plus an explanation of reasoning. // +// Function-level analysis results are contingent: +// they assume that every function this function calls +// is Bounded. +// // TODO: This may be invalidated by loop transforms - implement `invalidate` struct TerminationPassResult { DoesThisTerminate elt = DoesThisTerminate::Unevaluated; std::string explanation = "unevaluated"; }; +// Results from analyzing the full module, +// including call-graph analysis. +struct ModuleTerminationPassResult { + std::map per_function_results; + + // Invalidated when: + // - FunctionTerminationPass is invalidated + // - LazyCallGraphPass analysis is invalidated + bool invalidate(llvm::Module &IR, const llvm::PreservedAnalyses &PA, + llvm::ModuleAnalysisManager::Invalidator &) { + // TODO: This is the worst result we could give here! 
+ return false; + } +}; + // Pass over functions: does this terminate: struct FunctionTerminationPass : public llvm::AnalysisInfoMixin { @@ -89,13 +108,28 @@ struct FunctionTerminationPass friend llvm::AnalysisInfoMixin; }; +struct ModuleTerminationPass + : public llvm::AnalysisInfoMixin { + using Result = ModuleTerminationPassResult; + Result run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM); + // Part of the official API: + // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes + static bool isRequired() { return true; } + +private: + // A special type used by analysis passes to provide an address that + // identifies that particular analysis pass type. + static llvm::AnalysisKey Key; + friend llvm::AnalysisInfoMixin; +}; + // Printer pass for the function termination checker class BoundedTerminationPrinter : public llvm::PassInfoMixin { public: explicit BoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} - llvm::PreservedAnalyses run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM); + llvm::PreservedAnalyses run(llvm::Module &IR, + llvm::ModuleAnalysisManager &MAM); // Part of the official API: // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes static bool isRequired() { return true; } @@ -157,30 +191,21 @@ TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) { // Classify instructions based on whether we need to look at their metadata // In particular: call, invoke, callbr (these might have unbounded behavior) if (const auto *CI = llvm::dyn_cast(&I)) { - std::string callee_name; - - // TODO: Use the function attributes, or function analysis, - // to improve this analysis if (auto called_function = CI->getCalledFunction(); - called_function != nullptr) { - callee_name = llvm::demangle(called_function->getName()); - } else { - callee_name = "Indirect"; + called_function == nullptr) { + // Indirect function call; we don't know what the properties of the + // target are, and our CG 
analysis won't cover it. + // (CG covers CGSCC/recursion detection, as well as propagating + // unbounded backwards .) + return TerminationPassResult{.elt = DoesThisTerminate::Unknown, + .explanation = + "Performs an indirect function call"}; } - // Our target may be: - // - Unknown (indirect), and therefore DoesThisTerminate::Unknown - // - Part of a recursive CGSCC - // - Known, but internally unbounded (e.g. contains an unbounded loop) - // - Bounded - return TerminationPassResult{ - .elt = DoesThisTerminate::Unknown, - .explanation = - "Calls function with unknown properties: " + callee_name}; } } return TerminationPassResult{.elt = DoesThisTerminate::Bounded, - .explanation = "no calls"}; + .explanation = "no indirect calls"}; } TerminationPassResult join(TerminationPassResult res1, @@ -226,8 +251,8 @@ TerminationPassResult update(TerminationPassResult result, predecessor_result = join(predecessor_result, predecessor); } - // After joining all predecessors, we have one special exception to the 'join' - // rule. + // After joining all predecessors, we have one special exception to the + // 'join' rule. // // 'join' works for symmetric results, but we have an asymmetry here: // if all predecessors are `Unbounded` and this is `Bounded`, then this node @@ -256,8 +281,8 @@ TerminationPassResult loopClassifier(const llvm::Loop &loop, } bool isExitingBlock(const llvm::BasicBlock &B) { - // Check whether the terminating instruction of the block is a "return"-type. - // https://llvm.org/docs/LangRef.html#terminator-instructions + // Check whether the terminating instruction of the block is a + // "return"-type. https://llvm.org/docs/LangRef.html#terminator-instructions const auto *terminator = B.getTerminator(); if (terminator == nullptr) { // TODO: Can we print / capture an error here, or something? 
@@ -269,6 +294,10 @@ bool isExitingBlock(const llvm::BasicBlock &B) { TerminationPassResult detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { auto &MAM = FAM.getResult(F); + // Process invalidation based on the outer result; + // this makes sure we're invalidated if the LazyCallGraphAnalysis changes. + MAM.registerOuterAnalysisInvalidation(); const llvm::LazyCallGraph *CG = MAM.getCachedResult(*F.getParent()); if (CG == nullptr) { @@ -320,15 +349,15 @@ detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { FunctionTerminationPass::Result FunctionTerminationPass::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { - const auto &outer_result = detect_cgscc_recursion(F, FAM); - - // If this function is part of a recursive call-graph group - // (a non-trivial CGSCC), then we don't do any more analysis. - // This way, we avoid recursing in getResult - // for the callees of this function. - if (outer_result.elt >= DoesThisTerminate::Unbounded) { - return outer_result; - } + // const auto &outer_result = detect_cgscc_recursion(F, FAM); + // + // // If this function is part of a recursive call-graph group + // // (a non-trivial CGSCC), then we don't do any more analysis. + // // This way, we avoid recursing in getResult + // // for the callees of this function. 
+ // if (outer_result.elt >= DoesThisTerminate::Unbounded) { + // return outer_result; + // } std::map blocks_to_results; // SetVector preserves insertion order - which is nice because it makes this @@ -374,7 +403,10 @@ FunctionTerminationPass::run(llvm::Function &F, } // Step 4 : join results of exiting blocks - TerminationPassResult aggregate_result = outer_result; + TerminationPassResult aggregate_result = { + .elt = DoesThisTerminate::Bounded, + .explanation = "" + }; for (auto const &[key, value] : blocks_to_results) { if (isExitingBlock(*key)) { @@ -385,15 +417,40 @@ FunctionTerminationPass::run(llvm::Function &F, return aggregate_result; } -llvm::PreservedAnalyses -BoundedTerminationPrinter::run(llvm::Function &F, - llvm::FunctionAnalysisManager &FAM) { - FunctionTerminationPass::Result &result = - FAM.getResult(F); +ModuleTerminationPass::Result +ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { + std::map per_function_results; + + auto &function_analysis_manager_proxy = + AM.getResult(IR); + auto &FAM = function_analysis_manager_proxy.getManager(); + // Step 1 : function-local analysis + for (llvm::Function &function: IR) { + per_function_results.insert({&function, FAM.getResult(function)}); + } + + // Step 2 : CGSCC analysis; note recursion up-front. + // TODO: + // auto &call_graph = AM.getResult(IR); - OS << "Function name: " << llvm::demangle(F.getName()) << "\n"; - OS << "Result: " << result.elt << "\n"; - OS << "Explanation: " << result.explanation << "\n\n"; + // Step 3 : worklist algorithm on the call graph. + // TODO: + + + return ModuleTerminationPassResult{per_function_results}; +} + +llvm::PreservedAnalyses +BoundedTerminationPrinter::run(llvm::Module &IR, + llvm::ModuleAnalysisManager &AM) { + OS << "Starting pass... \n"; + auto &module_results = AM.getResult(IR); + OS << "got results... 
\n"; + // for (const auto &[function, result] : module_results.per_function_results) { + // OS << "Function name: " << llvm::demangle(function->getName()) << "\n"; + // OS << "Result: " << result.elt << "\n"; + // OS << "Explanation: " << result.explanation << "\n\n"; + // } return llvm::PreservedAnalyses::all(); } @@ -403,37 +460,30 @@ BoundedTerminationPrinter::run(llvm::Function &F, //------------------------------------------------------------------------------ llvm::AnalysisKey FunctionTerminationPass::Key; +llvm::AnalysisKey ModuleTerminationPass::Key; llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { using namespace ::llvm; return {LLVM_PLUGIN_API_VERSION, "bounded-termination", LLVM_VERSION_STRING, [](PassBuilder &PB) { - // TODO: This is some hacks PB.registerPipelineParsingCallback( [&](StringRef Name, ModulePassManager &PM, ArrayRef) { - if (Name == "do_lazy_cg") { - PM.addPass(llvm::LazyCallGraphPrinterPass(llvm::errs())); - return true; - } - return false; - }); - PB.registerPipelineParsingCallback( - [&](StringRef Name, FunctionPassManager &PM, - ArrayRef) { if (Name == "print") { PM.addPass(BoundedTerminationPrinter(llvm::errs())); return true; } return false; }); - // TODO: Can we register a dependency in the new pass manager, - // that `bounded-termination` requires `LazyCallGraph` before - // entering the function layer? 
PB.registerAnalysisRegistrationCallback( - [](FunctionAnalysisManager &FAM) { - FAM.registerPass([&] { return FunctionTerminationPass(); }); + [](FunctionAnalysisManager &AM) { + AM.registerPass([&] { return FunctionTerminationPass(); }); + }); + PB.registerAnalysisRegistrationCallback( + [](ModuleAnalysisManager&AM) { + AM.registerPass([&] { return ModuleTerminationPass(); }); }); + }}; }; From 06c96c448f09b253a975aa932649202025f8090c Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Mon, 27 May 2024 15:30:32 -0400 Subject: [PATCH 07/10] Bisect issue with function-level termination The same pass (`FunctionBoundedTermination`) completes as expected when invoked from a function-level transform pass (`print`), but not when invoked from a module-level transform pass `print` Is this something weird with the proxy? --- loops/default.loops.do | 2 +- src/BoundedTerminationPass.cpp | 62 ++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/loops/default.loops.do b/loops/default.loops.do index 2fb5bb1..19a3ef7 100644 --- a/loops/default.loops.do +++ b/loops/default.loops.do @@ -29,7 +29,7 @@ set -o pipefail timeout 10s \ "$LLVM_DIR"/bin/opt -load-pass-plugin \ "$PASS_TARGET" \ - -passes="module(print)" \ + -passes="print" \ -disable-output \ "$ANALYSIS_FILE" \ 2>&1 | tee "$3" >&2 diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 67f1327..78b946d 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -80,7 +80,7 @@ struct TerminationPassResult { // Results from analyzing the full module, // including call-graph analysis. 
struct ModuleTerminationPassResult { - std::map per_function_results; + std::map per_function_results; // Invalidated when: // - FunctionTerminationPass is invalidated @@ -123,7 +123,7 @@ struct ModuleTerminationPass friend llvm::AnalysisInfoMixin; }; -// Printer pass for the function termination checker +// Printer pass for the module-level termination checker class BoundedTerminationPrinter : public llvm::PassInfoMixin { public: @@ -138,6 +138,21 @@ class BoundedTerminationPrinter llvm::raw_ostream &OS; }; +// Printer pass for the function-level termination checker +class FunctionBoundedTerminationPrinter + : public llvm::PassInfoMixin { +public: + explicit FunctionBoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {} + llvm::PreservedAnalyses run(llvm::Function &IR, + llvm::FunctionAnalysisManager &FAM); + // Part of the official API: + // https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes + static bool isRequired() { return true; } + +private: + llvm::raw_ostream &OS; +}; + //------------------------------------------------------------------------------ // Free functions //------------------------------------------------------------------------------ @@ -349,6 +364,8 @@ detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { FunctionTerminationPass::Result FunctionTerminationPass::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { + llvm::ScalarEvolution &SE = FAM.getResult(F); + llvm::LoopInfo &loop_info = FAM.getResult(F); // const auto &outer_result = detect_cgscc_recursion(F, FAM); // // // If this function is part of a recursive call-graph group @@ -373,8 +390,6 @@ FunctionTerminationPass::run(llvm::Function &F, // Step 2 : do loop-level analysis. We need a ScalarEvolution to get the // loops.
- llvm::ScalarEvolution &SE = FAM.getResult(F); - llvm::LoopInfo &loop_info = FAM.getResult(F); for (auto &basic_block : F) { llvm::Loop *loop = loop_info.getLoopFor(&basic_block); if (loop == nullptr) { @@ -403,10 +418,8 @@ FunctionTerminationPass::run(llvm::Function &F, } // Step 4 : join results of exiting blocks - TerminationPassResult aggregate_result = { - .elt = DoesThisTerminate::Bounded, - .explanation = "" - }; + TerminationPassResult aggregate_result = {.elt = DoesThisTerminate::Bounded, + .explanation = ""}; for (auto const &[key, value] : blocks_to_results) { if (isExitingBlock(*key)) { @@ -425,18 +438,18 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { AM.getResult(IR); auto &FAM = function_analysis_manager_proxy.getManager(); // Step 1 : function-local analysis - for (llvm::Function &function: IR) { - per_function_results.insert({&function, FAM.getResult(function)}); + for (llvm::Function &function : IR) { + per_function_results.insert( + {&function, FAM.getResult(function)}); } // Step 2 : CGSCC analysis; note recursion up-front. - // TODO: + // TODO: This is taking literally forever. No, figuratively forever // auto &call_graph = AM.getResult(IR); // Step 3 : worklist algorithm on the call graph. // TODO: - return ModuleTerminationPassResult{per_function_results}; } @@ -446,7 +459,8 @@ BoundedTerminationPrinter::run(llvm::Module &IR, OS << "Starting pass... \n"; auto &module_results = AM.getResult(IR); OS << "got results... 
\n"; - // for (const auto &[function, result] : module_results.per_function_results) { + // for (const auto &[function, result] : module_results.per_function_results) + // { // OS << "Function name: " << llvm::demangle(function->getName()) << "\n"; // OS << "Result: " << result.elt << "\n"; // OS << "Explanation: " << result.explanation << "\n\n"; @@ -455,6 +469,16 @@ BoundedTerminationPrinter::run(llvm::Module &IR, return llvm::PreservedAnalyses::all(); } +llvm::PreservedAnalyses +FunctionBoundedTerminationPrinter::run(llvm::Function &IR, + llvm::FunctionAnalysisManager &AM) { + auto &results = AM.getResult(IR); + OS << "For function: " << llvm::demangle(IR.getName()) + << "got result: " << results.elt << "\n"; + + return llvm::PreservedAnalyses::all(); +} + //------------------------------------------------------------------------------ // Static / wiring //------------------------------------------------------------------------------ @@ -475,15 +499,23 @@ llvm::PassPluginLibraryInfo getBoundedTerminationPassPluginInfo() { } return false; }); + PB.registerPipelineParsingCallback( + [&](StringRef Name, FunctionPassManager &PM, + ArrayRef) { + if (Name == "print") { + PM.addPass(FunctionBoundedTerminationPrinter(llvm::errs())); + return true; + } + return false; + }); PB.registerAnalysisRegistrationCallback( [](FunctionAnalysisManager &AM) { AM.registerPass([&] { return FunctionTerminationPass(); }); }); PB.registerAnalysisRegistrationCallback( - [](ModuleAnalysisManager&AM) { + [](ModuleAnalysisManager &AM) { AM.registerPass([&] { return ModuleTerminationPass(); }); }); - }}; }; From 3f52bdc016f7d4b552e85b7eb566599e276cb166 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Fri, 7 Jun 2024 10:39:04 -0400 Subject: [PATCH 08/10] Fix issue with module-level pass: handle `declare` Several (all?) testdata files were failing when the `FunctionTerminationPass` was run via proxy (from the module-level pass), instead of being run at the function level. 
Unfortunately, "run from the module" is the only way we think we can handle the call-graph-level analysis. The symptom was LLVM stalling out -- looping forever? -- when we asked for `ScalarEvolutionAnalysis`. I opened up LLDB and stepped through, and... it was fine! Looking at `simple.c`, the pass produced results for `get_value` and `main` just fine. ...but our functions are not the only functions listed in a module! For instance, `simple.c` invokes `malloc`; as a result, `malloc` gets `declare`d in our module, without a function body. And *that's* where the analysis was stalling out -- our pass was running on `F.getName() == "malloc"`! Apparently, function-level passes are only run across functions `define`d in the module -- but the listing of module contents includes functions `define`d or `declare`d. I haven't really checked this, but it is consistent with what we've seen, and would explain the difference between running in function/module modes. My fix (this commit) is to return `Unknown` if the function does not have a body, i.e. has no basic blocks. This gets us back to a passing -- though incorrect -- state.
--- build/default.so.do | 2 +- notes/function-module.md | 53 ++++++++++++++++++++++++++++++++++ src/BoundedTerminationPass.cpp | 21 +++++++++----- 3 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 notes/function-module.md diff --git a/build/default.so.do b/build/default.so.do index 56bcbb9..2a9e587 100644 --- a/build/default.so.do +++ b/build/default.so.do @@ -10,7 +10,7 @@ FLAGS="$(cat ../compile_flags.txt)" "$LLVM_DIR"/bin/clang++ \ $FLAGS \ -Wall -fdiagnostics-color=always -fvisibility-inlines-hidden \ - -g -std=gnu++17 -fPIC \ + -glldb -std=gnu++17 -fPIC \ --write-user-dependencies -MF"$DEPFILE" \ -o "$3" \ -l LLVM \ diff --git a/notes/function-module.md b/notes/function-module.md new file mode 100644 index 0000000..eea2920 --- /dev/null +++ b/notes/function-module.md @@ -0,0 +1,53 @@ + +Notes on debugging the issue from 06c96c44: + +> Bisect issue with function-level termination + +> The same pass (`FunctionBoundedTermination`) completes as expected when +> invoked from a function-level transform pass +> (`print`), but not when invoked from a +> module-level transform pass `print` + +> Is this something weird with the proxy? + +FunctionTerminationPass gets ScalarEvolutionAnalysis and LoopAnalysis. + +On call_to_bounded_.loops, using print (module-level analysis), ScalarEvolutionAnalysis completes. +LoopAnalysis times out? No, it doesn't... + +not for "the first function", whichever that is. `get_value`, looks like. +`simple.c` does not have any loops! + +On `main`... Seems like ScalarEvolutionAnalysis is the problem? +No; both analyses complete... `getLoopFor`? + +Also completes for `main`. + +But! apparently we also run for `malloc` in `simple.loops`... +and that's where the problem comes up. 
ScalarEvolutionAnalysis on malloc +runs dominator-tree analysis, which gets stuck: + +``` +(lldb) expr F.getName() +(llvm::StringRef) $9 = (Data = "malloc", Length = 6) +(lldb) expr &F.BasicBlocks.Sentinel +(llvm::ilist_sentinel > *) $10 = 0x000055555563d0a0 +(lldb) expr F.BasicBlocks.Sentinel +(llvm::ilist_sentinel >) $11 = { + llvm::ilist_node_impl > = { + llvm::ilist_detail::node_options::node_base_type = { + Prev = 0x000055555563d0a0 + Next = 0x000055555563d0a0 + } + } +} +``` + +Which looks like "an empty body" (sentinel value of a linked list, pointing at itself). + +Can we detect which functions are externally-linked, and avoid analyzing them? +Or should we avoid analyzing things that are "bodyless"? + +Easy version: `if(F.empty()) { unknown }` + + diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 78b946d..b55095c 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -364,6 +364,13 @@ detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { FunctionTerminationPass::Result FunctionTerminationPass::run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) { + if(F.empty()) { + return FunctionTerminationPass::Result{ + .elt = DoesThisTerminate::Unknown, + .explanation = "has no basic blocks in this module", + }; + } + llvm::ScalarEvolution &SE = FAM.getResult(F); llvm::LoopInfo &loop_info = FAM.getResult(F); // const auto &outer_result = detect_cgscc_recursion(F, FAM); @@ -456,15 +463,13 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { llvm::PreservedAnalyses BoundedTerminationPrinter::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { - OS << "Starting pass... \n"; auto &module_results = AM.getResult(IR); - OS << "got results... 
\n"; - // for (const auto &[function, result] : module_results.per_function_results) - // { - // OS << "Function name: " << llvm::demangle(function->getName()) << "\n"; - // OS << "Result: " << result.elt << "\n"; - // OS << "Explanation: " << result.explanation << "\n\n"; - // } + for (const auto &[function, result] : module_results.per_function_results) + { + OS << "Function name: " << llvm::demangle(function->getName()) << "\n"; + OS << "Result: " << result.elt << "\n"; + OS << "Explanation: " << result.explanation << "\n\n"; + } return llvm::PreservedAnalyses::all(); } From b16bf3f93c4967fad65097a17aedb74932acc6b5 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Mon, 10 Jun 2024 14:27:53 -0400 Subject: [PATCH 09/10] Update notes with CGSCC information This doesn't propagate it back via the worklist, but it does tell us about recursive loops. --- src/BoundedTerminationPass.cpp | 48 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index b55095c..9fc3475 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -1,7 +1,7 @@ +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -373,15 +373,6 @@ FunctionTerminationPass::run(llvm::Function &F, llvm::ScalarEvolution &SE = FAM.getResult(F); llvm::LoopInfo &loop_info = FAM.getResult(F); - // const auto &outer_result = detect_cgscc_recursion(F, FAM); - // - // // If this function is part of a recursive call-graph group - // // (a non-trivial CGSCC), then we don't do any more analysis. - // // This way, we avoid recursing in getResult - // // for the callees of this function. 
- // if (outer_result.elt >= DoesThisTerminate::Unbounded) { - // return outer_result; - // } std::map blocks_to_results; // SetVector preserves insertion order - which is nice because it makes this @@ -450,9 +441,38 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { {&function, FAM.getResult(function)}); } - // Step 2 : CGSCC analysis; note recursion up-front. - // TODO: This is taking literally forever. No, figuratively forever - // auto &call_graph = AM.getResult(IR); + // Step 2 : CGSCC analysis. + // Take anything in a recursive group and force it Unknown. + // See also NoRecursionCheck in clang-tidy + llvm::CallGraph &CG = AM.getResult(IR); + for(llvm::scc_iterator SCCI = llvm::scc_begin(&CG); !SCCI.isAtEnd(); ++SCCI) { + if(!SCCI.hasCycle()) { + // SCC doesn't have a loop. We don't need to update anything. + continue; + } + // SCC has a loop. Update all functions to note they're mutually recursive. + const std::vector &nextSCC = *SCCI; + TerminationPassResult shared_result = { + .elt = DoesThisTerminate::Unknown, + .explanation = "part of a call graph that contains a loop: ", + }; + int count = 0; + for(llvm::CallGraphNode *node : nextSCC) { + const llvm::Function *f = node->getFunction(); + shared_result.explanation = (shared_result.explanation + llvm::demangle(f->getName())); + if(count < nextSCC.size() - 1) { + shared_result.explanation += ", "; + } else { + ++count; + } + } + for(llvm::CallGraphNode *node : nextSCC) { + llvm::Function *f = node->getFunction(); + const auto new_result = update(per_function_results[f], {shared_result}); + per_function_results[f] = new_result; + } + } + // TODO // Step 3 : worklist algorithm on the call graph. 
// TODO: From 50acd2e114f8ad6856d137d1389bb81e8f453574 Mon Sep 17 00:00:00 2001 From: Charles Eckman Date: Mon, 10 Jun 2024 15:08:10 -0400 Subject: [PATCH 10/10] Propagate results through call graph --- src/BoundedTerminationPass.cpp | 47 ++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp index 9fc3475..133e061 100644 --- a/src/BoundedTerminationPass.cpp +++ b/src/BoundedTerminationPass.cpp @@ -80,7 +80,7 @@ struct TerminationPassResult { // Results from analyzing the full module, // including call-graph analysis. struct ModuleTerminationPassResult { - std::map per_function_results; + std::map per_function_results; // Invalidated when: // - FunctionTerminationPass is invalidated @@ -430,7 +430,7 @@ FunctionTerminationPass::run(llvm::Function &F, ModuleTerminationPass::Result ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { - std::map per_function_results; + std::map per_function_results; auto &function_analysis_manager_proxy = AM.getResult(IR); @@ -459,6 +459,17 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { int count = 0; for(llvm::CallGraphNode *node : nextSCC) { const llvm::Function *f = node->getFunction(); + // May be null: + // 1. Exported functions get edges "in from" the null node, to represent external calls. + // (We don't care about these.) + // 2. Calls that leave this module, or have an indirect function call, + // have calls out to another "null" node. + // Indirect functions are handled at the function-local layer. + // Extern functions are handled as "unknown body". + // So we should be fine to skip? 
+ if(f == nullptr) { + continue; + } shared_result.explanation = (shared_result.explanation + llvm::demangle(f->getName())); if(count < nextSCC.size() - 1) { shared_result.explanation += ", "; @@ -472,10 +483,36 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) { per_function_results[f] = new_result; } } - // TODO - // Step 3 : worklist algorithm on the call graph. - // TODO: + // Ideally we'd _just_ do worklist, but we don't have a "callers" list, alas. + // So we just run N^2: scan through each function, update from callees, + // and run again if we updated something. + bool stale = true; + while(stale) { + stale = false; + for(auto &F : IR) { + TerminationPassResult original = per_function_results[&F]; + const llvm::CallGraphNode *CGNode = CG[&F]; + std::vector results; + + // Update this node from its successors. + for(const auto &it : *CGNode) { + llvm::CallGraphNode *callee = it.second; + if(auto *CalleeF = callee->getFunction(); CalleeF != nullptr) { + const auto &result = per_function_results[CalleeF]; + results.emplace_back(TerminationPassResult{ + .elt = result.elt, + .explanation = "via call to " + llvm::demangle(CalleeF->getName()) + ": " + result.explanation, + }); + } + } + auto altered = update(original, std::move(results)); + if (altered.elt != original.elt) { + per_function_results[&F] = altered; + stale = true; + } + } + } return ModuleTerminationPassResult{per_function_results}; }