From 0c50686c3839326593968ba440b321a1d3af1feb Mon Sep 17 00:00:00 2001
From: Charles Eckman <charles@cceckman.com>
Date: Thu, 13 Jun 2024 11:35:10 -0400
Subject: [PATCH 1/2] Add notes on talk

---
 src/BoundedTerminationPass.cpp |  56 +--------
 talk/.gitignore                |   5 +
 talk/commands.md               |  28 +++++
 talk/notes.md                  |  99 +++++++++++++++
 talk/transcript.txt            | 215 +++++++++++++++++++++++++++++++++
 5 files changed, 352 insertions(+), 51 deletions(-)
 create mode 100644 talk/.gitignore
 create mode 100644 talk/commands.md
 create mode 100644 talk/notes.md
 create mode 100644 talk/transcript.txt

diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp
index 133e061..72888ca 100644
--- a/src/BoundedTerminationPass.cpp
+++ b/src/BoundedTerminationPass.cpp
@@ -212,6 +212,11 @@ TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) {
         // target are, and our CG analysis won't cover it.
         // (CG covers CGSCC/recursion detection, as well as propagating
         // unbounded backwards .)
+        // TODO: Is this necesary, given that we're analyzing calls at the module layer?
+        // The CallGraph has a lot more information about what calls are possible,
+        // and includes a "null" node that indirect calls point to.
+        // Whole-program devirtualization might even take an indirect call and turn it
+        // into a direct call.
         return TerminationPassResult{.elt = DoesThisTerminate::Unknown,
                                      .explanation =
                                          "Performs an indirect function call"};
@@ -306,57 +311,6 @@ bool isExitingBlock(const llvm::BasicBlock &B) {
   return terminator->willReturn();
 }
 
-TerminationPassResult
-detect_cgscc_recursion(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) {
-  auto &MAM = FAM.getResult<llvm::ModuleAnalysisManagerFunctionProxy>(F);
-  // Process invalidation based on the outer result;
-  // this makes sure we're invalidated if the LazyCallGraphAnalysis changes.
-  MAM.registerOuterAnalysisInvalidation<llvm::LazyCallGraphAnalysis,
-                                        FunctionTerminationPass>();
-  const llvm::LazyCallGraph *CG =
-      MAM.getCachedResult<llvm::LazyCallGraphAnalysis>(*F.getParent());
-  if (CG == nullptr) {
-    return TerminationPassResult{
-        .elt = DoesThisTerminate::Unknown,
-        .explanation = "no LazyCallGraph for " + llvm::demangle(F.getName()),
-    };
-  }
-  llvm::LazyCallGraph::Node *cg_node = CG->lookup(F);
-  if (cg_node == nullptr) {
-    return TerminationPassResult{
-        .elt = DoesThisTerminate::Unknown,
-        .explanation =
-            "no LazyCallGraph::Node for " + llvm::demangle(F.getName()),
-    };
-  }
-  llvm::LazyCallGraph::SCC *C = CG->lookupSCC(*cg_node);
-  if (C == nullptr) {
-    return TerminationPassResult{
-        .elt = DoesThisTerminate::Unknown,
-        .explanation =
-            "no LazyCallGraph::SCC for " + llvm::demangle(F.getName()),
-    };
-  }
-
-  auto N = C->begin();
-  if (C->size() > 1 || ((*N)->lookup(*N) != nullptr)) {
-    // Recursive SCC:
-    // either >1 node,
-    // or 1 node with a self-edge.
-    return TerminationPassResult{
-        .elt = DoesThisTerminate::Unknown,
-        .explanation =
-            "function " + llvm::demangle(F.getName()) +
-            " is one of a set of mutually recursive functions: " + C->getName(),
-    };
-  }
-  // Otherwise, this is...unevaluated?
-  return TerminationPassResult{
-      .elt = DoesThisTerminate::Bounded,
-      .explanation = "",
-  };
-}
-
 //------------------------------------------------------------------------------
 // Pass bodies
 //------------------------------------------------------------------------------
diff --git a/talk/.gitignore b/talk/.gitignore
new file mode 100644
index 0000000..ff919cf
--- /dev/null
+++ b/talk/.gitignore
@@ -0,0 +1,5 @@
+*.mkv
+*.aac
+*.wav
+*.mp4
+
diff --git a/talk/commands.md b/talk/commands.md
new file mode 100644
index 0000000..6daca81
--- /dev/null
+++ b/talk/commands.md
@@ -0,0 +1,28 @@
+
+I recorded with OBS, then got this trancription using `whisper.cpp`.
+
+```
+ffmpeg -i 2024-06-13_09-09-01.mkv -ac 1 -ar 16000 2024-06-13-09-09-01.wav
+```
+
+- `-ac 1`: extract audio channel 1
+- `-ar 16000`: convert to 16kHz audio; this is what whisper.cpp expects.
+
+```
+/home/cceckman/r/github.com/ggerganov/whisper.cpp/main -of transcript.txt -m /home/cceckman/r/github.com/ggerganov/whisper.cpp/models/ggml-large-v3-q5_0.bin -f *.wav
+```
+
+Initially I tried this; it seemed to work, but was going slowly; switched to
+more threads and a smaller model:
+
+```
+/home/cceckman/r/github.com/ggerganov/whisper.cpp/main \
+    -t 7 \
+    -m /home/cceckman/r/github.com/ggerganov/whisper.cpp/models/ggml-small.en.bin \
+    -f *.wav \
+    | tee transcript.txt
+```
+
+That ran a lot faster; maybe close to real-time? I didn't time it.
+
+
diff --git a/talk/notes.md b/talk/notes.md
new file mode 100644
index 0000000..8bf483e
--- /dev/null
+++ b/talk/notes.md
@@ -0,0 +1,99 @@
+
+
+- Motivation: Critical section termination to allow ISRs to run
+- How to check that it's bounded? Code review, empirical, program analysis,
+    
+  "write it that way", in e.g. Coq
+
+  What are we analyzing? "termination", Strong notion of progress; ignores e.g. volatile operations
+  Critical Section terminates
+
+- Isn't this this halting problem?
+
+  Explain halting problem; and "maybe-halting problem"
+
+  Trivial proof that you can do this
+
+  https://en.wikipedia.org/wiki/Rice%27s_theorem
+
+  useful not universal
+
+- borrowck: example conservative though accurate, and here's how you get around non-universality:
+  - escape and assume (`unsafe`)
+  - restructure subject (rewrite your code)
+  - enhance the checker (e.g. Polonius, NLL, Miri)
+
+- Non-trivial analysis -- prove three things:
+
+  1. Inter-function termination: the calls in the critical section
+  2. Intra-function termination: the basic blocks in each function
+  3. Instruction-level termination: the instructions in each basic block
+
+- (1) Calls:
+  - If all functions transitively called by the critical section terminate (2),
+  - and the call graph is acyclic,
+  then the critical section will terminate.
+
+  Show a call graph; flowchart!
+
+  (Long version: talk about SCCs, worklist algorithm.)
+
+- (2) Intra-function termination, version 1:
+  - If all instructions in the function terminate (3), and
+  - the control-flow graph of the function is acyclic,
+  then the critical section will terminate.
+
+  Control flow: "basic blocks", contain instructions.
+  Just like the call graph!
+
+- (3) Instructions terminate
+
+  This isn't actually true. But we assume it.
+  It's a different _kind_ of bug if you get an incomplete memory operation.
+
+  (footnote on cceckman's experience)
+
+This is accurate, but conservative: it does not allow recursion (1)
+or loops (2).
+
+Come back to (2):
+
+- (2) Intra-function termination, version 2:
+  - If all instructions in the function terminate (3), and
+  - all cycles escape in a bounded amount of iterations,
+  then the critical section will terminate.
+
+  Here we have "enhance"!
+
+  Use LLVM loop canonicalization + analysis; LLVM uses these for loop unrolling.
+  (Lots of existing research. We just built on it.)
+
+
+Limitations:
+
+- Calls outside of the module (compilation unit: crate for Rust, .o for C/C++)
+    - LLVM can do whole-program analysis, we haven't tried it yet;
+      expensive as programs get bigger
+- ...including intrinsics, which show up everywhere
+    - Solution we want: escape, `unsafe` equivalent:
+      "Assume this terminates" or "assume this does not terminate"
+
+- No recursion
+  - May be an enhancement, with more research!
+- Indirect calls
+- Just LLVM IR
+- Assumption of instruction progress
+- We haven't implemented LLVM analysis invalidation yet, so don't run this with any transform passes!
+
+
+Bonus: gotchas!
+
+- Clang + LLVM inline very aggressively -- pretty much whenever it can in our test programs.
+- Clang and LLVM report a free-form "annotation" attribute, but it doesn't seem to make it from C to the IR.
+  - We wanted -- and still want -- to use this for labeling/ escapes.
+- Module-level vs. function-level passes.
+  - A function-level pass cannot get mutable module-level results. Even if they're cached and still valid.
+  - Module-level passes cover external functions (`declare`) as well as local (`define`)...
+  - ...and the lopo analysis pass doesn't terminate when given an empty-bodied function. :-D
+- Loop analysis only works after loop canonicalization. So we're running on the output of -01.
+
diff --git a/talk/transcript.txt b/talk/transcript.txt
new file mode 100644
index 0000000..58314da
--- /dev/null
+++ b/talk/transcript.txt
@@ -0,0 +1,215 @@
+
+[00:00:00.000 --> 00:00:17.540]   This is a draft of my talk/blog post on Sarah Connor, which is a termination checker. Analysis
+[00:00:17.540 --> 00:00:32.540]   task in written in C++ for LLVM IR. I worked on this with Anbek Grover and I am CC Ekman.
+[00:00:32.540 --> 00:00:39.700]   So what's the problem we're dealing with? So we have a... I used to work in embedded
+[00:00:39.700 --> 00:00:47.660]   development, and one of the things that you have in embedded development are interrupts.
+[00:00:47.660 --> 00:00:54.300]   The thing about interrupts is that they preempt the normal thread of execution. So you're
+[00:00:54.300 --> 00:01:01.180]   going along with whatever your program is and an interrupt comes in, and you switch
+[00:01:01.180 --> 00:01:07.620]   to this interrupt handler. When the interrupt handler completes, you drop back down to the
+[00:01:07.620 --> 00:01:19.780]   normal thread of execution. This is called the, often an IRQ thread or an interrupt thread.
+[00:01:19.780 --> 00:01:25.980]   So this is all well and good, because it means that you can respond quickly to this interrupt
+[00:01:25.980 --> 00:01:44.460]   when it happens. The problem is that that's not always okay. It's not always okay to get
+[00:01:44.460 --> 00:01:54.860]   interrupted like this. Let's say your interrupt handler sort of puts something into a queue
+[00:01:54.860 --> 00:02:01.240]   and then the thread pulls something out of that queue. Well, depending on how the queue
+[00:02:01.240 --> 00:02:07.860]   is structured, it may not be safe to, when the thread is in the process of pulling out
+[00:02:07.860 --> 00:02:16.820]   of that queue, start a new interrupt handler. Or there's other cases where maybe you have
+[00:02:16.820 --> 00:02:20.340]   multiple threads and they're getting preempted. This is common in operating systems. So you
+[00:02:20.340 --> 00:02:25.920]   have this one, you go to an interrupt, and then you go to this other thread, and then
+[00:02:25.920 --> 00:02:36.460]   it gets interrupted and you drop back down to your original thread. So T2, T1. And maybe
+[00:02:36.460 --> 00:02:43.260]   sometimes you are doing something in this thread where you need it to execute atomically.
+[00:02:43.260 --> 00:02:50.140]   You need to be in thread one, and then even if an interrupt comes in here, then you need
+[00:02:50.140 --> 00:03:00.140]   to continue executing until you reach the end of this critical section, as we call it.
+[00:03:00.140 --> 00:03:04.780]   And once that's done, you can go back to the interrupt, but this portion needs to execute
+[00:03:04.780 --> 00:03:11.700]   atomically. It needs to finish executing before we jump to anything else. Now, the problem
+[00:03:11.700 --> 00:03:24.180]   with critical sections is time, specifically latency. If I have my threaded execution,
+[00:03:24.180 --> 00:03:34.100]   I have my critical section, and if this is, let's say it takes 10 microseconds to execute,
+[00:03:34.100 --> 00:03:40.860]   if an interrupt happens right there, let's say at time is one microsecond, normally without
+[00:03:40.860 --> 00:03:47.620]   the critical section, my interrupt latency would be one microsecond. It would be effectively
+[00:03:47.620 --> 00:04:01.540]   minimal. Maybe it only takes a couple of nanoseconds to jump to that interrupt handler. So without
+[00:04:01.540 --> 00:04:10.420]   that critical section, maybe it's five nanoseconds or something. It's probably closer to 20.
+[00:04:10.420 --> 00:04:18.740]   In order to jump to that other interrupt handler. But with this, now this interrupt gets blocked,
+[00:04:18.740 --> 00:04:29.380]   it gets held all the way until that critical section is done. So now we've added in the
+[00:04:29.380 --> 00:04:36.300]   worst case, the entire latency of this critical section to our interrupt handling latency.
+[00:04:36.300 --> 00:04:43.460]   And it's really bad if this critical section is unbounded, right? We can sort of account
+[00:04:43.460 --> 00:04:50.420]   for well, the longest critical section is, let's say we've measured it and said, okay,
+[00:04:50.420 --> 00:05:02.340]   it's 10 microseconds or whatever. So we say our interrupt latency is about 10 microseconds.
+[00:05:02.340 --> 00:05:07.700]   We know that it's going to be in the worst case, as long as that critical section, that
+[00:05:07.700 --> 00:05:12.000]   longest critical section. If that critical section isn't bounded, and we're going to
+[00:05:12.000 --> 00:05:21.620]   last 30 seconds or something while we are running this thread, then that's really bad.
+[00:05:21.620 --> 00:05:34.740]   We don't have any guarantees of how quickly we can service an interrupt. I realized I
+[00:05:34.740 --> 00:05:45.220]   marked this IRQ, and I should mark that ISR, interrupt service routine. So we don't have
+[00:05:45.220 --> 00:05:54.060]   any interrupt guarantees if we don't have a bounds on interrupt latency. So if we don't
+[00:05:54.060 --> 00:06:08.300]   have any bounds on the critical section, how do we bound the time in that critical section?
+[00:06:08.300 --> 00:06:18.220]   You can do it empirically, right? We can say, well, we've never seen it beyond whatever
+[00:06:18.220 --> 00:06:27.340]   amount of time. And you can get away with that sometimes. It's not very precise. It's
+[00:06:27.340 --> 00:06:45.180]   not very forward. Another way would be to say we have bounds on the work done. So we
+[00:06:45.180 --> 00:06:54.700]   know that it's only going to be some number of instructions. We know that it's only going
+[00:06:54.700 --> 00:07:06.060]   to execute so many instructions. And we have a bounds on those instructions completing.
+[00:07:06.060 --> 00:07:11.580]   So we say, OK, we know that at worst the memory is going to take this long to access. Any
+[00:07:11.580 --> 00:07:18.900]   given instruction is only going to take this long to access on the CPU. And so we in net
+[00:07:18.900 --> 00:07:25.700]   know that these two together mean that we'll only execute for a certain amount of time.
+[00:07:25.700 --> 00:07:37.500]   OK, so how do you get-- and so you can do this upfront with just program analysis without
+[00:07:37.500 --> 00:07:43.420]   ever running the program. And you can sort of do program analysis two ways, right? One
+[00:07:43.420 --> 00:07:53.620]   is just code review. But I like to say that a lot of programming is the process of encoding
+[00:07:53.620 --> 00:08:07.020]   judgment and automating judgment. So can we do it with a computer? Also, humans are faulty.
+[00:08:07.020 --> 00:08:11.900]   We have to get it right every time in code review. Can we get it right once, get our
+[00:08:11.900 --> 00:08:18.660]   algorithm right run once, and then just have the computer do it repeatedly? There's a lot
+[00:08:18.660 --> 00:08:24.860]   of things like this. So I came up with this idea to do this kind of analysis when I was
+[00:08:24.860 --> 00:08:31.500]   working on a system where we had this problem. We were doing these checks with code review.
+[00:08:31.500 --> 00:08:39.740]   And I proposed doing this because there's sort of analogies with LLVM, or I guess it's
+[00:08:39.740 --> 00:08:53.300]   Clang specifically, which is a C and C++ compiler, has something called thread safety annotations.
+[00:08:53.300 --> 00:08:59.220]   These are a big part of the inspiration for this project, where you can say a variable
+[00:08:59.220 --> 00:09:17.380]   is guarded by a mutex. And the compiler will check any time that this variable is accessed,
+[00:09:17.380 --> 00:09:23.900]   this mutex is held. And it will tell you that statically. You don't have to check this rule
+[00:09:23.900 --> 00:09:33.020]   by hand. So this was part of the inspiration. So I proposed writing this. And multiple co-workers
+[00:09:33.020 --> 00:09:46.780]   said isn't this the halting problem? And the answer is no. I'll get to that in a second.
+[00:09:46.780 --> 00:09:54.740]   So for those unfamiliar, what is the halting problem? This was Alan Turing's big result,
+[00:09:54.740 --> 00:10:03.140]   one of his first big results. I think the paper was 1935 or 1938, somewhere in there.
+[00:10:03.140 --> 00:10:16.820]   Very big deal paper. And the key result is something like this. There is no program G
+[00:10:16.820 --> 00:10:46.580]   that for any program F can determine if F terminates. So in general, does this program
+[00:10:46.580 --> 00:10:54.660]   this critical section, does this terminate? In general, you cannot answer that question.
+[00:10:54.660 --> 00:11:03.040]   You cannot come up with an algorithm that for any such program, one of these, will tell
+[00:11:03.040 --> 00:11:23.060]   you that result. And that is unfortunate. So there's no program G that returns a yes
+[00:11:23.060 --> 00:11:30.980]   or no answer. So here's why this isn't the halting problem. I don't actually need to
+[00:11:30.980 --> 00:11:36.620]   answer this question for any possible critical section. I don't even always need to get an
+[00:11:36.620 --> 00:11:43.540]   answer for my programs. So the question that we're dealing with is what I sort of call
+[00:11:43.540 --> 00:12:10.780]   the maybe halting problem, which is to write a program G that for any program F reports
+[00:12:10.780 --> 00:12:34.140]   whether F well, either reports that F terminates in a bounded amount of time. F does not terminate
+[00:12:34.140 --> 00:12:50.340]   is unbounded such programs exist, or that it cannot determine whether or not it terminates.
+[00:12:50.340 --> 00:12:59.740]   And the proof that such a program G exists is trivial. Or that a class program to exist
+[00:12:59.740 --> 00:13:06.500]   is trivial. Does anyone want to help me write this? We'll say define G of F. And what are
+[00:13:06.500 --> 00:13:22.460]   we going to return? We've done it. We've solved the maybe halting problem. We can trivially
+[00:13:22.460 --> 00:13:27.740]   always return I don't know. And we're always accurate because we're not making a statement
+[00:13:27.740 --> 00:13:32.220]   actually about this program, we're making a statement about this program, when we return
+[00:13:32.220 --> 00:13:38.540]   I don't know. If we return a bounded or does not terminate result for F, then we're making
+[00:13:38.540 --> 00:13:45.060]   a statement about F, but we can always fall back to I don't know. Now, this version of
+[00:13:45.060 --> 00:13:54.340]   this program is not useful. However, some of these programs can be useful. You know,
+[00:13:54.340 --> 00:14:04.620]   not universal, but useful. Because if this cannot report, I don't know, if it reports
+[00:14:04.620 --> 00:14:08.780]   anything other than I don't know, then we know that we have a problem. If it reports,
+[00:14:08.780 --> 00:14:16.380]   I don't know, then maybe we need to either enhance our checker, or we need to rewrite
+[00:14:16.380 --> 00:14:21.900]   our program such that the automated reasoning can cover it. Or we need to put in some sort
+[00:14:21.900 --> 00:14:29.580]   of escape hatch such that the automated reasoner will just assume our result is correct. An
+[00:14:29.580 --> 00:14:37.220]   analogy here is the Rust borrow checker, which remains one of my favorite sort of pieces
+[00:14:37.220 --> 00:14:43.980]   of software to have in my workflow. The borrow checker can't always reason about whether
+[00:14:43.980 --> 00:14:51.060]   your use of memory and pointers is correct. Correct with respect to lifetime safety and
+[00:14:51.060 --> 00:14:57.900]   bounds checking and all of that. It can't always reason about that. And if it gives
+[00:14:57.900 --> 00:15:05.460]   you a result that says, this is, you know, I don't, I don't check this, I don't believe
+[00:15:05.460 --> 00:15:12.460]   that this is correct, you have three options. One is, if you're really, really sure, you
+[00:15:12.460 --> 00:15:26.360]   can use unsafe. You can say, believe me. You can update the borrow checker. And there's
+[00:15:26.360 --> 00:15:31.700]   lots of efforts where they've, they've enhanced what's going on in here to make it more precise,
+[00:15:31.700 --> 00:15:39.700]   to add non-lexical lifetimes and split borrows and enhancing it to make it more precise,
+[00:15:39.700 --> 00:15:46.340]   right, not less accurate, but more precise so that it can accept more things and reason
+[00:15:46.340 --> 00:15:55.980]   about more things. Or you can restructure your program in order to make it something
+[00:15:55.980 --> 00:16:00.860]   that adheres to these limits or that pushes them to a runtime check rather than a compile
+[00:16:00.860 --> 00:16:06.500]   time check. Any of these things, you have these options. And so we're looking at the
+[00:16:06.500 --> 00:16:11.700]   same thing, right? If we get a result that says, I don't know, that can still be useful
+[00:16:11.700 --> 00:16:24.820]   to us, even if it's not, not always, even if we get it more than we would like. So,
+[00:16:24.820 --> 00:16:36.940]   all that said, how do we make a useful one of these, right? A non-trivial one. So, the
+[00:16:36.940 --> 00:16:45.460]   main thing that we're, we're concerned with is we're going to go ahead and assume that
+[00:16:45.460 --> 00:16:55.100]   every instruction terminates. This isn't actually strictly true. I've run into systems where
+[00:16:55.100 --> 00:17:02.460]   you'll do an MMIO operation and it will hang forever because it's waiting on some peripheral
+[00:17:02.460 --> 00:17:08.220]   that doesn't respond. We're not concerned with that here. That is outside of the scope
+[00:17:08.220 --> 00:17:15.140]   of this problem. If you are doing such an access, that's a program level, that's a problem
+[00:17:15.140 --> 00:17:18.500]   with that load that you're doing that in the critical section. It's not something that
+[00:17:18.500 --> 00:17:25.140]   we expect to detect. But, so we can start by assuming that. And then we can start thinking
+[00:17:25.140 --> 00:17:32.660]   about, well, what properties are we really looking for? And an easy and fairly naive
+[00:17:32.660 --> 00:17:39.060]   way of getting some sort of a useful result is, so we'll call this version one, which
+[00:17:39.060 --> 00:17:53.060]   is no loops. If there are no cycles in the control flow graph of whatever sub-program
+[00:17:53.060 --> 00:18:01.300]   we're analyzing, then we will, then it will eventually terminate, right? So, this assumption
+[00:18:01.300 --> 00:18:10.260]   ensures progress of an individual instruction. This ensures progress through groups of instructions.
+[00:18:10.260 --> 00:18:19.540]   And so eventually we will reach a terminator for this sub-program. So, more specifically,
+[00:18:19.540 --> 00:18:32.580]   what we're saying is basic block control flow graph, that is intra-function control flow
+[00:18:32.580 --> 00:18:37.940]   graph, is a DAG, directed acyclic graph. So, if it is a directed acyclic graph, so you
+[00:18:37.940 --> 00:18:43.860]   can have go-tos, but there are no cycles in it. So, you're always going to progress through
+[00:18:43.860 --> 00:18:48.740]   it. And the longest path through that DAG, from the start to some termination, gives
+[00:18:48.740 --> 00:18:52.260]   us our longest bound. And we're not going to try to measure that right now, but it gives
+[00:18:52.260 --> 00:19:03.380]   us, like, there is a bound. We also need, so let me put in hand conditioning there. The
+[00:19:03.380 --> 00:19:17.060]   call graph is a DAG, or it is acyclic, right? So, this is at the basic block level, a loop,
+[00:19:17.060 --> 00:19:24.420]   a cycle in basic blocks indicates a loop. A cycle in the call graph indicates recursion.
+[00:19:24.420 --> 00:19:33.940]   So, if we have these two properties, and we assume this property, we'll always have progress
+[00:19:33.940 --> 00:19:40.260]   between functions, we'll always have progress within a function. So, if all of our functions
+[00:19:40.260 --> 00:19:45.940]   that we're calling this adhere to these properties, then, great, we're done. We can actually say,
+[00:19:46.500 --> 00:19:54.340]   yes, this will terminate. And that is a useful result. So, how do we get this analysis?
+[00:19:54.340 --> 00:20:06.740]   LLVM has these analysis passes. So, an LLVM analyzer will get us a call graph.
+[00:20:06.740 --> 00:20:15.380]   It'll tell us for every function what are the functions that it calls. And it has specifically
+[00:20:15.380 --> 00:20:26.500]   a call graph SCC. SCC stands for simply connected components, which is a term that I learned from
+[00:20:26.500 --> 00:20:33.140]   Anbei, who I worked on this with. And it refers to a portion of a graph where all of the nodes have
+[00:20:33.140 --> 00:20:40.980]   some path to every other node in that graph. So, we can look at these simply connected components,
+[00:20:40.980 --> 00:20:48.100]   and LLVM will actually tell us if there is a cycle in this simply connected component.
+[00:20:48.100 --> 00:20:53.220]   If there is, then we know that there is recursion in this portion of our call graph, and we know
+[00:20:53.220 --> 00:20:59.060]   that we have a problem, we need to say I can't guarantee that this will terminate. LLVM also
+[00:20:59.060 --> 00:21:12.260]   gives us a basic block structure within a function. And that can also give us a set of SCCs
+[00:21:12.260 --> 00:21:18.820]   at that level. And again, we can check that for cycles. And if there are cycles, we can report,
+[00:21:18.820 --> 00:21:26.100]   yeah, I can't make any guarantees. So, we've written analysis passes that will do this,
+[00:21:26.100 --> 00:21:33.300]   and we'll report back basically for, okay, so there's one other level which is sort of,
+[00:21:33.300 --> 00:21:36.500]   you can get these results for an individual component, and then
+[00:21:36.500 --> 00:21:42.980]   propagate through the call graph and the basic blocks. And there's some sort of wiring that you
+[00:21:42.980 --> 00:21:47.780]   have to do for this. But the basic idea is that you can, once you have a result from one of these,
+[00:21:47.780 --> 00:21:55.860]   you can say, well, so this function has a loop in it, let me propagate that to the other portions
+[00:21:55.860 --> 00:22:00.340]   of this. Therefore, if you take this path, you will get this result, propagate that up to the
+[00:22:00.340 --> 00:22:06.740]   call graph layer. And so then you can say, well, this function, it calls, you know, it only calls
+[00:22:06.740 --> 00:22:12.100]   things which are in a DAG, but it calls a function which is not guaranteed to terminate.
+[00:22:12.100 --> 00:22:23.140]   Therefore, I guess this is the other condition is that each function in call graph
+[00:22:24.420 --> 00:22:30.660]   meets this criterion. So, that's probably a more precise phrasing is start with this,
+[00:22:30.660 --> 00:22:39.940]   go to this, and then make this assumption. So, this is progress at the call layer,
+[00:22:39.940 --> 00:22:45.540]   this is progress at the basic block layer, so progress through basic blocks,
+[00:22:45.540 --> 00:22:59.780]   and this is progress through instructions. Okay, so this is itself useful, right? This will
+[00:22:59.780 --> 00:23:05.620]   basically check for us, there's no recursion, there's no loops, and that is enough for us to accept
+[00:23:05.620 --> 00:23:11.940]   some set of programs, some simple set of programs, but some set of programs, and make a guarantee
+[00:23:11.940 --> 00:23:17.620]   that they will complete in a bounded amount of time. We can get slightly more sophisticated,
+[00:23:17.620 --> 00:23:23.220]   though, by doing the same as the above, but loosening the restrictions. So, we're still going
+[00:23:23.220 --> 00:23:30.420]   to say condition one, which is call graph is a DAG. So, we're still not allowing mutual recursion,
+[00:23:30.420 --> 00:23:35.220]   basically, because we don't have a good way to reason about it. There's definitely research on
+[00:23:35.220 --> 00:23:44.340]   this, but we have not done it. So, we're skipping that for now. Two is that the intra-function
+[00:23:44.340 --> 00:24:04.980]   control photograph. Only either is a DAG. So, let's actually say is a DAG of SCC, it's because
+[00:24:04.980 --> 00:24:19.700]   the one in the SCC is otherwise, and each SCC is bounded. So, there are some kinds of loops
+[00:24:19.700 --> 00:24:26.420]   where the compiler can actually determine, yes, this loop is bounded. So, if you have, for instance,
+[00:24:26.420 --> 00:24:37.220]   a sort of canonical for loop, for int i is zero, i is less than five, increment i,
+[00:24:37.220 --> 00:24:48.980]   do stuff in here. The compiler can reason that this is bounded as long as this is bounded.
+[00:24:48.980 --> 00:24:54.980]   So, as long as this loop body completes in a finite amount of time, it will complete a
+[00:24:54.980 --> 00:25:00.580]   finite number of loops. Therefore, overall, it is bounded even though there is a loop.
+[00:25:00.580 --> 00:25:11.940]   So, this would be a really useful result to get. Luckily, LLVM makes this easy for us because it
+[00:25:11.940 --> 00:25:19.620]   has this thing called the loop pass. There's actually a few different kinds of loop passes.
+[00:25:21.780 --> 00:25:29.300]   One of them canonicalizes loops into a way that LLVM likes to analyze, but one of them will give
+[00:25:29.300 --> 00:25:44.100]   us the SCCs and will tell us if they are bounded. And this analysis, again, is accurate but not
+[00:25:44.100 --> 00:25:55.060]   precise. It is conservative. If LLVM tells us it is bounded, we believe that it is bounded,
+[00:25:55.060 --> 00:26:00.900]   and we'll use that result. If LLVM tells us that it is not bounded, well, maybe we humans
+[00:26:00.900 --> 00:26:08.420]   can reason that it is bounded, but the compiler can't. And so, we're, again, returning that as a
+[00:26:08.420 --> 00:26:14.580]   "I can't tell." Our analysis is not sophisticated enough to do the same reasoning that a human
+[00:26:14.580 --> 00:26:22.100]   would. And that's okay. We're only asking it to do what it can do, and we'll punt that back to the
+[00:26:22.100 --> 00:26:28.580]   humans if it gets a result that I can't reason that. So, this gives us more things that we can
+[00:26:28.580 --> 00:26:33.220]   say will make progress within a function, and we're going to keep that third criterion of
+[00:26:33.780 --> 00:26:41.860]   instruction progress. So, this is what we have implemented, or what we think we have implemented,
+[00:26:41.860 --> 00:26:48.420]   is this kind of analysis which runs through every function in an LLVM module and tries to do this.
+[00:26:48.420 --> 00:26:55.620]   And it works. The results that we have seem to be accurate based on our expectations of the
+[00:26:55.620 --> 00:27:04.900]   limitations of this. There are a few limitations. One limitation is assumption of instruction
+[00:27:04.900 --> 00:27:19.700]   progress. Another is that we only have LLVM IR analysis, so we're limited in the languages
+[00:27:19.700 --> 00:27:30.020]   that we can do. We're relying on LLVM's loop passes. Another is that this is Intramodule.
+[00:27:30.020 --> 00:27:44.900]   So, this is a sort of a .O file for C or C++. It is a crate in Rust, which we haven't tested yet.
+[00:27:49.060 --> 00:27:58.500]   So, we can't peek across calls that go between modules. So, if you call malloc,
+[00:27:58.500 --> 00:28:12.020]   we consider malloc. We don't know whether this will terminate, and so that sort of poisons your
+[00:28:12.020 --> 00:28:20.500]   whole graph if you're doing this sort of thing. As long as you're not... Well, the real problem here
+[00:28:20.500 --> 00:28:33.540]   is intrinsics. LLVM will insert, or the compiler will insert, these intrinsic operations like
+[00:28:33.540 --> 00:28:43.780]   lifetime_stark or other sort of bookkeeping things in order to provide injection points for things
+[00:28:43.780 --> 00:28:50.340]   like other sorts of analysis passes or dynamic analysis. And we don't have a model for those.
+[00:28:50.340 --> 00:28:57.460]   We have not classified those intrinsics in order to say yes or no, this will terminate or this
+[00:28:57.460 --> 00:29:07.300]   won't terminate. The sort of more general case of this is bypasses. We would like to be able to say
+[00:29:07.300 --> 00:29:14.100]   something like malloc and give it an annotation.
+[00:29:14.100 --> 00:29:26.580]   Assume that it terminates. Now, maybe malloc isn't a good idea for this, but something like
+[00:29:27.540 --> 00:29:38.180]   what's a good example, LLVM lifetime_stark or something like that. So, for these intrinsics.
+[00:29:38.180 --> 00:29:48.340]   Or if you have my_special_function, you'd want to say the same thing. Actually, a really important
+[00:29:48.340 --> 00:30:02.740]   one of these is atomic, let's say fetch_add, which is an intrinsic in the IR, but it compiles
+[00:30:02.740 --> 00:30:10.020]   to a loop on some architectures because basically you have to try a memory operation that's
+[00:30:10.020 --> 00:30:19.700]   weekly ordered and then retry it several times. So you may want to annotate that as assume something,
+[00:30:19.700 --> 00:30:28.260]   bounded or unbounded. But also, if you have your own function where, well, you have a guarantee
+[00:30:28.260 --> 00:30:34.980]   that this list will be bounded by some maximum and you're going to go over each item in the list,
+[00:30:34.980 --> 00:30:39.940]   but there's only going to be a bounded number of items in it, LLVM might not be able to tell
+[00:30:39.940 --> 00:30:48.660]   that, but you can. You may be able to say, yes, that's fine. I know that this will be terminated,
+[00:30:48.660 --> 00:30:53.140]   and therefore, please just assume that this function is good. And then you can rely on
+[00:30:53.140 --> 00:31:00.180]   that analysis and other things. So this is sort of the moral equivalent of unsafe for the borrow
+[00:31:00.180 --> 00:31:07.380]   checker. You are telling the analyzer, I know you can't prove it, but I believe it to be true, and
+[00:31:08.260 --> 00:31:13.140]   so just trust me on this. And that is a perfectly fine thing for us to include. We just haven't
+[00:31:13.140 --> 00:31:22.740]   figured out how to actually get it to work with LLVM. So that is what we have. And if you want to
+[00:31:22.740 --> 00:31:29.380]   try it out, the repo is in my GitHub. And yeah, I look forward to talking with you more about it
+[00:31:29.380 --> 00:31:30.900]   if anyone is interested.
+

From eeae3cea481c37a39450f92cc15ad8d58b2d0336 Mon Sep 17 00:00:00 2001
From: Charles Eckman <charles@cceckman.com>
Date: Thu, 13 Jun 2024 12:16:14 -0400
Subject: [PATCH 2/2] Clean up algorithm from talk-prep discussion, better
 handling for Unbounded

---
 src/BoundedTerminationPass.cpp        | 195 ++++++++++++++------------
 talk/notes.md                         |   3 +
 testdata/has_infinite_volatile_loop.c |   5 +-
 testdata/infinite_branches.c          |  24 ++++
 4 files changed, 136 insertions(+), 91 deletions(-)
 create mode 100644 testdata/infinite_branches.c

diff --git a/src/BoundedTerminationPass.cpp b/src/BoundedTerminationPass.cpp
index 72888ca..f82945c 100644
--- a/src/BoundedTerminationPass.cpp
+++ b/src/BoundedTerminationPass.cpp
@@ -142,7 +142,8 @@ class BoundedTerminationPrinter
 class FunctionBoundedTerminationPrinter
     : public llvm::PassInfoMixin<FunctionBoundedTerminationPrinter> {
 public:
-  explicit FunctionBoundedTerminationPrinter(llvm::raw_ostream &OutS) : OS(OutS) {}
+  explicit FunctionBoundedTerminationPrinter(llvm::raw_ostream &OutS)
+      : OS(OutS) {}
   llvm::PreservedAnalyses run(llvm::Function &IR,
                               llvm::FunctionAnalysisManager &FAM);
   // Part of the official API:
@@ -201,33 +202,6 @@ std::string friendly_name_block(llvm::StringRef unfriendly) {
   return result;
 }
 
-TerminationPassResult basicBlockClassifier(const llvm::BasicBlock &block) {
-  for (const auto &I : block) {
-    // Classify instructions based on whether we need to look at their metadata
-    // In particular: call, invoke, callbr (these might have unbounded behavior)
-    if (const auto *CI = llvm::dyn_cast<const llvm::CallBase>(&I)) {
-      if (auto called_function = CI->getCalledFunction();
-          called_function == nullptr) {
-        // Indirect function call; we don't know what the properties of the
-        // target are, and our CG analysis won't cover it.
-        // (CG covers CGSCC/recursion detection, as well as propagating
-        // unbounded backwards .)
-        // TODO: Is this necesary, given that we're analyzing calls at the module layer?
-        // The CallGraph has a lot more information about what calls are possible,
-        // and includes a "null" node that indirect calls point to.
-        // Whole-program devirtualization might even take an indirect call and turn it
-        // into a direct call.
-        return TerminationPassResult{.elt = DoesThisTerminate::Unknown,
-                                     .explanation =
-                                         "Performs an indirect function call"};
-      }
-    }
-  }
-
-  return TerminationPassResult{.elt = DoesThisTerminate::Bounded,
-                               .explanation = "no indirect calls"};
-}
-
 TerminationPassResult join(TerminationPassResult res1,
                            TerminationPassResult res2) {
   TerminationPassResult minResult = std::min(res1, res2);
@@ -237,6 +211,7 @@ TerminationPassResult join(TerminationPassResult res1,
     return maxResult;
   }
 
+  // Does not take the interior edge as part of the lattice.
   if (minResult.elt == DoesThisTerminate::Bounded) {
     if (maxResult.elt == DoesThisTerminate::Unbounded) {
       return TerminationPassResult{
@@ -263,6 +238,8 @@ TerminationPassResult join(TerminationPassResult res1,
   return maxResult;
 }
 
+// TODO: If we fix propagation back towards the BB source,
+// can this be fully replaced with `join`?
 TerminationPassResult update(TerminationPassResult result,
                              std::vector<TerminationPassResult> pred_results) {
 
@@ -287,6 +264,10 @@ TerminationPassResult update(TerminationPassResult result,
   }
 }
 
+// Reports whether the loop is bounded, or unknown.
+// TODO: Report "unbounded" if there is no exit from the loop.
+// TODO: ...and handle the "this function has a loop with no exit";
+// means that we have to propagate back to the entry block, look there.
 TerminationPassResult loopClassifier(const llvm::Loop &loop,
                                      llvm::ScalarEvolution &SE) {
   std::optional<llvm::Loop::LoopBounds> bounds = loop.getBounds(SE);
@@ -300,17 +281,6 @@ TerminationPassResult loopClassifier(const llvm::Loop &loop,
                                    "includes a loop, but it has a fixed bound"};
 }
 
-bool isExitingBlock(const llvm::BasicBlock &B) {
-  // Check whether the terminating instruction of the block is a
-  // "return"-type. https://llvm.org/docs/LangRef.html#terminator-instructions
-  const auto *terminator = B.getTerminator();
-  if (terminator == nullptr) {
-    // TODO: Can we print / capture an error here, or something?
-    return true;
-  }
-  return terminator->willReturn();
-}
-
 //------------------------------------------------------------------------------
 // Pass bodies
 //------------------------------------------------------------------------------
@@ -318,10 +288,10 @@ bool isExitingBlock(const llvm::BasicBlock &B) {
 FunctionTerminationPass::Result
 FunctionTerminationPass::run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
-  if(F.empty()) {
+  if (F.empty()) {
     return FunctionTerminationPass::Result{
-      .elt = DoesThisTerminate::Unknown,
-      .explanation = "has no basic blocks in this module",
+        .elt = DoesThisTerminate::Unknown,
+        .explanation = "has no basic blocks in this module",
     };
   }
 
@@ -333,53 +303,89 @@ FunctionTerminationPass::run(llvm::Function &F,
   // deterministic.
   llvm::SetVector<llvm::BasicBlock *> outstanding_nodes;
 
-  // Step 1 : do local basic block analysis
-  for (auto &basic_block : F) {
-    TerminationPassResult result = basicBlockClassifier(basic_block);
-    blocks_to_results.insert_or_assign(&basic_block, result);
-    outstanding_nodes.insert(&basic_block);
-  }
-
-  // Step 2 : do loop-level analysis. We need a ScalarEvolution to get the
-  // loops.
+  // Step 1 : do local basic block analysis.
+  // We don't need to do this? Assume every instruction terminates,
+  // including call instructions. (We'll handle them at the call-graph layer.)
+  // for (auto &basic_block : F) {
+  //   TerminationPassResult result = basicBlockClassifier(basic_block);
+  //   blocks_to_results.insert_or_assign(&basic_block, result);
+  //   outstanding_nodes.insert(&basic_block);
+  // }
+
+  // Step 2 : do loop-level analysis.
+  // We need a ScalarEvolution to get the loops.
+  // The blocks_to_results map is empty before we start this.
   for (auto &basic_block : F) {
     llvm::Loop *loop = loop_info.getLoopFor(&basic_block);
     if (loop == nullptr) {
+      // Block is (locally) bounded.
+      blocks_to_results.insert_or_assign(&basic_block,
+                                         TerminationPassResult{
+                                             .elt = DoesThisTerminate::Bounded,
+                                             .explanation = "",
+                                         });
       continue;
     }
+    // If the loop is bounded, we count this node as bounded too.
     auto result = loopClassifier(*loop, SE);
-    auto updated = join(blocks_to_results.at(&basic_block), result);
-    blocks_to_results.insert_or_assign(&basic_block, updated);
+    blocks_to_results.insert_or_assign(&basic_block, result);
+  }
+  // All blocks are labeled:
+  // - Bounded if not part of a loop.
+  // - Unbounded if an infinite loop (no exit) -- note, hypothetical, this is a TODO
+  // - Unknown if a loop bound cannot be determined
+
+  // Step 3 : aggregate results.
+  // In order to accurately capture:
+  /*
+  * void maybe_hold(bool stall) {
+  *   if(stall) {
+  *     while(true) {}
+  *   } else {
+  *     return;
+  *   }
+  */
+  // We need to propagate towards the entry block,
+  // then use the entry block to determine the function's result.
+  // Is "worklist towards the entry" equivalent "`join` over all results"?
+  // No, we still do need 'update', with its slightly-different semantics from 'join'.
+
+  // Worklist version (towards source):
+  for(llvm::BasicBlock &block : F) {
+    // We add everything, even the non-exit nodes,
+    // so that we make sure we eventually get back to the entry block.
+    // Consider:
+    // void does_not_terminate(bool stall) {
+    //   if(stall) { // entry block: B1, successors are B2/B3
+    //     while(true) {} // B2: predecessors are is B1, B2, successor is B2
+    //   } else {
+    //     while(true) {} // B3: predecessors are B1, B3, successor is B3
+    //  }
+    //  // B4? Exit block? May not exist, has no predecessors
+    // }
+    // We need to make sure that we propagate from non-exiting paths
+    // (Unbounded) to the entry block; so, add everything to begin with,
+    // and just iterate the worklist until we hit a fixpoint.
+    // We still know it will quiesce due to the convergent nature of update().
+    outstanding_nodes.insert(&block);
   }
-
-  // Step 3 : worklist algorithm.
   while (!outstanding_nodes.empty()) {
     llvm::BasicBlock *block = outstanding_nodes.pop_back_val();
     auto original = blocks_to_results.at(block);
     std::vector<TerminationPassResult> results;
-    for (llvm::BasicBlock *predecessor : llvm::predecessors(block)) {
-      results.emplace_back(blocks_to_results.at(predecessor));
+    for (llvm::BasicBlock *successor : llvm::successors(block)) {
+      results.emplace_back(blocks_to_results.at(successor));
     }
     auto altered = update(original, std::move(results));
     blocks_to_results.insert_or_assign(block, altered);
     if (altered.elt != original.elt) {
-      for (auto *successor : llvm::successors(block)) {
-        outstanding_nodes.insert(successor);
+      for (auto *predecessor : llvm::predecessors(block)) {
+        outstanding_nodes.insert(predecessor);
       }
     }
   }
 
-  // Step 4 : join results of exiting blocks
-  TerminationPassResult aggregate_result = {.elt = DoesThisTerminate::Bounded,
-                                            .explanation = ""};
-
-  for (auto const &[key, value] : blocks_to_results) {
-    if (isExitingBlock(*key)) {
-      aggregate_result = join(aggregate_result, value);
-    }
-  }
-
-  return aggregate_result;
+  return blocks_to_results.at(&F.getEntryBlock());
 }
 
 ModuleTerminationPass::Result
@@ -399,41 +405,44 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) {
   // Take anything in a recursive group and force it Unknown.
   // See also NoRecursionCheck in clang-tidy
   llvm::CallGraph &CG = AM.getResult<llvm::CallGraphAnalysis>(IR);
-  for(llvm::scc_iterator<llvm::CallGraph*> SCCI = llvm::scc_begin(&CG); !SCCI.isAtEnd(); ++SCCI) {
-    if(!SCCI.hasCycle()) {
+  for (llvm::scc_iterator<llvm::CallGraph *> SCCI = llvm::scc_begin(&CG);
+       !SCCI.isAtEnd(); ++SCCI) {
+    if (!SCCI.hasCycle()) {
       // SCC doesn't have a loop. We don't need to update anything.
       continue;
     }
     // SCC has a loop. Update all functions to note they're mutually recursive.
     const std::vector<llvm::CallGraphNode *> &nextSCC = *SCCI;
     TerminationPassResult shared_result = {
-      .elt = DoesThisTerminate::Unknown,
-      .explanation = "part of a call graph that contains a loop: ",
+        .elt = DoesThisTerminate::Unknown,
+        .explanation = "part of a call graph that contains a loop: ",
     };
     int count = 0;
-    for(llvm::CallGraphNode *node : nextSCC) {
+    for (llvm::CallGraphNode *node : nextSCC) {
       const llvm::Function *f = node->getFunction();
       // May be null:
-      // 1. Exported functions get edges "in from" the null node, to represent external calls.
+      // 1. Exported functions get edges "in from" the null node, to represent
+      // external calls.
       //    (We don't care about these.)
       // 2. Calls that leave this module, or have an indirect function call,
       //    have calls out to another "null" node.
       //    Indirect functions are handled at the function-local layer.
       //    Extern functions are handled as "unknown body".
       // So we should be fine to skip?
-      if(f == nullptr) {
+      if (f == nullptr) {
         continue;
       }
-      shared_result.explanation = (shared_result.explanation + llvm::demangle(f->getName()));
-      if(count < nextSCC.size() - 1) {
+      shared_result.explanation =
+          (shared_result.explanation + llvm::demangle(f->getName()));
+      if (count < nextSCC.size() - 1) {
         shared_result.explanation += ", ";
       } else {
         ++count;
       }
     }
-    for(llvm::CallGraphNode *node : nextSCC) {
+    for (llvm::CallGraphNode *node : nextSCC) {
       llvm::Function *f = node->getFunction();
-      const auto new_result =  update(per_function_results[f], {shared_result});
+      const auto new_result = update(per_function_results[f], {shared_result});
       per_function_results[f] = new_result;
     }
   }
@@ -442,21 +451,30 @@ ModuleTerminationPass::run(llvm::Module &IR, llvm::ModuleAnalysisManager &AM) {
   // So we just run N^2: scan through each function, update from callees,
   // and run again if we updated something.
   bool stale = true;
-  while(stale) {
+  while (stale) {
     stale = false;
-    for(auto &F : IR) {
+    for (auto &F : IR) {
       TerminationPassResult original = per_function_results[&F];
       const llvm::CallGraphNode *CGNode = CG[&F];
       std::vector<TerminationPassResult> results;
 
       // Update this node from its successors.
-      for(const auto &it : *CGNode) {
+      for (const auto &it : *CGNode) {
         llvm::CallGraphNode *callee = it.second;
-        if(auto *CalleeF = callee->getFunction(); CalleeF != nullptr) {
+        if (auto *CalleeF = callee->getFunction(); CalleeF != nullptr) {
           const auto &result = per_function_results[CalleeF];
           results.emplace_back(TerminationPassResult{
-            .elt = result.elt,
-            .explanation = "via call to " + llvm::demangle(CalleeF->getName()) + ": " + result.explanation,
+              .elt = result.elt,
+              .explanation = "via call to " +
+                             llvm::demangle(CalleeF->getName()) + ": " +
+                             result.explanation,
+          });
+        } else {
+          // Callee is nullptr. Does that mean it's indirect?
+          // TODO: Not sure; we need more testing of indirect calls.
+          results.emplace_back(TerminationPassResult{
+              .elt = DoesThisTerminate::Unknown,
+              .explanation = "via call to unknown function",
           });
         }
       }
@@ -475,8 +493,7 @@ llvm::PreservedAnalyses
 BoundedTerminationPrinter::run(llvm::Module &IR,
                                llvm::ModuleAnalysisManager &AM) {
   auto &module_results = AM.getResult<ModuleTerminationPass>(IR);
-  for (const auto &[function, result] : module_results.per_function_results)
-  {
+  for (const auto &[function, result] : module_results.per_function_results) {
     OS << "Function name: " << llvm::demangle(function->getName()) << "\n";
     OS << "Result: " << result.elt << "\n";
     OS << "Explanation: " << result.explanation << "\n\n";
diff --git a/talk/notes.md b/talk/notes.md
index 8bf483e..d5ab89f 100644
--- a/talk/notes.md
+++ b/talk/notes.md
@@ -29,6 +29,9 @@
   2. Intra-function termination: the basic blocks in each function
   3. Instruction-level termination: the instructions in each basic block
 
+Long version sidebar: lattice-or-not
+
+
 - (1) Calls:
   - If all functions transitively called by the critical section terminate (2),
   - and the call graph is acyclic,
diff --git a/testdata/has_infinite_volatile_loop.c b/testdata/has_infinite_volatile_loop.c
index 6d4182f..63a3d37 100644
--- a/testdata/has_infinite_volatile_loop.c
+++ b/testdata/has_infinite_volatile_loop.c
@@ -1,8 +1,9 @@
 
+volatile int x = 0;
+
 int main() {
-    volatile int x = 0;
     while(1) {
         x++;
     }
     return x;
-}
\ No newline at end of file
+}
diff --git a/testdata/infinite_branches.c b/testdata/infinite_branches.c
new file mode 100644
index 0000000..a59fc88
--- /dev/null
+++ b/testdata/infinite_branches.c
@@ -0,0 +1,24 @@
+
+void maybe_stall(int value) {
+    if(value) {
+      while(1) {}
+    } else {
+      return;
+    }
+}
+
+void always_stall(int value) {
+    if(value) {
+      while(1) {}
+    } else {
+      while(1) {}
+    }
+}
+
+int main() {
+    maybe_stall(0);
+    maybe_stall(1);
+    always_stall(1);
+    return 0;
+}
+