
Commit cacfaf8

Cranelift: Split out dominator tree's depth-first traversal into a reusable iterator (#8640)

We intend to use this when computing liveness of GC references in
`cranelift-frontend` to manually construct safepoints and ultimately remove
`r{32,64}` reference types from CLIF, `cranelift-codegen`, and `regalloc2`.

Co-authored-by: Trevor Elliott <[email protected]>
fitzgen and elliottt authored May 20, 2024
1 parent 0ea10ac commit cacfaf8
Showing 6 changed files with 227 additions and 84 deletions.
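For context, here is a minimal sketch, not part of this diff, of how a caller might drive the new `traversals::Dfs` API added below. The helper name `print_orders` and the pre-built `func` are hypothetical; the calls used (`Dfs::new`, `iter`, `pre_order_iter`, `post_order_iter`) are the ones this commit introduces.

use cranelift_codegen::ir::{Block, Function};
use cranelift_codegen::traversals::{Dfs, Event};

fn print_orders(func: &Function) {
    // One `Dfs` value can be reused across passes and functions; it keeps and
    // reuses its internal stack and seen-set allocations.
    let mut dfs = Dfs::new();

    // Post-order over the reachable blocks; this is what
    // `DominatorTree::compute_postorder` now consumes.
    let post: Vec<Block> = dfs.post_order_iter(func).collect();

    // Reversing the post-order yields the reverse post-order (RPO) that
    // forward dataflow analyses typically iterate in.
    let rpo: Vec<Block> = post.iter().rev().copied().collect();

    // Pre-order over the same function, reusing the traversal state.
    let pre: Vec<Block> = dfs.pre_order_iter(func).collect();

    // The low-level event stream interleaves block entry and exit.
    for (event, block) in dfs.iter(func) {
        match event {
            Event::Enter => println!("enter {block}"),
            Event::Exit => println!("exit {block}"),
        }
    }

    println!("pre-order:  {pre:?}");
    println!("post-order: {post:?}");
    println!("rpo:        {rpo:?}");
}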
1 change: 1 addition & 0 deletions Cargo.lock

Generated file; diff not rendered.

1 change: 1 addition & 0 deletions cranelift/codegen/Cargo.toml
@@ -43,6 +43,7 @@ rustc-hash = { workspace = true }
[dev-dependencies]
criterion = { workspace = true }
similar = "2.1.0"
env_logger = { workspace = true }

[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.109.0" }
88 changes: 6 additions & 82 deletions cranelift/codegen/src/dominator_tree.rs
@@ -5,6 +5,7 @@ use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::{Block, Function, Inst, Layout, ProgramPoint};
use crate::packed_option::PackedOption;
use crate::timing;
use crate::traversals::Dfs;
use alloc::vec::Vec;
use core::cmp;
use core::cmp::Ordering;
@@ -14,9 +15,6 @@ use core::mem;
/// room for modifications of the dominator tree.
const STRIDE: u32 = 4;

/// Special RPO numbers used during `compute_postorder`.
const SEEN: u32 = 1;

/// Dominator tree node. We keep one of these per block.
#[derive(Clone, Default)]
struct DomNode {
@@ -34,21 +32,15 @@ struct DomNode {
idom: PackedOption<Inst>,
}

/// DFT stack state marker for computing the cfg postorder.
enum Visit {
First,
Last,
}

/// The dominator tree for a single function.
pub struct DominatorTree {
nodes: SecondaryMap<Block, DomNode>,

/// CFG post-order of all reachable blocks.
postorder: Vec<Block>,

/// Scratch memory used by `compute_postorder()`.
stack: Vec<(Visit, Block)>,
/// Scratch traversal state used by `compute_postorder()`.
dfs: Dfs,

valid: bool,
}
@@ -225,7 +217,7 @@ impl DominatorTree {
Self {
nodes: SecondaryMap::new(),
postorder: Vec::new(),
stack: Vec::new(),
dfs: Dfs::new(),
valid: false,
}
}
@@ -236,7 +228,7 @@ impl DominatorTree {
let mut domtree = Self {
nodes: SecondaryMap::with_capacity(block_capacity),
postorder: Vec::with_capacity(block_capacity),
stack: Vec::new(),
dfs: Dfs::new(),
valid: false,
};
domtree.compute(func, cfg);
@@ -257,7 +249,6 @@ impl DominatorTree {
pub fn clear(&mut self) {
self.nodes.clear();
self.postorder.clear();
debug_assert!(self.stack.is_empty());
self.valid = false;
}

@@ -276,73 +267,7 @@ impl DominatorTree {
fn compute_postorder(&mut self, func: &Function) {
self.clear();
self.nodes.resize(func.dfg.num_blocks());

// This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
// post-order of the blocks that are reachable from the entry block. A DFT post-order is not
// unique. The specific order we get is controlled by the order each node's children are
// visited.
//
// We view the CFG as a graph where each `BlockCall` value of a terminating branch
// instruction is an edge. A consequence of this is that we visit successor nodes in the
// reverse order specified by the branch instruction that terminates the basic block.
// (Reversed because we are using a stack to control traversal, and push the successors in
// the order the branch instruction specifies -- there's no good reason for this particular
// order.)
//
// During this algorithm only, use `rpo_number` to hold the following state:
//
// 0: block has not yet had its first visit
// SEEN: block has been visited at least once, implying that all of its successors are on
// the stack

match func.layout.entry_block() {
Some(block) => {
self.stack.push((Visit::First, block));
}
None => return,
}

while let Some((visit, block)) = self.stack.pop() {
match visit {
Visit::First => {
if self.nodes[block].rpo_number == 0 {
// This is the first time we pop the block, so we need to scan its
// successors and then revisit it.
self.nodes[block].rpo_number = SEEN;
self.stack.push((Visit::Last, block));
if let Some(inst) = func.stencil.layout.last_inst(block) {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in the postorder, all other things being
// equal, which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space. This heuristic doesn't have
// any effect on the computation of dominators, and is purely for other
// consumers of the postorder we cache here.
for block in func.stencil.dfg.insts[inst]
.branch_destination(&func.stencil.dfg.jump_tables)
.iter()
.rev()
{
let succ = block.block(&func.stencil.dfg.value_lists);

// This is purely an optimization to avoid additional iterations of
// the loop, and is not required; it's merely inlining the check
// from the outer conditional of this case to avoid the extra loop
// iteration.
if self.nodes[succ].rpo_number == 0 {
self.stack.push((Visit::First, succ))
}
}
}
}
}

Visit::Last => {
// We've finished all this node's successors.
self.postorder.push(block);
}
}
}
self.postorder.extend(self.dfs.post_order_iter(func));
}

/// Build a dominator tree from a control flow graph using Keith D. Cooper's
@@ -467,7 +392,6 @@ impl DominatorTreePreorder {
/// Recompute this data structure to match `domtree`.
pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
self.nodes.clear();
debug_assert_eq!(self.stack.len(), 0);

// Step 1: Populate the child and sibling links.
//
1 change: 1 addition & 0 deletions cranelift/codegen/src/lib.rs
@@ -49,6 +49,7 @@ pub mod loop_analysis;
pub mod print_errors;
pub mod settings;
pub mod timing;
pub mod traversals;
pub mod verifier;
pub mod write;

216 changes: 216 additions & 0 deletions cranelift/codegen/src/traversals.rs
@@ -0,0 +1,216 @@
//! Traversals over the IR.

use crate::ir;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::hash::Hash;
use cranelift_entity::EntitySet;

/// A low-level DFS traversal event: either entering or exiting the traversal of
/// a block.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Event {
/// Entering traversal of a block.
///
/// Processing a block upon this event corresponds to a pre-order,
/// depth-first traversal.
Enter,

/// Exiting traversal of a block.
///
/// Processing a block upon this event corresponds to a post-order,
/// depth-first traversal.
Exit,
}

/// A depth-first traversal.
///
/// This is a fairly low-level traversal type, and is generally intended to be
/// used as a building block for making specific pre-order or post-order
/// traversals for whatever problem is at hand.
///
/// This type may be reused multiple times across different passes or functions
/// and will internally reuse any heap allocations it has already made.
///
/// Traversal is not recursive.
#[derive(Debug, Default, Clone)]
pub struct Dfs {
stack: Vec<(Event, ir::Block)>,
seen: EntitySet<ir::Block>,
}

impl Dfs {
/// Construct a new depth-first traversal.
pub fn new() -> Self {
Self::default()
}

/// Perform a depth-first traversal over the given function.
///
/// Yields pairs of `(Event, ir::Block)`.
///
/// This iterator can be used to perform either pre- or post-order
/// traversals, or a combination of the two.
pub fn iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsIter<'a> {
self.seen.clear();
self.stack.clear();
if let Some(e) = func.layout.entry_block() {
self.stack.push((Event::Enter, e));
}
DfsIter { dfs: self, func }
}

/// Perform a pre-order traversal over the given function.
///
/// Yields `ir::Block` items.
pub fn pre_order_iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsPreOrderIter<'a> {
DfsPreOrderIter(self.iter(func))
}

/// Perform a post-order traversal over the given function.
///
/// Yields `ir::Block` items.
pub fn post_order_iter<'a>(&'a mut self, func: &'a ir::Function) -> DfsPostOrderIter<'a> {
DfsPostOrderIter(self.iter(func))
}
}

/// An iterator that yields pairs of `(Event, ir::Block)` items as it performs a
/// depth-first traversal over its associated function.
pub struct DfsIter<'a> {
dfs: &'a mut Dfs,
func: &'a ir::Function,
}

impl Iterator for DfsIter<'_> {
type Item = (Event, ir::Block);

fn next(&mut self) -> Option<(Event, ir::Block)> {
let (event, block) = self.dfs.stack.pop()?;

if event == Event::Enter && self.dfs.seen.insert(block) {
self.dfs.stack.push((Event::Exit, block));
if let Some(inst) = self.func.layout.last_inst(block) {
self.dfs.stack.extend(
self.func.dfg.insts[inst]
.branch_destination(&self.func.dfg.jump_tables)
.iter()
// Heuristic: chase the children in reverse. This puts
// the first successor block first in the postorder, all
// other things being equal, which tends to prioritize
// loop backedges over out-edges, putting the edge-block
// closer to the loop body and minimizing live-ranges in
// linear instruction space. This heuristic doesn't have
// any effect on the computation of dominators, and is
// purely for other consumers of the postorder we cache
// here.
.rev()
.map(|block| block.block(&self.func.dfg.value_lists))
// This is purely an optimization to avoid additional
// iterations of the loop, and is not required; it's
// merely inlining the check from the outer conditional
// of this case to avoid the extra loop iteration. This
// also avoids potential excess stack growth.
.filter(|block| !self.dfs.seen.contains(*block))
.map(|block| (Event::Enter, block)),
);
}
}

Some((event, block))
}
}

/// An iterator that yields `ir::Block` items during a depth-first, pre-order
/// traversal over its associated function.
pub struct DfsPreOrderIter<'a>(DfsIter<'a>);

impl Iterator for DfsPreOrderIter<'_> {
type Item = ir::Block;

fn next(&mut self) -> Option<Self::Item> {
loop {
match self.0.next()? {
(Event::Enter, b) => return Some(b),
(Event::Exit, _) => continue,
}
}
}
}

/// An iterator that yields `ir::Block` items during a depth-first, post-order
/// traversal over its associated function.
pub struct DfsPostOrderIter<'a>(DfsIter<'a>);

impl Iterator for DfsPostOrderIter<'_> {
type Item = ir::Block;

fn next(&mut self) -> Option<Self::Item> {
loop {
match self.0.next()? {
(Event::Exit, b) => return Some(b),
(Event::Enter, _) => continue,
}
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{types::I32, Function, InstBuilder, TrapCode};

#[test]
fn test_dfs_traversal() {
let _ = env_logger::try_init();

let mut func = Function::new();

let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let block3 = func.dfg.make_block();

let mut cur = FuncCursor::new(&mut func);

// block0(v0):
// brif v0, block2, block3
cur.insert_block(block0);
cur.ins().brif(v0, block2, &[], block3, &[]);

// block3:
// trap user0
cur.insert_block(block3);
cur.ins().trap(TrapCode::User(0));

// block1:
// v1 = iconst.i32 1
// v2 = iadd v0, v1
// jump block0(v2)
cur.insert_block(block1);
let v1 = cur.ins().iconst(I32, 1);
let v2 = cur.ins().iadd(v0, v1);
cur.ins().jump(block0, &[v2]);

// block2:
// return v0
cur.insert_block(block2);
cur.ins().return_(&[v0]);

let mut dfs = Dfs::new();

assert_eq!(
dfs.iter(&func).collect::<Vec<_>>(),
vec![
(Event::Enter, block0),
(Event::Enter, block2),
(Event::Exit, block2),
(Event::Enter, block3),
(Event::Exit, block3),
(Event::Exit, block0)
],
);
}
}
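As a further illustration, and again not part of this commit, here is a hedged sketch of the kind of Enter/Exit bookkeeping the low-level `Dfs::iter` event stream enables; `dfs_depths` is a hypothetical helper that records each reachable block's depth in the depth-first spanning tree rooted at the entry block.

use std::collections::HashMap;

use cranelift_codegen::ir::{Block, Function};
use cranelift_codegen::traversals::{Dfs, Event};

/// Hypothetical helper: depth of each reachable block in the DFS spanning tree.
fn dfs_depths(func: &Function) -> HashMap<Block, usize> {
    let mut dfs = Dfs::new();
    let mut depths = HashMap::new();
    // Number of blocks currently entered but not yet exited.
    let mut open = 0usize;
    for (event, block) in dfs.iter(func) {
        match event {
            Event::Enter => {
                // Only the first `Enter` of a block gets a matching `Exit`
                // (the iterator pushes `Exit` only when the block is newly
                // seen), so a repeated `Enter` for an already-recorded block
                // is ignored here.
                if !depths.contains_key(&block) {
                    depths.insert(block, open);
                    open += 1;
                }
            }
            Event::Exit => open -= 1,
        }
    }
    depths
}

On the CFG built in the test above, this sketch would report depth 0 for block0 and depth 1 for block2 and block3, while the unreachable block1 never appears.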