New 'FlexC' dataset and CbcPruneExtractor #7

Open · wants to merge 3 commits into base: main
1 change: 1 addition & 0 deletions Cargo.toml
@@ -7,6 +7,7 @@ version = "0.1.0"

[features]
ilp-cbc = ["coin_cbc"]
ilp-cbc-prune = ["coin_cbc"]

[dependencies]
env_logger = {version = "0.10.0", default-features = false}
1 change: 1 addition & 0 deletions data/flexc/03d369d0-bb23-477a-8ea1-a452efc850fe.json


1 change: 1 addition & 0 deletions data/flexc/4a29e9e7-a232-4608-b7d2-37796d0ecd97.json


1 change: 1 addition & 0 deletions data/flexc/5cd5008c-4481-4751-90cf-0a0e23cd3d30.json


1 change: 1 addition & 0 deletions data/flexc/67addae9-f7db-4ebb-a02e-e1e045f87fc4.json


1 change: 1 addition & 0 deletions data/flexc/757f44bf-7134-426f-9d2f-71bc2dec1b36.json


1 change: 1 addition & 0 deletions data/flexc/7bdc5ff7-82f5-4bd0-a8c6-f3a735e016dc.json


1 change: 1 addition & 0 deletions data/flexc/85a0fc72-1972-4bc8-ab98-d0d468095601.json


1 change: 1 addition & 0 deletions data/flexc/86d12271-435d-463d-a7b4-e671423f0ced.json


6 changes: 6 additions & 0 deletions data/flexc/README.md
@@ -0,0 +1,6 @@
This dataset comes from a CGRA mapping tool called 'FlexC': https://arxiv.org/abs/2309.09112.

The e-graphs were derived by running equality saturation on the dataflow graphs of loop bodies found in C code.
A simple cost model considers e-node operations to be either free (cost 0), costly (cost 1), or unavailable (cost 10 000).
The 'unavailable' cost of 10 000 encodes an infinite cost: picking these nodes would result in CGRA mapping failure.
As this is dataflow rewriting, extraction should ideally consider DAG cost.
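
For illustration only, the three cost tiers amount to a lookup like the following Rust sketch; the operator names here are invented placeholders, and only the tier values (0, 1, and 10 000 as "infinite") come from the dataset description above.

// Hypothetical sketch of the three-tier FlexC-style cost model described above.
// Operator names are placeholders; only the tiers are taken from the README.
fn flexc_cost(op: &str) -> f64 {
    match op {
        // operations the CGRA provides for free
        "id" | "const" => 0.0,
        // operations the target CGRA cannot map at all; the huge cost acts as
        // infinity, since choosing such an e-node makes mapping fail
        "div" | "fmod" => 10_000.0,
        // every other operation occupies one processing element
        _ => 1.0,
    }
}

fn main() {
    assert_eq!(flexc_cost("add"), 1.0);
    assert_eq!(flexc_cost("div"), 10_000.0);
}

An extractor minimizing DAG cost under this model will avoid 'unavailable' nodes whenever the e-graph offers any alternative.
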
1 change: 1 addition & 0 deletions data/flexc/a097ffbc-8fe4-4919-b6b3-342b0a3acb1b.json


1 change: 1 addition & 0 deletions data/flexc/b87bf0f2-74dc-40f2-b601-cd31937b81d5.json


1 change: 1 addition & 0 deletions data/flexc/db344dbd-fb20-4e9e-badb-076d7a3809bc.json


1 change: 1 addition & 0 deletions data/flexc/fdbbd205-1519-4880-9a24-07b2ea772d16.json


1 change: 1 addition & 0 deletions data/flexc/fea54919-328c-4f00-aedc-2698d17914b6.json


1 change: 1 addition & 0 deletions data/flexc/ff14944f-3228-48c8-8d8b-2ae849bbddef.json


78 changes: 32 additions & 46 deletions plot.py
@@ -3,74 +3,60 @@
 import json
 import statistics
 import sys

+import os

 def load_jsons(files):
     js = []
     for file in files:
         try:
             with open(file) as f:
                 j = json.load(f)
+                if j["dag"] >= 1000.0:
+                    # 1000.0 = Inf
+                    j["dag"] = 1000.0
+                j["json_path"] = file
+                j["json_dir"] = os.path.dirname(file)
                 js.append(j)
         except Exception as e:
             print(f"Error loading {file}")
             raise e
     return js


-def process(js, extractors=[]):
-    extractors = extractors or sorted(set(j["extractor"] for j in js))
+def process(js):
+    names = set(j["name"] for j in js)
+    extractors = set(j["extractor"] for j in js)
+    for e in extractors:
+        e_names = set(j["name"] for j in js if j["extractor"] == e )
+        assert e_names == names

-    by_name = {}
+    dirs_extractors = sorted(set((j["json_dir"], j["extractor"]) for j in js))
+    dir_extractor_to_data = {}
     for j in js:
-        n, e = j["name"], j["extractor"]
-        by_name.setdefault(n, {})[e] = j
-
-    print("extractors:", extractors)
-    assert len(extractors) == 2
-    e1, e2 = extractors
-
-    summaries = {}
+        d, n, e = j["json_dir"], j["name"], j["extractor"]
+        dir_extractor_to_data.setdefault((d, e), []).append(j)

-    for name, d in by_name.items():
+    for directory, extractor in dirs_extractors:
+        d = dir_extractor_to_data[(directory, extractor)]
+        print(f"---- {directory} -- {extractor} results:")
         try:
-            tree_ratio = d[e1]["tree"] / d[e2]["tree"]
-            dag_ratio = d[e1]["dag"] / d[e2]["dag"]
-            micros_ratio = max(1, d[e1]["micros"]) / max(1, d[e2]["micros"])
-            summaries[name] = {
-                "tree": tree_ratio,
-                "dag": dag_ratio,
-                "micros": micros_ratio,
-            }
+            dag_mean = statistics.mean(s['dag'] for s in d)
+            micros_mean = statistics.mean(s['micros'] for s in d)
+
+            def quantiles(key):
+                xs = [s[key] for s in d]
+                qs = statistics.quantiles(xs, n=4)
+                with_extremes = [min(xs)] + qs + [max(xs)]
+                return ", ".join(f"{x:.2f}" for x in with_extremes)
+
+            print(f"dag mean: {dag_mean:.4f}")
+            print(f"micros mean: {micros_mean:.4f}")
+            print(f"dag quantiles: {quantiles('dag')}")
+            print(f"micros quantiles: {quantiles('micros')}")
         except Exception as e:
-            print(f"Error processing {name}")
+            print(f"Error processing {extractor}")
            raise e

-    print(f"{e1} / {e2}")
-
-    print("geo mean")
-    tree_summary = statistics.geometric_mean(s["tree"] for s in summaries.values())
-    dag_summary = statistics.geometric_mean(s["dag"] for s in summaries.values())
-    micros_summary = statistics.geometric_mean(s["micros"] for s in summaries.values())
-
-    print(f"tree: {tree_summary:.4f}")
-    print(f"dag: {dag_summary:.4f}")
-    print(f"micros: {micros_summary:.4f}")
-
-    print("quantiles")
-
-    def quantiles(key):
-        xs = [s[key] for s in summaries.values()]
-        qs = statistics.quantiles(xs, n=4)
-        with_extremes = [min(xs)] + qs + [max(xs)]
-        return ", ".join(f"{x:.4f}" for x in with_extremes)
-
-    print(f"tree: {quantiles('tree')}")
-    print(f"dag: {quantiles('dag')}")
-    print(f"micros: {quantiles('micros')}")


 if __name__ == "__main__":
     print()
     print(" ------------------------ ")
2 changes: 1 addition & 1 deletion src/extract/ilp_cbc.rs
@@ -17,7 +17,7 @@ impl Extractor for CbcExtractor {
         let max_order = egraph.nodes.len() as f64 * 10.0;

         let mut model = Model::default();
-        // model.set_parameter("seconds", "30");
+        model.set_parameter("seconds", "30");
         // model.set_parameter("allowableGap", "100000000");

         let vars: IndexMap<ClassId, ClassVars> = egraph
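
The only change above un-comments the "seconds" parameter, giving CBC a 30-second time limit per solve; the new CbcPruneExtractor below sets the same limit.
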
199 changes: 199 additions & 0 deletions src/extract/ilp_cbc_prune.rs
@@ -0,0 +1,199 @@
use super::*;
use coin_cbc::{Col, Model, Sense};
use indexmap::IndexSet;
use ordered_float::NotNan;

const BAN_ABOVE_COST: Cost = unsafe { NotNan::new_unchecked(1000.0) };

struct ClassVars {
active: Col,
order: Col,
nodes: Vec<Option<Col>>, // some nodes are pruned
}

pub struct CbcPruneExtractor;

impl Extractor for CbcPruneExtractor {
fn extract(&self, egraph: &EGraph, roots: &[ClassId]) -> ExtractionResult {
let max_order = egraph.nodes.len() as f64 * 10.0;

let mut to_prune: IndexSet<(ClassId, usize)> = Default::default();
find_nodes_to_prune(egraph, |id, i| {
to_prune.insert((id, i));
});

let mut model = Model::default();
model.set_parameter("seconds", "30");

let vars: IndexMap<ClassId, ClassVars> = egraph
.classes()
.values()
.map(|class| {
let cvars = ClassVars {
active: model.add_binary(),
order: model.add_col(),
nodes: class
.nodes
.iter()
.enumerate()
.map(|(i, _)| {
if to_prune.contains(&(class.id.clone(), i)) {
None
} else {
Some(model.add_binary())
}
})
.collect(),
};
model.set_col_upper(cvars.order, max_order);
(class.id.clone(), cvars)
})
.collect();

for (id, class) in &vars {
let row = model.add_row();
model.set_row_equal(row, 0.0);
model.set_weight(row, class.active, -1.0);
for &node_active in class.nodes.iter().flatten() {
// only set weight for non-pruned e-nodes
model.set_weight(row, node_active, 1.0);
}

for (node_id, &node_active_opt) in egraph[id].nodes.iter().zip(&class.nodes) {
if let Some(node_active) = node_active_opt {
let node = &egraph[node_id];
for child in &node.children {
let eclass_id = &egraph[child].eclass;
let child_active = vars[eclass_id].active;
// node active implies child active, encoded as:
// node_active <= child_active
// node_active - child_active <= 0
let row = model.add_row();
model.set_row_upper(row, 0.0);
model.set_weight(row, node_active, 1.0);
model.set_weight(row, child_active, -1.0);
}
}
}
}

model.set_obj_sense(Sense::Minimize);
for class in egraph.classes().values() {
for (node_id, &node_active_opt) in class.nodes.iter().zip(&vars[&class.id].nodes) {
if let Some(node_active) = node_active_opt {
let node = &egraph[node_id];
model.set_obj_coeff(node_active, node.cost.into_inner());
}
}
}

dbg!(max_order);

for class in vars.values() {
model.set_binary(class.active);
}

for root in roots {
// let root = &egraph.find(*root);
model.set_col_lower(vars[root].active, 1.0);
}

// set initial solution based on bottom up extractor
let initial_result = super::bottom_up::BottomUpExtractor.extract(egraph, roots);
/* FIXME: would need to keep ILP variables for pruned cycle nodes, only removing the cost pruned ones.
for (class, class_vars) in egraph.classes().values().zip(vars.values()) {
if let Some(node_id) = initial_result.choices.get(&class.id) {
model.set_col_initial_solution(class_vars.active, 1.0);
for col in class_vars.nodes.iter().flatten() {
model.set_col_initial_solution(*col, 0.0);
}
let node_idx = class.nodes.iter().position(|n| n == node_id).unwrap();
if to_prune.contains(&(class.id.clone(), node_idx)) {
println!("WARNING: infeasible initial solution, returning it anyway");
return initial_result;
}
model.set_col_initial_solution(class_vars.nodes[node_idx].unwrap(), 1.0);
} else {
model.set_col_initial_solution(class_vars.active, 0.0);
}
} */

let solution = model.solve();
log::info!(
"CBC status {:?}, {:?}, obj = {}",
solution.raw().status(),
solution.raw().secondary_status(),
solution.raw().obj_value(),
);
if solution.raw().is_proven_infeasible()
|| solution.raw().status() != coin_cbc::raw::Status::Finished
{
println!("WARNING: no solution found, returning bottom up solution.");
return initial_result;
}

let mut result = ExtractionResult::default();
for (id, var) in &vars {
let active = solution.col(var.active) > 0.0;
if active {
let node_idx = var
.nodes
.iter()
.position(|&n_opt| n_opt.map(|n| solution.col(n) > 0.0).unwrap_or(false))
.unwrap();
let node_id = egraph[id].nodes[node_idx].clone();
result.choose(id.clone(), node_id);
}
}

return result;
}
}

// does not use @khaki3's fix
// https://github.com/egraphs-good/egg/issues/207#issuecomment-1264737441
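// Iterative DFS over the e-classes: an e-node is reported to `f` (and later pruned)
// either because its own cost is at or above BAN_ABOVE_COST, or because one of its
// child e-classes is still on the DFS stack (Gray), i.e. keeping that e-node could
// close a cycle along this particular traversal order.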
fn find_nodes_to_prune(egraph: &EGraph, mut f: impl FnMut(ClassId, usize)) {
enum Color {
White,
Gray,
Black,
}
type Enter = bool;

let mut color: HashMap<ClassId, Color> = egraph
.classes()
.values()
.map(|c| (c.id.clone(), Color::White))
.collect();
let mut stack: Vec<(Enter, ClassId)> = egraph
.classes()
.values()
.map(|c| (true, c.id.clone()))
.collect();

let n2c = |nid: &NodeId| egraph.nid_to_cid(nid);

while let Some((enter, id)) = stack.pop() {
if enter {
*color.get_mut(&id).unwrap() = Color::Gray;
stack.push((false, id.clone()));
for (i, node_id) in egraph[&id].nodes.iter().enumerate() {
let node = &egraph[node_id];
if node.cost >= BAN_ABOVE_COST {
f(id.clone(), i);
continue;
}
for child in &node.children {
let child = n2c(child);
match &color[&child] {
Color::White => stack.push((true, child.clone())),
Color::Gray => f(id.clone(), i),
Color::Black => (),
}
}
}
} else {
*color.get_mut(&id).unwrap() = Color::Black;
}
}
}
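
In summary, the model assembled by extract above is the following 0-1 program (a sketch; the per-class order column, which the code creates but never uses in a row constraint, is omitted):

\min \sum_{(c,n)\ \text{kept}} \mathrm{cost}(n)\, x_{c,n}
\text{s.t.}\quad \sum_{n \in c,\ \text{kept}} x_{c,n} = a_c \quad \text{for every class } c
\qquad\ \ x_{c,n} \le a_{c'} \quad \text{for every child class } c' \text{ of a kept e-node } n
\qquad\ \ a_r = 1 \quad \text{for every root class } r
\qquad\ \ x_{c,n},\, a_c \in \{0,1\}

where a_c is the class's 'active' variable, x_{c,n} the variable of its n-th e-node, and e-nodes pruned by find_nodes_to_prune simply receive no variable.
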
2 changes: 2 additions & 0 deletions src/extract/mod.rs
@@ -7,6 +7,8 @@ pub mod greedy_dag;

#[cfg(feature = "ilp-cbc")]
pub mod ilp_cbc;
#[cfg(feature = "ilp-cbc-prune")]
pub mod ilp_cbc_prune;

pub trait Extractor: Sync {
fn extract(&self, egraph: &EGraph, roots: &[ClassId]) -> ExtractionResult;
5 changes: 5 additions & 0 deletions src/main.rs
@@ -26,6 +26,11 @@ fn main() {
),
#[cfg(feature = "ilp-cbc")]
("ilp-cbc", extract::ilp_cbc::CbcExtractor.boxed()),
#[cfg(feature = "ilp-cbc-prune")]
(
"ilp-cbc-prune",
extract::ilp_cbc_prune::CbcPruneExtractor.boxed(),
),
]
.into_iter()
.collect();
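
With the crate built with the new feature enabled (for example, cargo build --release --features ilp-cbc-prune), the extractor is registered under the name "ilp-cbc-prune" alongside the existing "ilp-cbc" entry.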