From e80b2c9138c347072c992ffa4b3a3fc33c2e2939 Mon Sep 17 00:00:00 2001 From: Owen Lynch Date: Wed, 21 Aug 2024 12:04:01 -0700 Subject: [PATCH] addressed review comments --- src/EGraphs/egraph.jl | 16 ++++++--- src/EGraphs/unionfind.jl | 72 ++++++++++++++++++++-------------------- src/vecexpr.jl | 28 +++++++--------- 3 files changed, 60 insertions(+), 56 deletions(-) diff --git a/src/EGraphs/egraph.jl b/src/EGraphs/egraph.jl index d31e3d58..e3c5aed8 100644 --- a/src/EGraphs/egraph.jl +++ b/src/EGraphs/egraph.jl @@ -77,6 +77,9 @@ end """ merge_analysis_data!(a::EClass{D}, b::EClass{D})::Tuple{Bool,Bool,Union{D,Nothing}} where {D} +This is an internal function and should not be called directly by users; this +docstring is only for those who wish to understand Metatheory internals. + Returns a tuple of: `(did_update_a, did_update_b, newdata)` where: - `did_update_a` is a boolean indicating whether `a`'s analysis class was updated @@ -112,7 +115,9 @@ end """ Recall that `Base.hash` combines an existing "seed" (h) with a new value (a). -In this case, we just use bitwise XOR; very cheap! +In this case, we just use bitwise XOR; very cheap! This is because +[`IdKey`](@ref) is supposed to just hash to itself, so we don't need to do +anything special to `a.val`. """ Base.hash(a::IdKey, h::UInt) = xor(a.val, h) @@ -163,8 +168,10 @@ mutable struct EGraph{ExpressionType,Analysis} """ hashcons the constants in the e-graph - For performance reasons, the "head" of an e-node is hashed and the - value is stored in this Dict. + For performance reasons, the "head" of an e-node is stored in the e-node as a + hash (so that it fits in a flat vector of unsigned integers + ([`VecExpr`](@ref))) and this dictionary is used to get the actual Julia + object of the head when extracting terms. """ constants::Dict{UInt64,Any} @@ -593,7 +600,8 @@ end # Thanks to Max Willsey and Yihong Zhang """ -Look up a grounded pattern. +Given a ground pattern, which is a pattern that has no pattern variables, find +the eclass id in the egraph that represents that ground pattern. """ function lookup_pat(g::EGraph{ExpressionType}, p::PatExpr)::Id where {ExpressionType} @assert isground(p) diff --git a/src/EGraphs/unionfind.jl b/src/EGraphs/unionfind.jl index 016c8d3a..d037fc69 100644 --- a/src/EGraphs/unionfind.jl +++ b/src/EGraphs/unionfind.jl @@ -1,4 +1,8 @@ """ + UnionFind() + +Creates a new empty unionfind. + A unionfind data structure is an eventually-idempotent endomorphism on `1:n`. What does this mean? It is a function `parents : 1:n -> 1:n` such that @@ -25,11 +29,6 @@ struct UnionFind parents::Vector{Id} end -""" - UnionFind() - -Creates a new empty unionfind. -""" UnionFind() = UnionFind(Id[]) """ @@ -68,46 +67,47 @@ function Base.union!(uf::UnionFind, i::Id, j::Id) i end -""" - find(uf::UnionFind, i::Id) -This computes the fixed point of `uf.parents` when applied to `i`. +# Potential optimization: -We know we are at a fixed point once `i == uf.parents[i]`. So, we continually -set `i = uf.parents[i]` until this becomes true. +# ```julia +# j = i +# while j != uf.parents[j] +# j = uf.parents[j] +# end +# root = j +# while i != uf.parents[i] +# uf.parents[i] = root +# i = uf.parents[i] +# end +# root +# ``` -Potential optimization: +# This optimization sets up a "short-circuit". That is, before, the parents array +# might be set up as -```julia -j = i -while j != uf.parents[j] - j = uf.parents[j] -end -root = j -while i != uf.parents[i] - uf.parents[i] = root - i = uf.parents[i] -end -root -``` +# ``` +# 1 -> 5 -> 2 -> 3 -> 3 +# ``` -This optimization sets up a "short-circuit". That is, before, the parents array -might be set up as +# After, we have -``` -1 -> 5 -> 2 -> 3 -> 3 -``` +# ``` +# 1 -> 3 +# 5 -> 3 +# 2 -> 3 +# 3 -> 3 +# ``` -After, we have +# Note: why don't we do this optimization? Question for Alessandro. -``` -1 -> 3 -5 -> 3 -2 -> 3 -3 -> 3 -``` +""" + find(uf::UnionFind, i::Id) + +This computes the fixed point of `uf.parents` when applied to `i`. -Note: why don't we do this optimization? Question for Alessandro. +We know we are at a fixed point once `i == uf.parents[i]`. So, we continually +set `i = uf.parents[i]` until this becomes true. """ function find(uf::UnionFind, i::Id) while i != uf.parents[i] diff --git a/src/vecexpr.jl b/src/vecexpr.jl index 504f7c54..b72def06 100644 --- a/src/vecexpr.jl +++ b/src/vecexpr.jl @@ -40,22 +40,18 @@ An e-node is represented by `Vector{Id}` where: * Position 4 stores the hash of the `head` (if `isexpr`) or node value in the e-graph constants. * The rest of the positions store the e-class ids of the children nodes. -The meaning of the bitflags `isexpr` and `iscall` can be best understood through looking at -the source for `to_expr(g::EGraph, n::VecExpr)` in `src/EGraphs/egraph.jl`. Namely, -e-nodes for which `isexpr` is false have no arguments; their only "data" is their head. -E-nodes for which `isexpr` is true and `iscall` is also true correspond to -`Expr(:call, head, args...)` expressions, and e-nodes for which `isexpr` is true but -`iscall` is false correspond to `Expr(head, args...)` expressions. There should -not be `VecExpr`s with `isexpr = false` but `iscall = true`. - -The "signature" of an expression seems to in practice be computed as the hash of the head combined -with the number of arguments (the arity). See: [`addexpr!`]() in `src/EGraphs/egraph.jl`. -Perhaps in the future, signatures could also involve type information, e.g. to disambiguate -overloaded heads? Signatures are used in the `classes_by_op` dictionary in a e-graph, -so that when you are matching for `(a + b)` you can iterate over all of the e-classes -that have some e-node with `(+, 2)` as its signature. - -It also seems like the signature of a constant is `0`. +The bitflags `isexpr` and `iscall` corresponds to the values of `isexpr(e)::Boolean` +and `iscall(e)::Boolean` from TermInterface. For instance, for `Expr`, we have +`iscall(Expr(:call, f, args...)) = true`, `isexpr(Expr(head, args...)) = true`, but +`iscall(Expr(:=, :a, :b)) = false` and `isexpr(2) = false`. + +The "signature" of an expression is computed as the hash of the head combined +with the number of arguments (the arity). See: [`addexpr!`](@ref) in +`src/EGraphs/egraph.jl`. Signatures are used in the `classes_by_op` dictionary +in a e-graph, so that when you are matching for `(a + b)` you can iterate over +all of the e-classes that have some e-node with `(+, 2)` as its signature. + +The signature of a constant is `0`. The expression is represented as an array of integers to improve performance. The hash value for the VecExpr is cached in the first position for faster lookup performance in dictionaries.