From 617e577ecf76fa8273934ff047013ebae97a8cfc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 18:02:46 +0300 Subject: [PATCH 1/9] FIX(build): py2 needs pinning networkx-2.2 --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bd7883f4..d3dfec84 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,10 @@ author_email='huyng@yahoo-inc.com', url='http://github.com/yahoo/graphkit', packages=['graphkit'], - install_requires=['networkx'], + install_requires=[ + "networkx; python_version >= '3.5'", + "networkx == 2.2; python_version < '3.5'", + ], extras_require={ 'plot': ['pydot', 'matplotlib'] }, From f58d14865f45f07e5125aed9b0a3f151073fa122 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 00:36:57 +0300 Subject: [PATCH 2/9] FIX(#13): BUG in plot-diagram writtin from PY2 era, were writing in text-mode in PY3. and failing as encoding error. --- graphkit/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 0df3ddf8..24c3ac37 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -422,8 +422,8 @@ def get_node_name(a): # save plot if filename: - basename, ext = os.path.splitext(filename) - with open(filename, "w") as fh: + _basename, ext = os.path.splitext(filename) + with open(filename, "wb") as fh: if ext.lower() == ".png": fh.write(g.create_png()) elif ext.lower() == ".dot": From 52c0d7797489866c0a2b745b44014a3bf6626286 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 01:00:48 +0300 Subject: [PATCH 3/9] enh(test): + x2 TC breaking UNSATISFIED operations... receiving partial inputs, needed for other operations. --- test/test_graphkit.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..a6d4dcb3 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -5,7 +5,7 @@ import pickle from pprint import pprint -from operator import add +from operator import add, sub, floordiv, mul from numpy.testing import assert_raises import graphkit.network as network @@ -184,6 +184,38 @@ def test_pruning_raises_for_bad_output(): outputs=['sum1', 'sum3', 'sum4']) +def test_unsatisfied_operations(): + # Test that operations with partial inputs are culled and not failing. + graph = compose(name="graph")( + operation(name="add", needs=["a", "b1"], provides=["a+b1"])(add), + operation(name="sub", needs=["a", "b2"], provides=["a-b2"])(sub), + ) + + exp = {"a": 10, "b1": 2, "a+b1": 12} + assert graph({"a": 10, "b1": 2}) == exp + assert graph({"a": 10, "b1": 2}, outputs=["a+b1"]) == {"a+b1": 12} + + exp = {"a": 10, "b2": 2, "a-b2": 8} + assert graph({"a": 10, "b2": 2}) == exp + assert graph({"a": 10, "b2": 2}, outputs=["a-b2"]) == {"a-b2": 8} + +def test_unsatisfied_operations_same_out(): + # Test unsatisfied pairs of operations providing the same output. + graph = compose(name="graph")( + operation(name="mul", needs=["a", "b1"], provides=["ab"])(mul), + operation(name="div", needs=["a", "b2"], provides=["ab"])(floordiv), + operation(name="add", needs=["ab", "c"], provides=["ab_plus_c"])(add), + ) + + exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} + assert graph({"a": 10, "b1": 2, "c": 1}) == exp + assert graph({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 21} + + exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} + assert graph({"a": 10, "b2": 2, "c": 1}) == exp + assert graph({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 6} + + def test_optional(): # Test that optional() needs work as expected. From bc4c2211d25466896cb5738c4fd9d00c04633e6e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 29 Sep 2019 19:51:57 +0300 Subject: [PATCH 4/9] ENH(net,#18): ignore UN-SATISFIABLE operations with partial inputs + The x2 TCs added just before are now passing. --- graphkit/network.py | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 24c3ac37..04f0dca1 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -8,6 +8,7 @@ from io import StringIO from .base import Operation +from .modifiers import optional class DataPlaceholderNode(str): @@ -141,6 +142,65 @@ def compile(self): raise TypeError("Unrecognized network graph node") + def _collect_satisfiable_needs(self, operation, inputs, satisfiables, visited): + """ + Recusrively check if operation inputs are given/calculated (satisfied), or not. + + :param satisfiables: + the set to populate with satisfiable operations + + :param visited: + a cache of operations & needs, not to visit them again + :return: + true if opearation is satisfiable + """ + assert isinstance(operation, Operation), ( + "Expected Operation, got:", + type(operation), + ) + + if operation in visited: + return visited[operation] + + + def is_need_satisfiable(need): + if need in visited: + return visited[need] + + if need in inputs: + satisfied = True + else: + need_providers = list(self.graph.predecessors(need)) + satisfied = bool(need_providers) and any( + self._collect_satisfiable_needs(op, inputs, satisfiables, visited) + for op in need_providers + ) + visited[need] = satisfied + + return satisfied + + satisfied = all( + is_need_satisfiable(need) + for need in operation.needs + if not isinstance(need, optional) + ) + if satisfied: + satisfiables.add(operation) + visited[operation] = satisfied + + return satisfied + + + def _collect_satisfiable_operations(self, nodes, inputs): + satisfiables = set() + visited = {} + for node in nodes: + if node not in visited and isinstance(node, Operation): + self._collect_satisfiable_needs(node, inputs, satisfiables, visited) + + return satisfiables + + def _find_necessary_steps(self, outputs, inputs): """ Determines what graph steps need to pe run to get to the requested @@ -204,6 +264,13 @@ def _find_necessary_steps(self, outputs, inputs): # Get rid of the unnecessary nodes from the set of necessary ones. necessary_nodes -= unnecessary_nodes + # Drop (un-satifiable) operations with partial inputs. + # See https://github.com/yahoo/graphkit/pull/18 + # + satisfiables = self._collect_satisfiable_operations(necessary_nodes, inputs) + for node in list(necessary_nodes): + if isinstance(node, Operation) and node not in satisfiables: + necessary_nodes.remove(node) necessary_steps = [step for step in self.steps if step in necessary_nodes] From b8daa07bc83b249276107eedf64f9a1d12f584a6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 12:27:17 +0300 Subject: [PATCH 5/9] refact(net): drop old `dag` nx-package --- graphkit/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/network.py b/graphkit/network.py index 0df3ddf8..bb5a198c 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -107,7 +107,7 @@ def compile(self): self.steps = [] # create an execution order such that each layer's needs are provided. - ordered_nodes = list(nx.dag.topological_sort(self.graph)) + ordered_nodes = list(nx.topological_sort(self.graph)) # add Operations evaluation steps, and instructions to free data. for i, node in enumerate(ordered_nodes): From 12bdfe4965ab0c08606bf0551799f42057eb7776 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 15:00:21 +0300 Subject: [PATCH 6/9] ENH(core): ORDERED SETs for DETERMINISTIC results NOTE dict are not deterministic in = '3.5'", "networkx == 2.2; python_version < '3.5'", + "boltons" # for IndexSet ], extras_require={ 'plot': ['pydot', 'matplotlib'] From 489b32c0ed5cae94baa58d4a588d638959fc4e3e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 17:26:29 +0300 Subject: [PATCH 7/9] refact(net): simpilify del-instruction loop --- graphkit/network.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index db9b91de..9fbfd1e9 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -126,19 +126,16 @@ def compile(self): # Add instructions to delete predecessors as possible. A # predecessor may be deleted if it is a data placeholder that # is no longer needed by future Operations. - for predecessor in self.graph.predecessors(node): + for need in self.graph.pred[node]: if self._debug: - print("checking if node %s can be deleted" % predecessor) - predecessor_still_needed = False + print("checking if node %s can be deleted" % need) for future_node in ordered_nodes[i+1:]: - if isinstance(future_node, Operation): - if predecessor in future_node.needs: - predecessor_still_needed = True - break - if not predecessor_still_needed: + if isinstance(future_node, Operation) and need in future_node.needs: + break + else: if self._debug: - print(" adding delete instruction for %s" % predecessor) - self.steps.append(DeleteInstruction(predecessor)) + print(" adding delete instruction for %s" % need) + self.steps.append(DeleteInstruction(need)) else: raise TypeError("Unrecognized network graph node") From b102d44358ef1e60e6d6cffea516bcf11ffe864d Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 19:01:13 +0300 Subject: [PATCH 8/9] REFACT(unsatisfied): doubly-recursive func --> loop on topo-sorted --- graphkit/network.py | 100 ++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 60 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 9fbfd1e9..cad561ee 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -5,7 +5,10 @@ import os import networkx as nx +from collections import defaultdict from io import StringIO +from itertools import chain + from boltons.setutils import IndexedSet as iset @@ -138,66 +141,45 @@ def compile(self): self.steps.append(DeleteInstruction(need)) else: - raise TypeError("Unrecognized network graph node") + raise TypeError("Unrecognized network graph node %s" % type(node)) - def _collect_satisfiable_needs(self, operation, inputs, satisfiables, visited): + def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): """ - Recusrively check if operation inputs are given/calculated (satisfied), or not. - - :param satisfiables: - the set to populate with satisfiable operations - - :param visited: - a cache of operations & needs, not to visit them again - :return: - true if opearation is satisfiable + Traverse ordered graph and mark satisfied needs on each operation, + + collecting those missing at least one. + Since the graph is ordered, as soon as we're on an operation, + all its needs have been accounted, so we can get its satisfaction. + + :param necessary_nodes: + the subset of the graph to consider but WITHOUT the initial data + (because that is what :meth:`_find_necessary_steps()` can gives us...) + :param inputs: + an iterable of the names of the input values + return: + a list of unsatisfiable operations """ - assert isinstance(operation, Operation), ( - "Expected Operation, got:", - type(operation), - ) - - if operation in visited: - return visited[operation] - - - def is_need_satisfiable(need): - if need in visited: - return visited[need] - - if need in inputs: - satisfied = True - else: - need_providers = list(self.graph.predecessors(need)) - satisfied = bool(need_providers) and any( - self._collect_satisfiable_needs(op, inputs, satisfiables, visited) - for op in need_providers - ) - visited[need] = satisfied - - return satisfied - - satisfied = all( - is_need_satisfiable(need) - for need in operation.needs - if not isinstance(need, optional) - ) - if satisfied: - satisfiables.add(operation) - visited[operation] = satisfied - - return satisfied - - - def _collect_satisfiable_operations(self, nodes, inputs): - satisfiables = set() # unordered, not iterated - visited = {} - for node in nodes: - if node not in visited and isinstance(node, Operation): - self._collect_satisfiable_needs(node, inputs, satisfiables, visited) + G = self.graph # shortcut + ok_data = set(inputs) # to collect producible data + op_satisfaction = defaultdict(set) # to collect operation satisfiable needs + unsatisfiables = [] # to collect operations with partial needs + # We also need inputs to mark op_satisfaction. + nodes = chain(necessary_nodes, inputs) # note that `inputs` are plain strings + for node in nx.topological_sort(G.subgraph(nodes)): + if isinstance(node, Operation): + real_needs = set(n for n in node.needs if not isinstance(n, optional)) + if real_needs.issubset(op_satisfaction[node]): + # mark all future data-provides as ok + ok_data.update(G.adj[node]) + else: + unsatisfiables.append(node) + elif isinstance(node, (DataPlaceholderNode, str)) and node in ok_data: + # mark satisfied-needs on all future operations + for future_op in G.adj[node]: + op_satisfaction[future_op].add(node) - return satisfiables + return unsatisfiables def _find_necessary_steps(self, outputs, inputs): @@ -264,12 +246,10 @@ def _find_necessary_steps(self, outputs, inputs): necessary_nodes -= unnecessary_nodes # Drop (un-satifiable) operations with partial inputs. - # See https://github.com/yahoo/graphkit/pull/18 + # See yahoo/graphkit#18 # - satisfiables = self._collect_satisfiable_operations(necessary_nodes, inputs) - for node in list(necessary_nodes): - if isinstance(node, Operation) and node not in satisfiables: - necessary_nodes.remove(node) + unsatisfiables = self._collect_unsatisfiable_operations(necessary_nodes, inputs) + necessary_nodes -= set(unsatisfiables) necessary_steps = [step for step in self.steps if step in necessary_nodes] From 7b99b164571f8822c2684768d541df95ba0705d9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 04:07:21 +0300 Subject: [PATCH 9/9] ENH(netop): mark all its needs as OPTIONAL ... ... backported from #29. --- graphkit/functional.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index 9de470a5..9d88cdf9 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -204,7 +204,10 @@ def order_preserving_uniquifier(seq, seen=None): provides = order_preserving_uniquifier(chain(*[op.provides for op in operations])) needs = order_preserving_uniquifier(chain(*[op.needs for op in operations]), set(provides)) # unordered, not iterated - + # Mark them all as optional, now that #18 calmly ignores + # non-fully satisfied operations. + needs = [optional(n) for op in operations for n in op.needs] + # compile network net = Network() for op in operations: