def trk_to_isbi(track, path=None):
    """Convert a lineage track into ISBI formatted records.

    Every label in ``track`` yields one record with the first and last frame
    in which the label appears and the ID of its parent. The parent is reset
    to 0 when the label has no parent, or when the last entry of the parent's
    frame list is not the frame immediately before the label's first frame.

    Args:
        track (dict): Cell lineage object. Each label maps to a dict with at
            least a ``'frames'`` sequence and a ``'parent'`` entry
            (``None`` when the label has no parent).
        path (str): Path to save the .txt file (deprecated). When given, one
            ``Cell_ID Start End Parent_ID`` line is written per label.

    Returns:
        pd.DataFrame: DataFrame of ISBI data for each label, with the columns
            ``Cell_ID``, ``Start``, ``End`` and ``Parent_ID``.
    """
    columns = ['Cell_ID', 'Start', 'End', 'Parent_ID']

    isbi = []
    for label, info in track.items():
        first_frame = min(info['frames'])
        last_frame = max(info['frames'])

        parent = info['parent']
        parent = 0 if parent is None else parent
        if parent:
            parent_frames = track[parent]['frames']
            # Only keep the link when the parent's track stops on the frame
            # directly preceding the daughter's first frame.
            if parent_frames[-1] != first_frame - 1:
                parent = 0

        isbi.append({'Cell_ID': label,
                     'Start': first_frame,
                     'End': last_frame,
                     'Parent_ID': parent})

    if path is not None:
        # Iterate over the accumulated records (``isbi``), not over the last
        # per-label dict, so that every label is written to the file.
        with open(path, 'w') as text_file:
            text_file.writelines(
                '{Cell_ID} {Start} {End} {Parent_ID}\n'.format(**cell)
                for cell in isbi
            )

    # Passing the columns explicitly keeps the schema stable even when the
    # lineage is empty.
    return pd.DataFrame(isbi, columns=columns)
-145,7 +159,6 @@ def match_nodes(gt, res): Raises: ValueError: If . """ - num_frames = gt.shape[0] iou = np.zeros((num_frames, np.max(gt) + 1, np.max(res) + 1)) @@ -192,16 +205,33 @@ def txt_to_graph(path, node_key=None): Args: path (str): Path to the ISBI text file. + node_key (dict): Map between gt nodes and result nodes Returns: - networkx.Graph: Graph representation of the text file. + networkx.Graph: Graph representation of the ISBI data. Raises: ValueError: If the Parent_ID is not in any previous frames. """ names = ['Cell_ID', 'Start', 'End', 'Parent_ID'] df = pd.read_csv(path, header=None, sep=' ', names=names) + G = isbi_to_graph(df, node_key) + return G + + +def isbi_to_graph(df, node_key=None): + """Create a Graph from DataFrame of ISBI info. + + Args: + df (pd.DataFrame): DataFrame of ISBI-style info. + node_key (dict): Map between gt nodes and result nodes + Returns: + networkx.Graph: Graph representation of the ISBI data. + + Raises: + ValueError: If the Parent_ID is not in any previous frames. + """ if node_key is not None: df[['Cell_ID', 'Parent_ID']] = df[['Cell_ID', 'Parent_ID']].replace(node_key) @@ -237,7 +267,7 @@ def txt_to_graph(path, node_key=None): if source not in all_ids: # parents should be in the previous frame. # parent_frame = df[df['Cell_ID'] == row['Parent_id']]['End'] # source = '{}_{}'.format(row['Parent_ID'], parent_frame) - print('%s: skipped parent %s to daughter %s' % (path, source, row['Cell_ID'])) + print('skipped parent %s to daughter %s' % (source, row['Cell_ID'])) continue target = '{}_{}'.format(row['Cell_ID'], row['Start']) @@ -329,21 +359,20 @@ def classify_divisions(G_gt, G_res): } -def benchmark_division_performance(trk_gt, trk_res, path_gt, path_res): - """Compare two related .trk files (one being the GT of the other) and meaasure - performance on the the divisions in the GT file.
This function produces two .txt - documents as a by-product (ISBI-style lineage documents) - - # TODO: there should be an option to not write the files but compare in memory +def benchmark_division_performance(trk_gt, trk_res, path_gt=None, path_res=None): + """Compare two related .trk files (one being the GT of the other) and measure + performance on the divisions in the GT file. Args: trk_gt (path): Path to the ground truth .trk file. trk_res (path): Path to the predicted results .trk file. - path_gt (path): Desired destination path for the GT ISBI-style .txt file. - path_res (path): Desired destination path for the result ISBI-style .txt file. + path_gt (path): Desired destination path for the GT ISBI-style .txt + file (deprecated). + path_res (path): Desired destination path for the result ISBI-style + .txt file (deprecated). Returns: - dict: Diciontary of all division statistics. + dict: Dictionary of all division statistics. """ # Identify nodes with parent attribute # Load both .trk @@ -352,9 +381,12 @@ def benchmark_division_performance(trk_gt, trk_res, path_gt, path_res): trks = load_trks(trk_res) lineage_res, _, y_res = trks['lineages'][0], trks['X'], trks['y'] - # Produce ISBI style text doc to work with - trk_to_isbi(lineage_gt, path_gt) - trk_to_isbi(lineage_res, path_res) + # Produce ISBI style DataFrame to work with + if path_gt is not None or path_res is not None: + warnings.warn('The `path_gt` and `path_res` arguments are deprecated.', + DeprecationWarning) + gt = trk_to_isbi(lineage_gt, path_gt) + res = trk_to_isbi(lineage_res, path_res) # Match up labels in GT to Results to allow for direct comparisons cells_gt, cells_res = match_nodes(y_gt, y_res) @@ -362,13 +394,13 @@ def benchmark_division_performance(trk_gt, trk_res, path_gt, path_res): if len(np.unique(cells_res)) < len(np.unique(cells_gt)): node_key = {r: g for g, r in zip(cells_gt, cells_res)} # node_key maps gt nodes onto resnodes so must be applied to gt - G_res = txt_to_graph(path_res,
node_key=node_key) - G_gt = txt_to_graph(path_gt) + G_res = isbi_to_graph(res, node_key=node_key) + G_gt = isbi_to_graph(gt) div_results = classify_divisions(G_gt, G_res) else: node_key = {g: r for g, r in zip(cells_gt, cells_res)} - G_res = txt_to_graph(path_res) - G_gt = txt_to_graph(path_gt, node_key=node_key) + G_res = isbi_to_graph(res) + G_gt = isbi_to_graph(gt, node_key=node_key) div_results = classify_divisions(G_gt, G_res) return div_results diff --git a/deepcell_tracking/isbi_utils_test.py b/deepcell_tracking/isbi_utils_test.py index dfebc52..c98ae7b 100644 --- a/deepcell_tracking/isbi_utils_test.py +++ b/deepcell_tracking/isbi_utils_test.py @@ -31,9 +31,13 @@ import copy import os +import tarfile +import tempfile +import json import networkx as nx import numpy as np +import pandas as pd from deepcell_tracking import isbi_utils from deepcell_tracking.test_utils import get_annotated_movie @@ -43,8 +47,8 @@ class TestIsbiUtils(object): def test_trk_to_isbi(self, tmpdir): # start with dummy lineage - # convert to ISBI file - # read file and validate + # convert to ISBI array + # validate array track = {} # first cell, skips frame 3 but divides in frame 4 @@ -76,19 +80,14 @@ def test_trk_to_isbi(self, tmpdir): 'parent': 3, 'label': 4, } - isbifile = os.path.join(str(tmpdir), 'test_trk_to_isbi') - isbi_utils.trk_to_isbi(track, isbifile) + df = isbi_utils.trk_to_isbi(track) - with open(isbifile, 'rb') as f: - data = set(l.decode() for l in f.readlines()) - - expected = { - '1 0 4 0{}'.format(os.linesep), - '2 5 5 1{}'.format(os.linesep), - '3 5 5 1{}'.format(os.linesep), - '4 7 7 0{}'.format(os.linesep), # no parent; not consecutive frame - } - assert data == expected + expected = [{'Cell_ID': 1, 'Start': 0, 'End': 4, 'Parent_ID': 0}, + {'Cell_ID': 2, 'Start': 5, 'End': 5, 'Parent_ID': 1}, + {'Cell_ID': 3, 'Start': 5, 'End': 5, 'Parent_ID': 1}, + {'Cell_ID': 4, 'Start': 7, 'End': 7, 'Parent_ID': 0}] + expected_df = pd.DataFrame(expected) + assert 
df.equals(expected_df) def test_txt_to_graph(self, tmpdir): # cell_id, start, end, parent_id @@ -127,6 +126,31 @@ def test_txt_to_graph(self, tmpdir): else: assert not G.in_degree(daughter_id) + def test_isbi_to_graph(self): + # cell_id, start, end, parent_id + data = [{'Cell_ID': 1, 'Start': 0, 'End': 3, 'Parent_ID': 0}, + {'Cell_ID': 2, 'Start': 0, 'End': 2, 'Parent_ID': 0}, + {'Cell_ID': 3, 'Start': 3, 'End': 3, 'Parent_ID': 2}, + {'Cell_ID': 4, 'Start': 3, 'End': 3, 'Parent_ID': 2}, + {'Cell_ID': 5, 'Start': 3, 'End': 3, 'Parent_ID': 4}] + df = pd.DataFrame(data) + G = isbi_utils.isbi_to_graph(df) + for d in data: + node_ids = ['{}_{}'.format(d["Cell_ID"], t) + for t in range(d["Start"], d["End"] + 1)] + + for node_id in node_ids: + assert node_id in G + + if d["Parent_ID"]: # should have a division + daughter_id = '{}_{}'.format(d["Cell_ID"], d["Start"]) + parent_id = '{}_{}'.format(d["Parent_ID"], d["Start"] - 1) + if G.has_node(parent_id): + assert G.nodes[parent_id]['division'] is True + assert G.has_edge(parent_id, daughter_id) + else: + assert not G.in_degree(daughter_id) + def test_classify_divisions(self): G = nx.DiGraph() G.add_edge('1_0', '1_1') @@ -222,7 +246,6 @@ def test_match_nodes(self): frames=frames, mov_type='sequential', seed=1, data_format='channels_last') - gtcells, rescells = isbi_utils.match_nodes(y1, y2) assert len(rescells) == len(gtcells) @@ -230,3 +253,80 @@ def test_match_nodes(self): # because movies have the same first frame, every # iteration of unique values should match original label assert gt_cell == rescells[loc * 3] + + def test_benchmark_division_performance(self, tmpdir): + trk_gt = os.path.join(str(tmpdir), 'test_benchmark_gt.trk') + trk_res = os.path.join(str(tmpdir), 'test_benchmark_res.trk') + + # Generate lineage data + tracks_gt = {1: {'label': 1, 'frames': [1, 2], 'daughters': [], + 'capped': False, 'frame_div': None, 'parent': 3}, + 2: {'label': 2, 'frames': [1, 2], 'daughters': [], + 'capped': False, 'frame_div': 
None, 'parent': 3}, + 3: {'label': 3, 'frames': [0], 'daughters': [1, 2], + 'capped': False, 'frame_div': 1, 'parent': None}} + X_gt = [] + # Generate tracked movie + y_gt = get_annotated_movie(img_size=256, + labels_per_frame=3, + frames=3, + mov_type='sequential', seed=0, + data_format='channels_last') + # Let results be same as ground truth + tracks_res = tracks_gt + X_res = [] + y_res = y_gt + + # Save gt and res data to .trk files + with tarfile.open(trk_gt, 'w:gz') as trks: + # disable auto deletion and close/delete manually + # to resolve double-opening issue on Windows. + with tempfile.NamedTemporaryFile('w', delete=False) as lineage: + json.dump(tracks_gt, lineage, indent=4) + lineage.flush() + lineage.close() + trks.add(lineage.name, 'lineage.json') + os.remove(lineage.name) + + with tempfile.NamedTemporaryFile(delete=False) as raw: + np.save(raw, X_gt) + raw.flush() + raw.close() + trks.add(raw.name, 'raw.npy') + os.remove(raw.name) + + with tempfile.NamedTemporaryFile(delete=False) as tracked: + np.save(tracked, y_gt) + tracked.flush() + tracked.close() + trks.add(tracked.name, 'tracked.npy') + os.remove(tracked.name) + + with tarfile.open(trk_res, 'w:gz') as trks: + # disable auto deletion and close/delete manually + # to resolve double-opening issue on Windows. 
+ with tempfile.NamedTemporaryFile('w', delete=False) as lineage: + json.dump(tracks_res, lineage, indent=4) + lineage.flush() + lineage.close() + trks.add(lineage.name, 'lineage.json') + os.remove(lineage.name) + + with tempfile.NamedTemporaryFile(delete=False) as raw: + np.save(raw, X_res) + raw.flush() + raw.close() + trks.add(raw.name, 'raw.npy') + os.remove(raw.name) + + with tempfile.NamedTemporaryFile(delete=False) as tracked: + np.save(tracked, y_res) + tracked.flush() + tracked.close() + trks.add(tracked.name, 'tracked.npy') + os.remove(tracked.name) + + expected = {'Correct division': 1, 'Incorrect division': 0, + 'False positive division': 0, 'False negative division': 0} + results = isbi_utils.benchmark_division_performance(trk_gt, trk_res) + assert results == expected diff --git a/setup.py b/setup.py index 8a76a43..fcb1962 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ readme = f.read() -VERSION = '0.4.2' +VERSION = '0.4.3' NAME = 'DeepCell_Tracking' DESCRIPTION = 'Tracking cells and lineage with deep learning.' LICENSE = 'LICENSE'