Skip to content

Commit

Permalink
Change isbi_utils from writing files to comparing in memory (#67)
Browse files Browse the repository at this point in the history
* Add new `trk_to_isbi` that takes a trk and transforms it into a `pd.DataFrame`. `trk_to_graph` now calls this internally.

* Deprecate path arguments in `benchmark_division_performance`, they no longer need to be passed and no files are written if they are not.

* Use DeprecationWarning if path arguments are passed to `benchmark_division_performance`.

* Bump version to 0.4.3
  • Loading branch information
R-Ding authored Aug 5, 2021
1 parent a5b7262 commit cd8eea4
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 56 deletions.
112 changes: 72 additions & 40 deletions deepcell_tracking/isbi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,37 +35,51 @@
import networkx as nx
import numpy as np
import pandas as pd
import warnings

from deepcell_toolbox import compute_overlap
from deepcell_tracking.utils import load_trks


def trk_to_isbi(track, path=None):
    """Convert a lineage track into a DataFrame of ISBI-formatted data.

    Each label in ``track`` becomes one row holding the label, its first
    and last frame, and its parent label. The parent is recorded as 0 when
    the cell has no parent, or when the last entry of the parent's frame
    list is not the frame immediately before the cell's first frame.

    Args:
        track (dict): Cell lineage object. Maps each label to a dict that
            provides at least ``'frames'`` (frame indices) and ``'parent'``
            (parent label or None).
        path (str): Path to save the .txt file (deprecated). When given,
            one ``"cell_id start end parent"`` line is written per label.

    Returns:
        pd.DataFrame: DataFrame of ISBI data for each label, with columns
            ``Cell_ID``, ``Start``, ``End`` and ``Parent_ID``.
    """
    columns = ['Cell_ID', 'Start', 'End', 'Parent_ID']

    isbi = []
    for label in track:
        first_frame = min(track[label]['frames'])
        last_frame = max(track[label]['frames'])
        parent = track[label]['parent']
        parent = 0 if parent is None else parent
        if parent:
            parent_frames = track[parent]['frames']
            # A division is only valid if the parent ends in the frame
            # directly before the daughter appears.
            if parent_frames[-1] != first_frame - 1:
                parent = 0

        isbi.append({'Cell_ID': label,
                     'Start': first_frame,
                     'End': last_frame,
                     'Parent_ID': parent})

    if path is not None:
        with open(path, 'w') as text_file:
            # Iterate over the collected rows (not the keys of the last
            # row dict) so every label is written out.
            for cell in isbi:
                line = '{cell_id} {start} {end} {parent}\n'.format(
                    cell_id=cell['Cell_ID'],
                    start=cell['Start'],
                    end=cell['End'],
                    parent=cell['Parent_ID']
                )
                text_file.write(line)

    # Passing the columns explicitly keeps the schema stable even when
    # the track is empty.
    df = pd.DataFrame(isbi, columns=columns)
    return df


def contig_tracks(label, batch_info, batch_tracked):
Expand Down Expand Up @@ -145,7 +159,6 @@ def match_nodes(gt, res):
Raises:
ValueError: If .
"""

num_frames = gt.shape[0]
iou = np.zeros((num_frames, np.max(gt) + 1, np.max(res) + 1))

Expand Down Expand Up @@ -192,16 +205,33 @@ def txt_to_graph(path, node_key=None):
Args:
path (str): Path to the ISBI text file.
node_key (dict): Map between gt nodes and result nodes
Returns:
networkx.Graph: Graph representation of the text file.
networkx.Graph: Graph representation of the ISBI data.
Raises:
ValueError: If the Parent_ID is not in any previous frames.
"""
names = ['Cell_ID', 'Start', 'End', 'Parent_ID']
df = pd.read_csv(path, header=None, sep=' ', names=names)
G = isbi_to_graph(df, node_key)
return G


def isbi_to_graph(df, node_key=None):
"""Create a Graph from DataFrame of ISBI info.
Args:
df (pd.DataFrame): DataFrame of ISBI-style info.
node_key (dict): Map between gt nodes and result nodes
Returns:
networkx.Graph: Graph representation of the ISBI data.
Raises:
ValueError: If the Parent_ID is not in any previous frames.
"""
if node_key is not None:
df[['Cell_ID', 'Parent_ID']] = df[['Cell_ID', 'Parent_ID']].replace(node_key)

Expand Down Expand Up @@ -237,7 +267,7 @@ def txt_to_graph(path, node_key=None):
if source not in all_ids: # parents should be in the previous frame.
# parent_frame = df[df['Cell_ID'] == row['Parent_id']]['End']
# source = '{}_{}'.format(row['Parent_ID'], parent_frame)
print('%s: skipped parent %s to daughter %s' % (path, source, row['Cell_ID']))
print('skipped parent %s to daughter %s' % (source, row['Cell_ID']))
continue

target = '{}_{}'.format(row['Cell_ID'], row['Start'])
Expand Down Expand Up @@ -329,21 +359,20 @@ def classify_divisions(G_gt, G_res):
}


def benchmark_division_performance(trk_gt, trk_res, path_gt, path_res):
"""Compare two related .trk files (one being the GT of the other) and meaasure
performance on the the divisions in the GT file. This function produces two .txt
documents as a by-product (ISBI-style lineage documents)
# TODO: there should be an option to not write the files but compare in memory
def benchmark_division_performance(trk_gt, trk_res, path_gt=None, path_res=None):
"""Compare two related .trk files (one being the GT of the other) and measure
performance on the divisions in the GT file.
Args:
trk_gt (path): Path to the ground truth .trk file.
trk_res (path): Path to the predicted results .trk file.
path_gt (path): Desired destination path for the GT ISBI-style .txt file.
path_res (path): Desired destination path for the result ISBI-style .txt file.
path_gt (path): Desired destination path for the GT ISBI-style .txt
file (deprecated).
path_res (path): Desired destination path for the result ISBI-style
.txt file (deprecated).
Returns:
dict: Diciontary of all division statistics.
dict: Dictionary of all division statistics.
"""
# Identify nodes with parent attribute
# Load both .trk
Expand All @@ -352,23 +381,26 @@ def benchmark_division_performance(trk_gt, trk_res, path_gt, path_res):
trks = load_trks(trk_res)
lineage_res, _, y_res = trks['lineages'][0], trks['X'], trks['y']

# Produce ISBI style text doc to work with
trk_to_isbi(lineage_gt, path_gt)
trk_to_isbi(lineage_res, path_res)
# Produce ISBI style array to work with
if path_gt is not None or path_res is not None:
warnings.warn('The `path_gt` and `path_res` arguments are deprecated.',
DeprecationWarning)
gt = trk_to_isbi(lineage_gt, path_gt)
res = trk_to_isbi(lineage_res, path_res)

# Match up labels in GT to Results to allow for direct comparisons
cells_gt, cells_res = match_nodes(y_gt, y_res)

if len(np.unique(cells_res)) < len(np.unique(cells_gt)):
node_key = {r: g for g, r in zip(cells_gt, cells_res)}
# node_key maps gt nodes onto resnodes so must be applied to gt
G_res = txt_to_graph(path_res, node_key=node_key)
G_gt = txt_to_graph(path_gt)
G_res = isbi_to_graph(res, node_key=node_key)
G_gt = isbi_to_graph(gt)
div_results = classify_divisions(G_gt, G_res)
else:
node_key = {g: r for g, r in zip(cells_gt, cells_res)}
G_res = txt_to_graph(path_res)
G_gt = txt_to_graph(path_gt, node_key=node_key)
G_res = isbi_to_graph(res)
G_gt = isbi_to_graph(gt, node_key=node_key)
div_results = classify_divisions(G_gt, G_res)

return div_results
130 changes: 115 additions & 15 deletions deepcell_tracking/isbi_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@

import copy
import os
import tarfile
import tempfile
import json

import networkx as nx
import numpy as np
import pandas as pd

from deepcell_tracking import isbi_utils
from deepcell_tracking.test_utils import get_annotated_movie
Expand All @@ -43,8 +47,8 @@ class TestIsbiUtils(object):

def test_trk_to_isbi(self, tmpdir):
# start with dummy lineage
# convert to ISBI file
# read file and validate
# convert to ISBI array
# validate array

track = {}
# first cell, skips frame 3 but divides in frame 4
Expand Down Expand Up @@ -76,19 +80,14 @@ def test_trk_to_isbi(self, tmpdir):
'parent': 3,
'label': 4,
}
isbifile = os.path.join(str(tmpdir), 'test_trk_to_isbi')
isbi_utils.trk_to_isbi(track, isbifile)
df = isbi_utils.trk_to_isbi(track)

with open(isbifile, 'rb') as f:
data = set(l.decode() for l in f.readlines())

expected = {
'1 0 4 0{}'.format(os.linesep),
'2 5 5 1{}'.format(os.linesep),
'3 5 5 1{}'.format(os.linesep),
'4 7 7 0{}'.format(os.linesep), # no parent; not consecutive frame
}
assert data == expected
expected = [{'Cell_ID': 1, 'Start': 0, 'End': 4, 'Parent_ID': 0},
{'Cell_ID': 2, 'Start': 5, 'End': 5, 'Parent_ID': 1},
{'Cell_ID': 3, 'Start': 5, 'End': 5, 'Parent_ID': 1},
{'Cell_ID': 4, 'Start': 7, 'End': 7, 'Parent_ID': 0}]
expected_df = pd.DataFrame(expected)
assert df.equals(expected_df)

def test_txt_to_graph(self, tmpdir):
# cell_id, start, end, parent_id
Expand Down Expand Up @@ -127,6 +126,31 @@ def test_txt_to_graph(self, tmpdir):
else:
assert not G.in_degree(daughter_id)

def test_isbi_to_graph(self):
    """Check the graph built from an in-memory ISBI DataFrame.

    Every cell must contribute one node per frame it spans. For cells
    with a parent, the parent's node in the preceding frame must be
    flagged as a division and linked to the daughter's first node; if
    that parent node is not in the graph, the daughter must have no
    incoming edges.
    """
    # Each row is (cell_id, start, end, parent_id).
    columns = ('Cell_ID', 'Start', 'End', 'Parent_ID')
    rows = [
        (1, 0, 3, 0),
        (2, 0, 2, 0),
        (3, 3, 3, 2),
        (4, 3, 3, 2),
        (5, 3, 3, 4),
    ]
    records = [dict(zip(columns, row)) for row in rows]
    graph = isbi_utils.isbi_to_graph(pd.DataFrame(records))

    for cell_id, start, end, parent in rows:
        for frame in range(start, end + 1):
            assert '{}_{}'.format(cell_id, frame) in graph

        if not parent:
            continue

        # Cells with a parent should have a division.
        daughter_node = '{}_{}'.format(cell_id, start)
        parent_node = '{}_{}'.format(parent, start - 1)
        if not graph.has_node(parent_node):
            assert not graph.in_degree(daughter_node)
            continue

        assert graph.nodes[parent_node]['division'] is True
        assert graph.has_edge(parent_node, daughter_node)

def test_classify_divisions(self):
G = nx.DiGraph()
G.add_edge('1_0', '1_1')
Expand Down Expand Up @@ -222,11 +246,87 @@ def test_match_nodes(self):
frames=frames,
mov_type='sequential', seed=1,
data_format='channels_last')

gtcells, rescells = isbi_utils.match_nodes(y1, y2)

assert len(rescells) == len(gtcells)
for loc, gt_cell in enumerate(np.unique(gtcells)):
# because movies have the same first frame, every
# iteration of unique values should match original label
assert gt_cell == rescells[loc * 3]

def test_benchmark_division_performance(self, tmpdir):
    """Benchmark a result .trk file that is identical to the ground truth.

    Two .trk archives with the same lineage and tracked movie are written
    to ``tmpdir`` and compared without passing any ISBI output paths; the
    single division in the lineage is expected to be counted as correct.
    """

    def write_trk(filename, lineage_data, raw_data, tracked_data):
        # Pack lineage.json, raw.npy and tracked.npy into a gzipped tar.
        # Auto deletion is disabled and each temp file is closed/deleted
        # manually to resolve the double-opening issue on Windows.
        with tarfile.open(filename, 'w:gz') as archive:
            with tempfile.NamedTemporaryFile('w', delete=False) as handle:
                json.dump(lineage_data, handle, indent=4)
                handle.flush()
                handle.close()
                archive.add(handle.name, 'lineage.json')
                os.remove(handle.name)

            members = (('raw.npy', raw_data), ('tracked.npy', tracked_data))
            for arcname, array in members:
                with tempfile.NamedTemporaryFile(delete=False) as handle:
                    np.save(handle, array)
                    handle.flush()
                    handle.close()
                    archive.add(handle.name, arcname)
                    os.remove(handle.name)

    out_dir = str(tmpdir)
    trk_gt = os.path.join(out_dir, 'test_benchmark_gt.trk')
    trk_res = os.path.join(out_dir, 'test_benchmark_res.trk')

    # Lineage: cell 3 lives in frame 0 and divides into cells 1 and 2.
    tracks_gt = {
        1: {'label': 1, 'frames': [1, 2], 'daughters': [],
            'capped': False, 'frame_div': None, 'parent': 3},
        2: {'label': 2, 'frames': [1, 2], 'daughters': [],
            'capped': False, 'frame_div': None, 'parent': 3},
        3: {'label': 3, 'frames': [0], 'daughters': [1, 2],
            'capped': False, 'frame_div': 1, 'parent': None},
    }
    X_gt = []
    # Tracked movie for the ground truth.
    y_gt = get_annotated_movie(img_size=256,
                               labels_per_frame=3,
                               frames=3,
                               mov_type='sequential', seed=0,
                               data_format='channels_last')

    # The results are deliberately the same objects as the ground truth.
    tracks_res = tracks_gt
    X_res = []
    y_res = y_gt

    write_trk(trk_gt, tracks_gt, X_gt, y_gt)
    write_trk(trk_res, tracks_res, X_res, y_res)

    expected = {'Correct division': 1, 'Incorrect division': 0,
                'False positive division': 0, 'False negative division': 0}
    results = isbi_utils.benchmark_division_performance(trk_gt, trk_res)
    assert results == expected
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
readme = f.read()


VERSION = '0.4.2'
VERSION = '0.4.3'
NAME = 'DeepCell_Tracking'
DESCRIPTION = 'Tracking cells and lineage with deep learning.'
LICENSE = 'LICENSE'
Expand Down

0 comments on commit cd8eea4

Please sign in to comment.