From d7d9d30180df45061311dd7c65008e3945db967c Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Tue, 5 Nov 2024 17:16:41 -0600 Subject: [PATCH] perf: removing output arrays from reference cycles so they don't have to wait for GC (#1305) * perf: desperate attempts to unlink output arrays * implement https://github.com/scikit-hep/uproot5/pull/1305#issuecomment-2407868371 * clear values dict to avoid holding onto computed arrays * clear scope to avoid holding onto big arrays & add a note for the future... --------- Co-authored-by: Peter Fackeldey --- src/uproot/behaviors/TBranch.py | 23 ++++++++++++++++++----- src/uproot/language/python.py | 4 ++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index 2d2778564..3fb57ab7e 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -210,10 +210,16 @@ def iterate( arrays, report = item arrays = library.global_index(arrays, global_offset) report = report.to_global(global_offset) - yield arrays, report + popper = [arrays] + del arrays + del item + yield popper.pop(), report + else: - arrays = library.global_index(item, global_offset) - yield arrays + popper = [library.global_index(item, global_offset)] + del item + yield popper.pop() + except uproot.exceptions.KeyInFileError: if allow_missing: continue @@ -1111,6 +1117,9 @@ def iterate( ak_add_doc, ) + # no longer needed; save memory + del output + next_baskets = {} for branch, basket_num, basket in ranges_or_baskets: basket_entry_start, basket_entry_stop = basket.entry_start_stop @@ -1119,10 +1128,14 @@ def iterate( previous_baskets = next_baskets + # no longer needed; save memory + popper = [out] + del out + if report: - yield out, Report(self, sub_entry_start, sub_entry_stop) + yield popper.pop(), Report(self, sub_entry_start, sub_entry_stop) else: - yield out + yield popper.pop() def keys( self, diff --git a/src/uproot/language/python.py b/src/uproot/language/python.py index e3ce8838a..f72f68f65 100644 --- a/src/uproot/language/python.py +++ b/src/uproot/language/python.py @@ -516,6 +516,10 @@ def getter(name): else: output[name] = output[name][cut] + # clear dicts to get rid of big arrays. + # note: without this these arrays are not properly released from memory! + values.clear() + scope.clear() return output