-
Notifications
You must be signed in to change notification settings - Fork 319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speeds up module.add_debug_info and add_metadata by 2x #887
base: main
Are you sure you want to change the base?
Changes from all commits
84812fa
1edf17d
2b9dc33
e5585c7
1032046
6e4590c
0281979
bffb1f3
c5ad886
4960a20
7f3392d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,4 +1,3 @@ | ||||||||||||
from collections import defaultdict | ||||||||||||
|
||||||||||||
|
||||||||||||
class DuplicatedNameError(NameError): | ||||||||||||
|
@@ -8,7 +7,7 @@ class DuplicatedNameError(NameError): | |||||||||||
class NameScope(object): | ||||||||||||
def __init__(self): | ||||||||||||
self._useset = set(['']) | ||||||||||||
self._basenamemap = defaultdict(int) | ||||||||||||
self._basenamemap = {} | ||||||||||||
|
||||||||||||
def is_used(self, name): | ||||||||||||
return name in self._useset | ||||||||||||
|
@@ -23,10 +22,18 @@ def register(self, name, deduplicate=False): | |||||||||||
|
||||||||||||
def deduplicate(self, name): | ||||||||||||
basename = name | ||||||||||||
|
||||||||||||
try: | ||||||||||||
ident = self._basenamemap[basename] | ||||||||||||
except KeyError: | ||||||||||||
ident = 0 | ||||||||||||
Comment on lines
+26
to
+29
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the emulation of a default dict I mentioned in the comment above - could it just be
Suggested change
with |
||||||||||||
|
||||||||||||
while self.is_used(name): | ||||||||||||
ident = self._basenamemap[basename] + 1 | ||||||||||||
self._basenamemap[basename] = ident | ||||||||||||
ident += 1 | ||||||||||||
name = "{0}.{1}".format(basename, ident) | ||||||||||||
|
||||||||||||
self._basenamemap[basename] = ident | ||||||||||||
|
||||||||||||
return name | ||||||||||||
|
||||||||||||
def get_child(self): | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,14 +52,17 @@ def add_metadata(self, operands): | |
if not isinstance(operands, (list, tuple)): | ||
raise TypeError("expected a list or tuple of metadata values, " | ||
"got %r" % (operands,)) | ||
operands = self._fix_metadata_operands(operands) | ||
key = tuple(operands) | ||
if key not in self._metadatacache: | ||
n = len(self.metadata) | ||
md = values.MDValue(self, operands, name=str(n)) | ||
self._metadatacache[key] = md | ||
else: | ||
md = self._metadatacache[key] | ||
|
||
fixed_operands = self._fix_metadata_operands(operands) | ||
key = hash(tuple(fixed_operands)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the PR description, I see
I'm not sure I can understand where it was generated twice before - am I missing that somewhere? In any case, I do think the changes here are an improvement - reverting the specific changes to this file does slow down the performance test in |
||
try: | ||
return self._metadatacache[key] | ||
except KeyError: | ||
pass | ||
|
||
n = len(self.metadata) | ||
md = values.MDValue(self, fixed_operands, name=str(n)) | ||
self._metadatacache[key] = md | ||
return md | ||
|
||
def add_debug_info(self, kind, operands, is_distinct=False): | ||
|
@@ -72,14 +75,18 @@ def add_debug_info(self, kind, operands, is_distinct=False): | |
A DIValue instance is returned, it can then be associated to e.g. | ||
an instruction. | ||
""" | ||
operands = tuple(sorted(self._fix_di_operands(operands.items()))) | ||
key = (kind, operands, is_distinct) | ||
if key not in self._metadatacache: | ||
n = len(self.metadata) | ||
di = values.DIValue(self, is_distinct, kind, operands, name=str(n)) | ||
self._metadatacache[key] = di | ||
else: | ||
di = self._metadatacache[key] | ||
fixed_operands = self._fix_di_operands(sorted(operands.items())) | ||
key = hash((kind, tuple(fixed_operands), is_distinct)) | ||
|
||
try: | ||
return self._metadatacache[key] | ||
except KeyError: | ||
pass | ||
|
||
n = len(self.metadata) | ||
di = values.DIValue( | ||
self, is_distinct, kind, fixed_operands, name=str(n)) | ||
self._metadatacache[key] = di | ||
return di | ||
|
||
def add_named_metadata(self, name, element=None): | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -667,6 +667,7 @@ def __init__(self, parent, values, name): | |||||||||||
types.MetaDataType(), | ||||||||||||
name=name) | ||||||||||||
self.operands = tuple(values) | ||||||||||||
self.hash_cache = None | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels like |
||||||||||||
parent.metadata.append(self) | ||||||||||||
|
||||||||||||
def descr(self, buf): | ||||||||||||
|
@@ -694,8 +695,17 @@ def __eq__(self, other): | |||||||||||
def __ne__(self, other): | ||||||||||||
return not self.__eq__(other) | ||||||||||||
|
||||||||||||
def __getstate__(self): | ||||||||||||
# Ensure that the hash is not cached between Python invocations | ||||||||||||
# due to pickling or other serialization. The hash seed changes | ||||||||||||
# which will cause these not to match. | ||||||||||||
self.hash_cache = None | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems a bit odd to mutate state inside |
||||||||||||
return self.__dict__ | ||||||||||||
Comment on lines
+702
to
+703
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. e.g.
Suggested change
|
||||||||||||
|
||||||||||||
def __hash__(self): | ||||||||||||
return hash(self.operands) | ||||||||||||
if self.hash_cache is None: | ||||||||||||
self.hash_cache = hash(self.operands) | ||||||||||||
return self.hash_cache | ||||||||||||
|
||||||||||||
|
||||||||||||
class DIToken: | ||||||||||||
|
@@ -725,6 +735,7 @@ def __init__(self, parent, is_distinct, kind, operands, name): | |||||||||||
self.is_distinct = is_distinct | ||||||||||||
self.kind = kind | ||||||||||||
self.operands = tuple(operands) | ||||||||||||
self.hash_cache = None | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment on the name to above, i.e. should it be |
||||||||||||
parent.metadata.append(self) | ||||||||||||
|
||||||||||||
def descr(self, buf): | ||||||||||||
|
@@ -767,8 +778,17 @@ def __eq__(self, other): | |||||||||||
def __ne__(self, other): | ||||||||||||
return not self.__eq__(other) | ||||||||||||
|
||||||||||||
def __getstate__(self): | ||||||||||||
# Ensure that the hash is not cached between Python invocations | ||||||||||||
# due to pickling or other serialization. The hash seed changes | ||||||||||||
# which will cause these not to match. | ||||||||||||
self.hash_cache = None | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also should this invalidate the hash cache on a copy? |
||||||||||||
return self.__dict__ | ||||||||||||
|
||||||||||||
def __hash__(self): | ||||||||||||
return hash((self.is_distinct, self.kind, self.operands)) | ||||||||||||
if self.hash_cache is None: | ||||||||||||
self.hash_cache = hash(self.operands) | ||||||||||||
return self.hash_cache | ||||||||||||
|
||||||||||||
|
||||||||||||
class GlobalValue(NamedValue, _ConstOpMixin, _HasMetadata): | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import pickle | ||
import re | ||
import textwrap | ||
import timeit | ||
import unittest | ||
|
||
from . import TestCase | ||
|
@@ -526,6 +527,94 @@ def test_debug_info_unicode_string(self): | |
name = ''.join(map(lambda x: f"\\{x:02x}", "∆".encode())) | ||
self.assert_ir_line(f'!0 = !DILocalVariable(name: "a{name}")', strmod) | ||
|
||
def test_debug_info_caching(self): | ||
mod = None | ||
foo = None | ||
builder = None | ||
di_subprogram = None | ||
|
||
def setup_test(): | ||
nonlocal mod, foo, builder, di_subprogram | ||
mod = self.module() | ||
|
||
di_file = mod.add_debug_info( | ||
'DIFile', | ||
{ | ||
'directory': 'my_directory', | ||
'filename': 'my_path.foo', | ||
}) | ||
|
||
di_compile_unit = mod.add_debug_info( | ||
'DICompileUnit', | ||
{ | ||
'emissionKind': ir.DIToken('FullDebug'), | ||
'file': di_file, | ||
'isOptimized': False, | ||
'language': ir.DIToken('DW_LANG_C99'), | ||
'producer': 'llvmlite-test', | ||
}) | ||
|
||
di_subprogram = mod.add_debug_info( | ||
'DISubprogram', | ||
{ | ||
'file': di_file, | ||
'line': 123, | ||
'name': 'my function', | ||
'scope': di_file, | ||
'scopeLine': 123, | ||
'unit': di_compile_unit, | ||
}) | ||
|
||
foo = ir.Function(mod, ir.FunctionType(ir.VoidType(), []), 'foo') | ||
builder = ir.IRBuilder(foo.append_basic_block('')) | ||
|
||
def do_test(): | ||
for i in range(100_000): | ||
di_location = mod.add_debug_info( | ||
'DILocation', | ||
{ | ||
'scope': di_subprogram, | ||
'line': i, | ||
'column': 15, | ||
'other': [di_subprogram, di_subprogram], | ||
}) | ||
|
||
builder.debug_metadata = di_location | ||
|
||
builder.and_( | ||
ir.Constant(ir.IntType(bits=32), i), | ||
ir.Constant(ir.IntType(bits=32), i + 1)) | ||
|
||
total_time = timeit.timeit( | ||
'do_test()', | ||
'setup_test()', | ||
number=10, | ||
globals=locals()) | ||
|
||
print('test_debug_info_performance took', total_time, 'to finish') | ||
|
||
self.assertEqual(100004, len(mod._metadatacache)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the expected result could do with a comment explaining its derivation - it took me a while to work out that it's 100000 locations, plus one file, one compile unit, one subprogram, and |
||
|
||
def test_debug_info_pickle(self): | ||
mod = self.module() | ||
|
||
di_file = mod.add_debug_info( | ||
'DIFile', | ||
{ | ||
'directory': 'my_directory', | ||
'filename': 'my_path.foo', | ||
}) | ||
self.assertEqual(hash(di_file), di_file.hash_cache) | ||
found_di_file = pickle.loads(pickle.dumps(di_file)) | ||
self.assertIsNone(found_di_file.hash_cache) | ||
self.assertEqual(di_file, found_di_file) | ||
|
||
meta = mod.add_metadata([di_file]) | ||
self.assertEqual(hash(meta), meta.hash_cache) | ||
found_meta = pickle.loads(pickle.dumps(meta)) | ||
self.assertIsNone(found_meta.hash_cache) | ||
self.assertEqual(meta, found_meta) | ||
|
||
def test_inline_assembly(self): | ||
mod = self.module() | ||
foo = ir.Function(mod, ir.FunctionType(ir.VoidType(), []), 'foo') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is a defaultdict slower than a normal dict? It seems that making this change here requires you to then emulate a default dict below - is there a reason not to keep it as a defaultdict?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Follow-up - I tried reverting the changes in this file and it seemed to make no performance difference in
test_debug_info_caching
)