diff --git a/.gitignore b/.gitignore index 7cf3c4751..d7c5d9d03 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ htmlcov lextab.py yacctab.py .pytest_cache/* +.DS_STORE loopy/_git_rev.py diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index f0be543a5..c0bc38527 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -262,6 +262,7 @@ Instructions .. {{{ +.. autoclass:: HappensAfter .. autoclass:: InstructionBase .. _assignments: @@ -459,6 +460,10 @@ Loopy's expressions are a slight superset of the expressions supported by TODO: Functions TODO: Reductions +Dependencies +^^^^^^^^^^^^ +.. automodule:: loopy.kernel.dependency + Function Call Instructions ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 9ef012d66..81d864a1f 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -13,6 +13,10 @@ Wrangling inames .. automodule:: loopy.transform.iname +Precise Dependency Finding +-------------------------- +.. automodule:: loopy.transform.dependency + Dealing with Substitution Rules ------------------------------- diff --git a/loopy/__init__.py b/loopy/__init__.py index 249f74369..cfcb61b3f 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -29,6 +29,7 @@ # {{{ imported user interface from loopy.kernel.instruction import ( + HappensAfter, LegacyStringInstructionTag, UseStreamingStoreTag, MemoryOrdering, MemoryScope, @@ -175,6 +176,7 @@ "LoopKernel", "KernelState", + "HappensAfter", "LegacyStringInstructionTag", "UseStreamingStoreTag", "MemoryOrdering", "MemoryScope", diff --git a/loopy/check.py b/loopy/check.py index 3f65ad7ab..3fdb99605 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -1058,7 +1058,6 @@ def _check_variable_access_ordered_inner(kernel): depends_on = {insn.id: set() for insn in kernel.instructions} # rev_depends: mapping from insn_ids to their reverse deps. 
rev_depends = {insn.id: set() for insn in kernel.instructions} - for insn in kernel.instructions: depends_on[insn.id].update(insn.depends_on) for dep in insn.depends_on: diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 7a7b769db..02381f3e2 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -193,7 +193,7 @@ def subst_func(var): def get_default_insn_options_dict(): return { - "depends_on": frozenset(), + "happens_after": frozenset(), "depends_on_is_final": False, "no_sync_with": frozenset(), "groups": frozenset(), @@ -289,14 +289,15 @@ def parse_nosync_option(opt_value): result["depends_on_is_final"] = True opt_value = (opt_value[1:]).strip() - result["depends_on"] = result["depends_on"].union(frozenset( + result["happens_after"] = result["happens_after"].union(frozenset( intern(dep.strip()) for dep in opt_value.split(":") if dep.strip())) elif opt_key == "dep_query" and opt_value is not None: from loopy.match import parse_match match = parse_match(opt_value) - result["depends_on"] = result["depends_on"].union(frozenset([match])) + result["happens_after"] = result["happens_after"].union( + frozenset([match])) elif opt_key == "nosync" and opt_value is not None: if is_with_block: @@ -684,6 +685,7 @@ def _count_open_paren_symbols(s): def parse_instructions(instructions, defines): + if isinstance(instructions, str): instructions = [instructions] @@ -716,8 +718,8 @@ def intern_if_str(s): copy_args = { "id": intern_if_str(insn.id), - "depends_on": frozenset(intern_if_str(dep) - for dep in insn.depends_on), + "happens_after": frozenset(intern_if_str(dep) + for dep in insn.happens_after), "groups": frozenset(checked_intern(grp) for grp in insn.groups), "conflicts_with_groups": frozenset( checked_intern(grp) for grp in insn.conflicts_with_groups), @@ -834,11 +836,11 @@ def intern_if_str(s): # If it's inside a for/with block, then it's # final now. 
bool(local_w_inames)), - depends_on=( + happens_after=( (insn.depends_on - | insn_options_stack[-1]["depends_on"]) - if insn_options_stack[-1]["depends_on"] is not None - else insn.depends_on), + | insn_options_stack[-1]["happens_after"]) + if insn_options_stack[-1]["happens_after"] is not None + else insn.happens_after), tags=( insn.tags | insn_options_stack[-1]["tags"]), @@ -1441,32 +1443,6 @@ def add_assignment(base_name, expr, dtype, additional_inames): # }}} -# {{{ add_sequential_dependencies - -def add_sequential_dependencies(knl): - new_insns = [] - prev_insn = None - for insn in knl.instructions: - depon = insn.depends_on - if depon is None: - depon = frozenset() - - if prev_insn is not None: - depon = depon | frozenset((prev_insn.id,)) - - insn = insn.copy( - depends_on=depon, - depends_on_is_final=True) - - new_insns.append(insn) - - prev_insn = insn - - return knl.copy(instructions=new_insns) - -# }}} - - # {{{ temporary variable creation def create_temporaries(knl, default_order): @@ -1816,18 +1792,20 @@ def resolve_dependencies(knl): new_insns = [] for insn in knl.instructions: - depends_on = _resolve_dependencies( - "a dependency", knl, insn, insn.depends_on) + happens_after = _resolve_dependencies( + "a dependency", knl, insn, insn.happens_after) no_sync_with = frozenset( (resolved_insn_id, nosync_scope) for nosync_dep, nosync_scope in insn.no_sync_with for resolved_insn_id in _resolve_dependencies("nosync", knl, insn, (nosync_dep,))) - if depends_on == insn.depends_on and no_sync_with == insn.no_sync_with: + if happens_after == insn.happens_after and \ + no_sync_with == insn.no_sync_with: new_insn = insn else: - new_insn = insn.copy(depends_on=depends_on, no_sync_with=no_sync_with) + new_insn = insn.copy(happens_after=happens_after, + no_sync_with=no_sync_with) new_insns.append(new_insn) return knl.copy(instructions=new_insns) @@ -1921,13 +1899,17 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True, # }}} - depends_on = 
insn.depends_on - if depends_on is None: - depends_on = frozenset() + happens_after = insn.happens_after + + if not isinstance(happens_after, frozenset): + happens_after = frozenset(happens_after) - new_deps = frozenset(auto_deps) | depends_on + if happens_after is None: + happens_after = frozenset() - if new_deps != depends_on: + new_deps = frozenset(auto_deps) | frozenset(happens_after) + + if new_deps != happens_after: msg = ( "The single-writer dependency heuristic added dependencies " "on instruction ID(s) '%s' to instruction ID '%s' after " @@ -1936,13 +1918,13 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True, "To fix this, ensure that instruction dependencies " "are added/resolved as soon as possible, ideally at kernel " "creation time." - % (", ".join(new_deps - depends_on), insn.id)) + % (", ".join(new_deps - happens_after), insn.id)) if warn_if_used: warn_with_kernel(kernel, "single_writer_after_creation", msg) if error_if_used: raise LoopyError(msg) - insn = insn.copy(depends_on=new_deps) + insn = insn.copy(happens_after=new_deps) changed = True new_insns.append(insn) @@ -2523,7 +2505,8 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): check_for_duplicate_insn_ids(knl) if seq_dependencies: - knl = add_sequential_dependencies(knl) + from loopy.kernel.dependency import add_lexicographic_happens_after + knl = add_lexicographic_happens_after(knl) assert len(knl.instructions) == len(inames_to_dup) @@ -2565,7 +2548,10 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): knl = guess_arg_shape_if_requested(knl, default_order) knl = apply_default_order_to_args(knl, default_order) knl = resolve_dependencies(knl) - knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False) + + # precise dependency semantics should not rely on this + if not seq_dependencies: + knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False) # 
@for_each_kernel
def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel:
    """Attach a lexicographic happens-after relation to every instruction
    except the first.

    Each instruction after the first receives a ``happens_after`` mapping
    with exactly one entry, keyed by the ID of the instruction directly
    preceding it in ``knl.instructions``. Any ``happens_after`` the
    instruction carried before is replaced by this mapping. The entry is a
    :class:`HappensAfter` with *None* as its variable name and an
    :class:`islpy.Map` relating instances of the earlier instruction (input)
    to instances of the later instruction (output, dimension names suffixed
    with ``'``). The map only contains pairs in which the earlier instance
    is strictly lexicographically smaller than the later one with respect to
    the inames shared by both instructions.

    :arg knl: the kernel whose instructions are to be annotated.
    :returns: a copy of *knl* with the updated instructions.
    """

    result = []
    earlier = None

    for position, later in enumerate(knl.instructions):
        if position == 0:
            # Nothing precedes the first instruction: keep it as it is.
            result.append(later)
            earlier = later
            continue

        common_inames = later.within_inames & earlier.within_inames

        dom_earlier = knl.get_inames_domain(earlier.within_inames)
        dom_later = knl.get_inames_domain(later.within_inames)

        # All (earlier instance, later instance) pairs, unconstrained so far.
        instance_pairs = isl.Map.from_domain_and_range(dom_earlier, dom_later)

        # Prime the output names so that they can be told apart from the
        # input names once both live in a single set space.
        for out_idx in range(instance_pairs.dim(dim_type.out)):
            primed = instance_pairs.get_dim_name(dim_type.out, out_idx) + "'"
            instance_pairs = instance_pairs.set_dim_name(
                    dim_type.out, out_idx, primed)

        n_in = instance_pairs.dim(dim_type.in_)

        # Flatten the map into a set over (unprimed..., primed...) names.
        flat_pairs = instance_pairs.move_dims(
                dim_type.out, 0,
                dim_type.in_, 0,
                n_in).range()

        def shared_in_domain_order(domain):
            # Shared inames, listed in the dimension order of *domain*.
            names = (
                    domain.get_dim_name(dim_type.out, i)
                    for i in range(domain.dim(dim_type.out)))
            return [name for name in names if name in common_inames]

        order_earlier = shared_in_domain_order(dom_earlier)
        order_later = shared_in_domain_order(dom_later)
        assert order_later == order_earlier

        affs = isl.affs_from_space(flat_pairs.space)

        # Union over all depths d of:
        #   (iname_k == iname_k' for k < d) and (iname_d < iname_d')
        strictly_before = isl.Set.empty(flat_pairs.space)
        for depth, iname in enumerate(order_later):
            term = affs[iname].lt_set(affs[iname + "'"])

            for outer in order_later[:depth]:
                term = term & affs[outer].eq_set(affs[outer + "'"])

            strictly_before = strictly_before | term

        # Turn the set back into a map with the original input dimensions.
        lex_map = isl.Map.from_range(strictly_before).move_dims(
                dim_type.in_, 0,
                dim_type.out, 0,
                n_in)

        instance_pairs = instance_pairs & lex_map

        result.append(later.copy(
            happens_after={
                earlier.id: HappensAfter(None, instance_pairs)
            }))
        earlier = later

    return knl.copy(instructions=result)
+ + As a (deprecated) matter of backward compatibility, this may be *None*, + in which case the semantics revert to the (underspecified) statement-level + dependencies of prior versions of :mod:`loopy`. + """ + + variable_name: Optional[str] + instances_rel: Optional[isl.Map] + +# }}} + + # {{{ instructions: base class class InstructionBase(ImmutableRecord, Taggable): @@ -199,10 +233,20 @@ class InstructionBase(ImmutableRecord, Taggable): Inherits from :class:`pytools.tag.Taggable`. """ + id: Optional[str] + happens_after: Mapping[str, HappensAfter] + depends_on_is_final: bool + groups: FrozenSet[str] + conflicts_with_groups: FrozenSet[str] + no_sync_with: FrozenSet[Tuple[str, str]] + predicates: FrozenSet[ExpressionT] + within_inames: FrozenSet[str] + within_inames_is_final: bool + priority: int # within_inames_is_final is deprecated and will be removed in version 2017.x. - fields = set("id depends_on depends_on_is_final " + fields = set("id depends_on_is_final " "groups conflicts_with_groups " "no_sync_with " "predicates " @@ -215,12 +259,22 @@ class InstructionBase(ImmutableRecord, Taggable): # Names of fields that are sets of pymbolic expressions. 
Needed for key building pymbolic_set_fields = {"predicates"} - def __init__(self, id, depends_on, depends_on_is_final, - groups, conflicts_with_groups, - no_sync_with, - within_inames_is_final, within_inames, - priority, - predicates, tags): + def __init__(self, + id: Optional[str], + happens_after: Union[ + Mapping[str, HappensAfter], FrozenSet[str], str, None], + depends_on_is_final: Optional[bool], + groups: Optional[FrozenSet[str]], + conflicts_with_groups: Optional[FrozenSet[str]], + no_sync_with: Optional[FrozenSet[Tuple[str, str]]], + within_inames_is_final: Optional[bool], + within_inames: Optional[FrozenSet[str]], + priority: Optional[int], + predicates: Optional[FrozenSet[str]], + tags: Optional[FrozenSet[Tag]], + *, + depends_on: Union[FrozenSet[str], str, None] = None, + ) -> None: if predicates is None: predicates = frozenset() @@ -242,8 +296,46 @@ def __init__(self, id, depends_on, depends_on_is_final, predicates = frozenset(new_predicates) del new_predicates - if depends_on is None: - depends_on = frozenset() + # {{{ process happens_after/depends_on + + if happens_after is not None and depends_on is not None: + raise TypeError("may not pass both happens_after and depends_on") + elif depends_on is not None: + happens_after = depends_on + + del depends_on + + if depends_on_is_final and happens_after is None: + raise LoopyError("Setting depends_on_is_final to True requires " + "actually specifying happens_after/depends_on") + + if happens_after is None: + happens_after = {} + elif isinstance(happens_after, str): + warn("Passing a string for happens_after/depends_on is deprecated and " + "will stop working in 2024. 
def get_copy_kwargs(self, **kwargs):
    """Build the keyword arguments for constructing a copy of this
    instruction, accepting the deprecated *depends_on* spelling.

    The arguments are first passed through the base class's
    ``get_copy_kwargs`` (presumably filling in unspecified fields from this
    instance -- confirm against :class:`pytools.ImmutableRecord`). If the
    caller supplied *depends_on*, a :class:`DeprecationWarning` is issued
    and any ``happens_after`` entry is dropped from the result, since the
    constructor refuses to receive both spellings at once.

    :arg kwargs: the fields to override in the copy.
    :returns: the dictionary of constructor keyword arguments.
    :raises TypeError: if both *depends_on* and *happens_after* are passed.
    """
    passed_depends_on = "depends_on" in kwargs

    if passed_depends_on and "happens_after" in kwargs:
        # An explicit raise (rather than an assert) keeps this check alive
        # under "python -O"; without it, the caller's happens_after would be
        # silently discarded below. The exception type and message match
        # those of the constructor for the same condition.
        raise TypeError("may not pass both happens_after and depends_on")

    kwargs = super().get_copy_kwargs(**kwargs)

    if passed_depends_on:
        # warn that this is deprecated
        warn("depends_on is deprecated and will stop working in 2024. "
            "Instead, use happens_after.", DeprecationWarning, stacklevel=2)

        # The caller's depends_on takes precedence. pop() tolerates the key
        # being absent, where a bare del would raise KeyError.
        kwargs.pop("happens_after", None)

    return kwargs
+ fields = MultiAssignmentBase.fields | \ set("assignee temp_var_type atomicity".split()) pymbolic_fields = MultiAssignmentBase.pymbolic_fields | {"assignee"} def __init__(self, - assignee, expression, - id=None, - depends_on=None, - depends_on_is_final=None, - groups=None, - conflicts_with_groups=None, - no_sync_with=None, - within_inames_is_final=None, - within_inames=None, - tags=None, - temp_var_type=_not_provided, atomicity=(), - priority=0, predicates=frozenset()): + assignee: Union[str, ExpressionT], + expression: Union[str, ExpressionT], + id: Optional[str] = None, + happens_after: Union[ + Mapping[str, HappensAfter], FrozenSet[str], str, None] = None, + depends_on_is_final: Optional[bool] = None, + groups: Optional[FrozenSet[str]] = None, + conflicts_with_groups: Optional[FrozenSet[str]] = None, + no_sync_with: Optional[FrozenSet[Tuple[str, str]]] = None, + within_inames_is_final: Optional[bool] = None, + within_inames: Optional[FrozenSet[str]] = None, + priority: Optional[int] = None, + predicates: Optional[FrozenSet[str]] = None, + tags: Optional[FrozenSet[Tag]] = None, + temp_var_type: Union[ + Type[_not_provided], None, LoopyOptional] = _not_provided, + atomicity: Tuple[VarAtomicity, ...] 
= (), + *, + depends_on: Union[FrozenSet[str], str, None] = None, + ) -> None: if temp_var_type is _not_provided: - temp_var_type = Optional() + temp_var_type = LoopyOptional() super().__init__( id=id, - depends_on=depends_on, + happens_after=happens_after, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, @@ -827,7 +945,8 @@ def __init__(self, within_inames=within_inames, priority=priority, predicates=predicates, - tags=tags) + tags=tags, + depends_on=depends_on) from loopy.symbolic import parse if isinstance(assignee, str): @@ -950,7 +1069,7 @@ class CallInstruction(MultiAssignmentBase): def __init__(self, assignees, expression, id=None, - depends_on=None, + happens_after=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -959,11 +1078,12 @@ def __init__(self, within_inames=None, tags=None, temp_var_types=None, - priority=0, predicates=frozenset()): + priority=0, predicates=frozenset(), + depends_on=None): super().__init__( id=id, - depends_on=depends_on, + happens_after=happens_after, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, @@ -972,7 +1092,8 @@ def __init__(self, within_inames=within_inames, priority=priority, predicates=predicates, - tags=tags) + tags=tags, + depends_on=depends_on) from pymbolic.primitives import Call from loopy.symbolic import Reduction @@ -1003,7 +1124,7 @@ def __init__(self, self.expression = expression if temp_var_types is None: - self.temp_var_types = (Optional(),) * len(self.assignees) + self.temp_var_types = (LoopyOptional(),) * len(self.assignees) else: self.temp_var_types = tuple( _check_and_fix_temp_var_type(tvt, stacklevel=3) @@ -1146,7 +1267,7 @@ def modify_assignee_for_array_call(assignee): def make_assignment(assignees, expression, temp_var_types=None, **kwargs): if temp_var_types is None: - temp_var_types = (Optional(),) * len(assignees) + temp_var_types = (LoopyOptional(),) * len(assignees) if 
len(assignees) != 1 or is_array_call(assignees, expression): atomicity = kwargs.pop("atomicity", ()) @@ -1246,12 +1367,13 @@ class CInstruction(InstructionBase): def __init__(self, iname_exprs, code, read_variables=frozenset(), assignees=(), - id=None, depends_on=None, depends_on_is_final=None, + id=None, happens_after=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, no_sync_with=None, within_inames_is_final=None, within_inames=None, priority=0, - predicates=frozenset(), tags=None): + predicates=frozenset(), tags=None, + depends_on=None): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, simple strings pepresenting inames are also allowed. A single @@ -1264,13 +1386,14 @@ def __init__(self, InstructionBase.__init__(self, id=id, - depends_on=depends_on, + happens_after=happens_after, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, within_inames=within_inames, - priority=priority, predicates=predicates, tags=tags) + priority=priority, predicates=predicates, tags=tags, + depends_on=depends_on) # {{{ normalize iname_exprs @@ -1414,15 +1537,15 @@ class NoOpInstruction(_DataObliviousInstruction): ... 
nop """ - def __init__(self, id=None, depends_on=None, depends_on_is_final=None, + def __init__(self, id=None, happens_after=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, no_sync_with=None, within_inames_is_final=None, within_inames=None, priority=None, - predicates=None, tags=None): + predicates=None, tags=None, depends_on=None): super().__init__( id=id, - depends_on=depends_on, + happens_after=happens_after, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, @@ -1431,7 +1554,8 @@ def __init__(self, id=None, depends_on=None, depends_on_is_final=None, within_inames=within_inames, priority=priority, predicates=predicates, - tags=tags) + tags=tags, + depends_on=depends_on) def __str__(self): first_line = "%s: ... nop" % self.id @@ -1473,20 +1597,21 @@ class BarrierInstruction(_DataObliviousInstruction): fields = _DataObliviousInstruction.fields | {"synchronization_kind", "mem_kind"} - def __init__(self, id, depends_on=None, depends_on_is_final=None, + def __init__(self, id, happens_after=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, no_sync_with=None, within_inames_is_final=None, within_inames=None, priority=None, predicates=None, tags=None, synchronization_kind="global", - mem_kind="local"): + mem_kind="local", + depends_on=None): if predicates: raise LoopyError("conditional barriers are not supported") super().__init__( id=id, - depends_on=depends_on, + happens_after=happens_after, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, @@ -1495,8 +1620,8 @@ def __init__(self, id, depends_on=None, depends_on_is_final=None, within_inames=within_inames, priority=priority, predicates=predicates, - tags=tags - ) + tags=tags, + depends_on=depends_on) self.synchronization_kind = synchronization_kind self.mem_kind = mem_kind diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 
7a220418f..16412e529 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -80,7 +80,7 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, else: insns_before = None - barrier_to_add = BarrierInstruction(depends_on=insns_before, + barrier_to_add = BarrierInstruction(happens_after=insns_before, depends_on_is_final=True, id=id, within_inames=within_inames, diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index b77c6a5ed..68327f8b9 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -392,7 +392,7 @@ def buffer_array_for_single_kernel(kernel, callables_table, var_name, within_inames=( frozenset(within_inames) | frozenset(non1_init_inames)), - depends_on=frozenset(), + happens_after=frozenset(), depends_on_is_final=True) # }}} @@ -423,7 +423,7 @@ def none_to_empty_set(s): if insn.id in aar.modified_insn_ids: new_insns.append( insn.copy( - depends_on=( + happens_after=( none_to_empty_set(insn.depends_on) | frozenset([init_insn_id])))) else: @@ -465,7 +465,7 @@ def none_to_empty_set(s): from loopy.kernel.data import Assignment store_instruction = Assignment( id=kernel.make_unique_instruction_id(based_on="store_"+var_name), - depends_on=frozenset(aar.modified_insn_ids), + happens_after=frozenset(aar.modified_insn_ids), no_sync_with=frozenset([(init_insn_id, "any")]), assignee=store_target, expression=store_expression, @@ -482,7 +482,7 @@ def none_to_empty_set(s): # new_insns_with_redirected_deps: if an insn depends on a modified # insn, then it should also depend on the store insn. 
new_insns_with_redirected_deps = [ - insn.copy(depends_on=(insn.depends_on | {store_instruction.id})) + insn.copy(happens_after=(insn.depends_on | {store_instruction.id})) if insn.depends_on & aar.modified_insn_ids else insn for insn in new_insns diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 33196ca67..cf835d8a3 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -390,13 +390,13 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): noop_start = NoOpInstruction( id=ing(callee_label+"_start"), within_inames=call_insn.within_inames, - depends_on=call_insn.depends_on, + happens_after=call_insn.depends_on, predicates=call_insn.predicates, ) noop_end = NoOpInstruction( id=call_insn.id, within_inames=call_insn.within_inames, - depends_on=frozenset(insn_id_map.values()), + happens_after=frozenset(insn_id_map.values()), depends_on_is_final=True, predicates=call_insn.predicates, ) @@ -423,7 +423,7 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): insn = insn.copy( id=insn_id_map[insn.id], within_inames=new_within_inames, - depends_on=new_depends_on, + happens_after=new_depends_on, depends_on_is_final=True, tags=insn.tags | call_insn.tags, atomicity=new_atomicity, @@ -434,7 +434,7 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): insn = insn.copy( id=new_id, within_inames=new_within_inames, - depends_on=new_depends_on, + happens_after=new_depends_on, tags=insn.tags | call_insn.tags, no_sync_with=new_no_sync_with, predicates=insn.predicates | call_insn.predicates, diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py new file mode 100644 index 000000000..588dc08cb --- /dev/null +++ b/loopy/transform/dependency.py @@ -0,0 +1,111 @@ +""" +.. 
class AccessMapFinder(WalkMapper):
    """Collect access relations for subscripted variables, per instruction.

    While walking an expression on behalf of an instruction, every subscript
    encountered contributes an access map. Maps belonging to the same
    instruction ID and variable name are unioned. The result is queried with
    :meth:`get_map`. Subscripts whose access range could not be determined
    are recorded in :attr:`bad_subscripts`, keyed by variable name.
    Essentially a specialized version of BatchedAccessMapMapper.
    """

    def __init__(self, knl: LoopKernel) -> None:
        """
        :arg knl: the kernel providing iname domains, instructions and
            assumptions used to build the access maps.
        """
        from collections import defaultdict  # FIXME remove this

        self.kernel = knl
        # instruction ID -> variable name -> access map
        self._access_maps: PMap[str, PMap[str, isl.Map]] = pmap({})
        # variable name -> subscripts for which no access map could be found
        self.bad_subscripts: Dict[str, List[Expression]] = defaultdict(list)

        super().__init__()

    def get_map(self, insn_id: str, variable_name: str) -> isl.Map:
        """Return the access map recorded for *variable_name* within the
        instruction *insn_id*, or *None* if nothing has been recorded for
        that combination.
        """
        try:
            per_insn = self._access_maps[insn_id]
            return per_insn[variable_name]
        except KeyError:
            return None

    def map_subscript(self, expr, insn_id):
        """Record the access map of the subscript *expr* for the instruction
        *insn_id*, after walking the subscript's children.
        """
        insn = self.kernel.id_to_insn[insn_id]
        domain = self.kernel.get_inames_domain(insn.within_inames)

        WalkMapper.map_subscript(self, expr, insn_id)

        assert isinstance(expr.aggregate, p.Variable)

        arg_name = expr.aggregate.name
        subscript = expr.index_tuple

        try:
            access_map = get_access_map(
                    domain, subscript, self.kernel.assumptions)
        except UnableToDetermineAccessRangeError:
            # may not have enough info to generate access map at current point
            self.bad_subscripts[arg_name].append(expr)
            return

        per_insn = self._access_maps.get(insn_id)

        if per_insn is None:
            # First access recorded for this instruction.
            self._access_maps = self._access_maps.set(
                    insn_id, pmap({arg_name: access_map}))
            return

        # Merge with whatever was already known for this variable.
        previous = per_insn.get(arg_name)
        if previous is not None:
            access_map |= previous

        self._access_maps = self._access_maps.set(
                insn_id, per_insn.set(arg_name, access_map))

    def map_linear_subscript(self, expr, insn_id):
        """Linear subscripts are not supported; always raises."""
        raise NotImplementedError("linear subscripts cannot be used with "
                                  "precise dependency finding. Use "
                                  "multidimensional accesses to take advantage "
                                  "of this feature.")

    def map_reduction(self, expr, insn_id):
        """Defer to :class:`WalkMapper` for reductions."""
        return WalkMapper.map_reduction(self, expr, insn_id)

    def map_type_cast(self, expr, insn_id):
        """Look through the cast and walk the expression being cast."""
        return self.rec(expr.child, insn_id)

    def map_sub_array_ref(self, expr, insn_id):
        """Sub-array references are not supported; always raises."""
        raise NotImplementedError("Not yet implemented")
a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -231,7 +231,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, within_inames=insn.within_inames - ilp_inames | { new_pack_inames[i].name for i in p.swept_inames} | ( new_ilp_inames), - depends_on=insn.depends_on, + happens_after=insn.depends_on, id=ing(insn.id+"_pack"), depends_on_is_final=True )) @@ -244,7 +244,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_unpack_inames[i].name for i in p.swept_inames} | ( new_ilp_inames), id=ing(insn.id+"_unpack"), - depends_on=frozenset([insn.id]), + happens_after=frozenset([insn.id]), depends_on_is_final=True )) @@ -281,7 +281,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_assignees = tuple(subst_mapper(new_id_to_parameters[-i-1]) for i, _ in enumerate(insn.assignees)) new_call_insn = new_call_insn.copy( - depends_on=new_call_insn.depends_on | { + happens_after=new_call_insn.depends_on | { pack.id for pack in packing_insns}, within_inames=new_call_insn.within_inames - ilp_inames | ( new_ilp_inames), @@ -307,7 +307,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, for old_insn_id in insn.depends_on & set(old_insn_to_new_insns): new_depends_on |= frozenset(i.id for i in old_insn_to_new_insns[old_insn_id]) - new_instructions.append(insn.copy(depends_on=new_depends_on)) + new_instructions.append(insn.copy(happens_after=new_depends_on)) kernel = kernel.copy( domains=kernel.domains + new_domains, instructions=new_instructions, diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 1187ad5d9..35d6f712f 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -322,7 +322,7 @@ def map_kernel(self, kernel): if self.replaced_something: insn = insn.copy( - depends_on=( + happens_after=( insn.depends_on | frozenset([self.compute_dep_id]))) @@ -953,7 +953,7 @@ def add_assumptions(d): from loopy.kernel.instruction import 
BarrierInstruction barrier_insn = BarrierInstruction( id=barrier_insn_id, - depends_on=frozenset([compute_insn_id]), + happens_after=frozenset([compute_insn_id]), synchronization_kind="global", mem_kind="global") compute_dep_id = barrier_insn_id @@ -986,7 +986,7 @@ def add_assumptions(d): kernel = kernel.copy( instructions=[ - insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) + insn.copy(happens_after=frozenset(invr.compute_insn_depends_on)) if insn.id == compute_insn_id else insn for insn in kernel.instructions]) diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index c211ab18e..63e09fa87 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -136,7 +136,7 @@ def changes_made(self): # }}} - def new_subinstruction(self, *, within_inames, depends_on, + def new_subinstruction(self, *, within_inames, happens_after, no_sync_with=None, predicates=None): if no_sync_with is None: no_sync_with = self.surrounding_no_sync_with @@ -145,7 +145,7 @@ def new_subinstruction(self, *, within_inames, depends_on, return replace(self, surrounding_within_inames=within_inames, - surrounding_depends_on=depends_on, + surrounding_depends_on=happens_after, surrounding_no_sync_with=no_sync_with, surrounding_predicates=predicates, @@ -159,7 +159,7 @@ def get_insn_kwargs(self): self.surrounding_within_inames | frozenset(self.surrounding_insn_add_within_inames)), "within_inames_is_final": True, - "depends_on": ( + "happens_after": ( self.surrounding_depends_on | frozenset(self.surrounding_insn_add_depends_on)), "no_sync_with": ( @@ -668,7 +668,8 @@ def _add_to_depends_on(insn_id, new_depends_on_params): insn = new_or_updated_instructions.get(insn_id, insn) new_or_updated_instructions[insn_id] = ( insn.copy( - depends_on=insn.depends_on | frozenset(new_depends_on_params))) + happens_after=insn.depends_on | frozenset( + new_depends_on_params))) # }}} @@ -737,7 +738,7 @@ def _add_to_depends_on(insn_id, 
new_depends_on_params): assignees=(assignee,), expression=new_assignee, id=new_assignment_id, - depends_on=frozenset([last_added_insn_id]), + happens_after=frozenset([last_added_insn_id]), depends_on_is_final=True, no_sync_with=( insn.no_sync_with | frozenset([(insn.id, "any")])), @@ -943,7 +944,7 @@ def expand_inner_reduction( id=id, assignees=temp_vars, expression=expr, - depends_on=depends_on, + happens_after=depends_on, within_inames=within_inames, within_inames_is_final=True, predicates=predicates) @@ -983,7 +984,7 @@ def map_reduction_seq(red_realize_ctx, expr, nresults, arg_dtypes, reduction_dty assignees=acc_vars, within_inames=red_realize_ctx.surrounding_within_inames, within_inames_is_final=True, - depends_on=frozenset(), + happens_after=frozenset(), expression=expression, # Do not inherit predicates: Those might read variables @@ -1005,7 +1006,7 @@ def map_reduction_seq(red_realize_ctx, expr, nresults, arg_dtypes, reduction_dty within_inames=( red_realize_ctx.surrounding_within_inames | frozenset(expr.inames)), - depends_on=( + happens_after=( frozenset({init_id}) | red_realize_ctx.surrounding_depends_on)) @@ -1155,7 +1156,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx.surrounding_within_inames | frozenset([base_exec_iname])), within_inames_is_final=True, - depends_on=frozenset(), + happens_after=frozenset(), # Do not inherit predicates: Those might read variables # that may not yet be set, and we don't have a great way # of figuring out what the dependencies of the accumulator @@ -1177,7 +1178,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx.surrounding_within_inames | frozenset([base_exec_iname])), within_inames_is_final=True, - depends_on=frozenset(), + happens_after=frozenset(), predicates=red_realize_ctx.surrounding_predicates, ) red_realize_ctx.additional_insns.append(init_neutral_insn) @@ -1188,7 +1189,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults,
arg_dtypes, within_inames=( red_realize_ctx.surrounding_within_inames | frozenset([red_iname])), - depends_on=( + happens_after=( red_realize_ctx.surrounding_depends_on | frozenset([init_id, init_neutral_id])), no_sync_with=( @@ -1286,7 +1287,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx.surrounding_within_inames | frozenset([stage_exec_iname])), within_inames_is_final=True, - depends_on=frozenset([prev_id]), + happens_after=frozenset([prev_id]), predicates=red_realize_ctx.surrounding_predicates, ) @@ -1416,7 +1417,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx.surrounding_within_inames - frozenset((scan_param.sweep_iname,) + expr.inames)), within_inames_is_final=True, - depends_on=init_insn_depends_on, + happens_after=init_insn_depends_on, expression=expression, # Do not inherit predicates: Those might read variables # that may not yet be set, and we don't have a great way @@ -1436,7 +1437,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, within_inames=( red_realize_ctx.surrounding_within_inames | frozenset({scan_param.scan_iname})), - depends_on=red_realize_ctx.surrounding_depends_on) + happens_after=red_realize_ctx.surrounding_depends_on) reduction_expr = red_realize_ctx.mapper( expr.expr, red_realize_ctx=scan_red_realize_ctx, @@ -1466,7 +1467,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, | frozenset( scan_red_realize_ctx.surrounding_insn_add_within_inames) | {track_iname}), - depends_on=( + happens_after=( frozenset(scan_insn_depends_on) | frozenset(scan_red_realize_ctx.surrounding_insn_add_depends_on) ), @@ -1579,7 +1580,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), within_inames_is_final=True, - depends_on=frozenset(), + happens_after=frozenset(), # Do not inherit predicates: Those might read variables # that may not yet be set, and we 
don't have a great way # of figuring out what the dependencies of the accumulator @@ -1599,7 +1600,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, within_inames=( red_realize_ctx.surrounding_within_inames | frozenset({scan_param.scan_iname})), - depends_on=red_realize_ctx.surrounding_depends_on) + happens_after=red_realize_ctx.surrounding_depends_on) reduction_expr = red_realize_ctx.mapper( expr.expr, red_realize_ctx=transfer_red_realize_ctx, @@ -1643,7 +1644,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, | transfer_red_realize_ctx.surrounding_insn_add_within_inames | frozenset({scan_param.sweep_iname})), within_inames_is_final=True, - depends_on=( + happens_after=( transfer_insn_depends_on | transfer_red_realize_ctx.surrounding_insn_add_depends_on), no_sync_with=( @@ -1684,7 +1685,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=True, - depends_on=prev_ids, + happens_after=prev_ids, predicates=red_realize_ctx.surrounding_predicates, ) @@ -1722,7 +1723,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=True, - depends_on=prev_ids, + happens_after=prev_ids, predicates=red_realize_ctx.surrounding_predicates, ) @@ -2013,7 +2014,7 @@ def realize_reduction_for_single_kernel(kernel, callables_table, | red_realize_ctx.surrounding_insn_add_within_inames)) kwargs.pop("id") - kwargs.pop("depends_on") + kwargs.pop("happens_after") kwargs.pop("expression") kwargs.pop("assignee", None) kwargs.pop("assignees", None) @@ -2028,7 +2029,7 @@ def realize_reduction_for_single_kernel(kernel, callables_table, replacement_insns = [ Assignment( id=result_assignment_ids[i], - depends_on=( + happens_after=( result_assignment_dep_on | (frozenset([result_assignment_ids[i-1]]) if i else frozenset())), @@ -2047,7 +2048,7 @@ def 
realize_reduction_for_single_kernel(kernel, callables_table, replacement_insns = [ make_assignment( id=insn.id, - depends_on=result_assignment_dep_on, + happens_after=result_assignment_dep_on, assignees=insn.assignees, expression=new_expr, **kwargs) @@ -2075,7 +2076,7 @@ def realize_reduction_for_single_kernel(kernel, callables_table, if global_barrier is not None: gb_dep = frozenset([global_barrier]) additional_insns = [addl_insn.copy( - depends_on=addl_insn.depends_on | gb_dep) + happens_after=addl_insn.depends_on | gb_dep) for addl_insn in additional_insns] # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 6bf1c1543..813efd2b1 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -561,7 +561,7 @@ def add_subscript_if_subscript_nonempty(agg, subscript=()): self.subkernel_to_surrounding_inames[subkernel] | frozenset(hw_inames + dim_inames)), within_inames_is_final=True, - depends_on=depends_on) + happens_after=depends_on) if mode == "save": self.temporary_to_save_ids[temporary].add(save_or_load_insn_id) @@ -575,7 +575,7 @@ def add_subscript_if_subscript_nonempty(agg, subscript=()): for insn_id in update_deps: insn = self.insns_to_update.get(insn_id, self.kernel.id_to_insn[insn_id]) self.insns_to_update[insn_id] = insn.copy( - depends_on=insn.depends_on | frozenset([save_or_load_insn_id])) + happens_after=insn.depends_on | frozenset([save_or_load_insn_id])) self.updated_temporary_variables[promoted_temporary.name] = ( promoted_temporary.as_kernel_temporary(self.kernel)) diff --git a/test/test_dependencies.py b/test/test_dependencies.py new file mode 100644 index 000000000..3860d38fd --- /dev/null +++ b/test/test_dependencies.py @@ -0,0 +1,50 @@ +__copyright__ = "Copyright (C) 2023 Addison Alvey-Blanco" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import loopy as lp +from loopy.kernel.dependency import add_lexicographic_happens_after + + +def test_lex_dependencies(): + knl = lp.make_kernel( + [ + "{[a,b]: 0<=a,b<7}", + "{[i,j]: 0<=i,j 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 8efed778d..3d026d10b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2072,7 +2072,7 @@ def test_unscheduled_insn_detection(): prog = lp.linearize(prog) insn1, = lp.find_instructions(prog, "id:insn1") insns = prog["loopy_kernel"].instructions[:] - insns.append(insn1.copy(id="insn2", depends_on=frozenset({"insn1"}))) + insns.append(insn1.copy(id="insn2", happens_after=frozenset({"insn1"}))) prog = prog.with_kernel(prog["loopy_kernel"].copy(instructions=insns)) from loopy.diagnostic import UnscheduledInstructionError