diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 55d589c2..0a7e02d0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,7 +17,7 @@ jobs: - system: LEGION - system: PYGION - system: REGENT - # - system: REALM + - system: REALM - system: STARPU hwloc: 1 - system: PARSEC diff --git a/realm/main.cc b/realm/main.cc index 9dabdb3a..686b1352 100644 --- a/realm/main.cc +++ b/realm/main.cc @@ -112,6 +112,14 @@ DECLARE_REDUCTION(RedopMax, unsigned long long, unsigned long long, Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -810,7 +818,7 @@ void shard_task(const void *args, size_t arglen, const void *userdata, .at(point - first_point) .at(dep) .at(slot), - fid, graph.output_bytes_per_task, + fid, sizeof(char), task_postcondition); copy_postconditions.at(point - first_point).at(fid - FID_FIRST).push_back(postcondition); } @@ -917,7 +925,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m; diff --git a/realm_old/main.cc b/realm_old/main.cc index 19f55848..2e5c7616 100644 --- a/realm_old/main.cc +++ b/realm_old/main.cc @@ -138,6 +138,14 @@ long num_rev_dependencies(TaskGraph &graph, long dset, long taskid) Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -879,7 +887,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m; diff --git a/realm_subgraph/main.cc b/realm_subgraph/main.cc index 2b6d02da..4a3ee289 100644 --- a/realm_subgraph/main.cc +++ b/realm_subgraph/main.cc @@ -112,6 +112,14 @@ DECLARE_REDUCTION(RedopMax, unsigned long long, unsigned long long, Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -135,6 +143,14 @@ Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, SubgraphDefinition::CopyDesc copy_desc(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -424,7 +440,7 @@ static Event define_subgraph(Subgraph &subgraph, .at(point - first_point) .at(dep) .at(slot), - fid, graph.output_bytes_per_task)); + fid, sizeof(char))); copy_postconditions.at(fid - FID_FIRST).push_back(copy_postcondition); @@ -771,6 +787,20 @@ void shard_task(const void *args, size_t arglen, const void *userdata, // Figure out who we're going to be communicating with. + for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) { + auto graph = graphs.at(graph_index); + + long first_point = proc_index * graph.max_width / num_procs; + long last_point = (proc_index + 1) * graph.max_width / num_procs - 1; + + for (long point = first_point; point <= last_point; ++point) { + task_results.at(graph_index).at(point).fetch_metadata(p).wait(); + task_inputs.at(graph_index).at(point).fetch_metadata(p).wait(); + raw_exchange.at(graph_index).at(point).fetch_metadata(p).wait(); + war_exchange.at(graph_index).at(point).fetch_metadata(p).wait(); + } + } + // graph -> point -> [remote point] std::vector > > raw_exchange_points(graphs.size()); std::vector > > war_exchange_points(graphs.size()); @@ -1301,7 +1331,7 @@ void shard_task(const void *args, size_t arglen, const void *userdata, } } - if (!subgraph.exists()) { + if (subgraph.exists()) { subgraph.destroy(postcondition); } } @@ -1366,7 +1396,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m;