From 00bd4aa0150e013766d456d03a83a94a3b7afdf6 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Wed, 10 May 2023 12:42:23 -0700 Subject: [PATCH 1/5] realm: fix the realm proc memory affinity --- realm/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/realm/main.cc b/realm/main.cc index 9dabdb3a..5037d90e 100644 --- a/realm/main.cc +++ b/realm/main.cc @@ -917,7 +917,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m; From 86faa64e50a72653bf86c4a3032244df633b4224 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Wed, 10 May 2023 13:54:34 -0700 Subject: [PATCH 2/5] realm: fetch metadata if instances are created on remote nodes --- realm/main.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/realm/main.cc b/realm/main.cc index 5037d90e..8d2f5229 100644 --- a/realm/main.cc +++ b/realm/main.cc @@ -112,6 +112,18 @@ DECLARE_REDUCTION(RedopMax, unsigned long long, unsigned long long, Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + Event event_fetch_dst = Event::NO_EVENT; + if (dst_inst.address_space() != current_proc.address_space()) { + event_fetch_dst = dst_inst.fetch_metadata(current_proc); + event_fetch_dst.wait(); + } + Event event_fetch_src = Event::NO_EVENT; + if (src_inst.address_space() != current_proc.address_space()) { + event_fetch_src = src_inst.fetch_metadata(current_proc); + event_fetch_src.wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -810,7 +822,7 @@ void shard_task(const void *args, size_t arglen, const void *userdata, .at(point - first_point) .at(dep) .at(slot), - fid, graph.output_bytes_per_task, + fid, sizeof(char), task_postcondition); copy_postconditions.at(point - first_point).at(fid - FID_FIRST).push_back(postcondition); } From 90748f35103cdfe4e7aaf99ca753c9982c2d0ab8 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Wed, 10 May 2023 18:14:38 -0700 Subject: [PATCH 3/5] realm: fix subgraph --- realm/main.cc | 8 ++------ realm_subgraph/main.cc | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/realm/main.cc b/realm/main.cc index 8d2f5229..686b1352 100644 --- a/realm/main.cc +++ b/realm/main.cc @@ -113,15 +113,11 @@ Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size, Event wait_for) { Processor current_proc = ThreadLocal::current_processor; - Event event_fetch_dst = Event::NO_EVENT; if (dst_inst.address_space() != current_proc.address_space()) { - event_fetch_dst = dst_inst.fetch_metadata(current_proc); - event_fetch_dst.wait(); + dst_inst.fetch_metadata(current_proc).wait(); } - Event event_fetch_src = Event::NO_EVENT; if (src_inst.address_space() != current_proc.address_space()) { - event_fetch_src = src_inst.fetch_metadata(current_proc); - event_fetch_src.wait(); + src_inst.fetch_metadata(current_proc).wait(); } CopySrcDstField src_field; diff --git a/realm_subgraph/main.cc b/realm_subgraph/main.cc index 2b6d02da..4a3ee289 100644 --- a/realm_subgraph/main.cc +++ b/realm_subgraph/main.cc @@ -112,6 +112,14 @@ DECLARE_REDUCTION(RedopMax, unsigned long long, unsigned long long, Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -135,6 +143,14 @@ Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, SubgraphDefinition::CopyDesc copy_desc(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, size_t value_size) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -424,7 +440,7 @@ static Event define_subgraph(Subgraph &subgraph, .at(point - first_point) .at(dep) .at(slot), - fid, graph.output_bytes_per_task)); + fid, sizeof(char))); copy_postconditions.at(fid - FID_FIRST).push_back(copy_postcondition); @@ -771,6 +787,20 @@ void shard_task(const void *args, size_t arglen, const void *userdata, // Figure out who we're going to be communicating with. + for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) { + auto graph = graphs.at(graph_index); + + long first_point = proc_index * graph.max_width / num_procs; + long last_point = (proc_index + 1) * graph.max_width / num_procs - 1; + + for (long point = first_point; point <= last_point; ++point) { + task_results.at(graph_index).at(point).fetch_metadata(p).wait(); + task_inputs.at(graph_index).at(point).fetch_metadata(p).wait(); + raw_exchange.at(graph_index).at(point).fetch_metadata(p).wait(); + war_exchange.at(graph_index).at(point).fetch_metadata(p).wait(); + } + } + // graph -> point -> [remote point] std::vector > > raw_exchange_points(graphs.size()); std::vector > > war_exchange_points(graphs.size()); @@ -1301,7 +1331,7 @@ void shard_task(const void *args, size_t arglen, const void *userdata, } } - if (!subgraph.exists()) { + if (subgraph.exists()) { subgraph.destroy(postcondition); } } @@ -1366,7 +1396,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m; From 2bb52cd66f8e1d00cfd498f64cacaea8fdc6bcb6 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Wed, 10 May 2023 18:20:31 -0700 Subject: [PATCH 4/5] realm: fix realm old --- realm_old/main.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/realm_old/main.cc b/realm_old/main.cc index 19f55848..2e5c7616 100644 --- a/realm_old/main.cc +++ b/realm_old/main.cc @@ -138,6 +138,14 @@ long num_rev_dependencies(TaskGraph &graph, long dset, long taskid) Event copy(RegionInstance src_inst, RegionInstance dst_inst, FieldID fid, Event wait_for) { + Processor current_proc = ThreadLocal::current_processor; + if (dst_inst.address_space() != current_proc.address_space()) { + dst_inst.fetch_metadata(current_proc).wait(); + } + if (src_inst.address_space() != current_proc.address_space()) { + src_inst.fetch_metadata(current_proc).wait(); + } + CopySrcDstField src_field; src_field.inst = src_inst; src_field.field_id = fid; @@ -879,7 +887,7 @@ void top_level_task(const void *args, size_t arglen, const void *userdata, for (size_t i = 0; i < proc_mem_affinities.size(); ++i) { Machine::ProcessorMemoryAffinity &affinity = proc_mem_affinities[i]; if (affinity.p.kind() == Processor::LOC_PROC) { - if (affinity.m.kind() == Memory::SYSTEM_MEM) { + if (affinity.m.kind() == Memory::SYSTEM_MEM && affinity.m.capacity() > 0) { proc_sysmems[affinity.p] = affinity.m; if (proc_regmems.find(affinity.p) == proc_regmems.end()) proc_regmems[affinity.p] = affinity.m; From 80e3fd8dac22464fba2158fafd231d3ee99ed68c Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Wed, 10 May 2023 21:57:22 -0700 Subject: [PATCH 5/5] enable realm ci --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 749e31d7..9090ccaf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,7 +17,7 @@ jobs: - system: LEGION - system: PYGION - system: REGENT - # - system: REALM + - system: REALM - system: STARPU hwloc: 1 - system: PARSEC