Skip to content

Commit

Permalink
Trace: Generate Communication Matrix (#12)
Browse files Browse the repository at this point in the history
* tracedata schema

* cleaned up communication matrix function and added documentation

* docs: specify sender/receiver dimensions in top level function comment

Co-authored-by: Abhinav Bhatele <[email protected]>
  • Loading branch information
adityaranjan and bhatele committed Oct 14, 2022
1 parent 8d20284 commit 09bf9a5
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions pipit/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: MIT

import numpy as np


class Trace:
"""A trace dataset is read into an object of this type, which includes one
Expand All @@ -29,3 +31,77 @@ def from_hpctoolkit(dirname):
from .readers.hpctoolkit_reader import HPCToolkitReader

return HPCToolkitReader(dirname).read()

def comm_matrix(self, comm_type="bytes"):
"""
Communication Matrix for Peer-to-Peer (P2P) MPI messages
Arguments:
1) comm_type -
string to choose whether the communication volume should be measured
by bytes transferred between two processes or the number of messages
sent (two choices - "bytes" or "counts")
Returns:
A 2D Numpy Array that represents the communication matrix for all P2P
messages of the given trace
Note:
The first dimension of the returned 2d array
is senders and the second dimension is receivers
ex) comm_matrix[sender_rank][receiver_rank]
"""

# get the list of ranks/process ids
# (mpi messages are sent between processes)
ranks = set(
self.events.loc[self.events["Location Group Type"] == "PROCESS"][
"Location Group ID"
]
)

# create a 2d numpy array that will be returned
# at the end of the function
communication_matrix = np.zeros(shape=(len(ranks), len(ranks)))

# filter the dataframe by MPI Send and Isend events
sender_dataframe = self.events.loc[
self.events["Event Type"].isin(["MpiSend", "MpiIsend"]),
["Location Group ID", "Attributes"],
]

# get the mpi ranks of all the sender processes
sender_ranks = sender_dataframe["Location Group ID"].to_list()

# get the corresponding mpi ranks of the receivers
receiver_ranks = (
sender_dataframe["Attributes"]
.apply(lambda attrDict: attrDict["receiver"])
.to_list()
)

# number of bytes communicated
if comm_type == "bytes":
# (1 communication is a single row in the sender dataframe)
message_volumes = (
sender_dataframe["Attributes"]
.apply(lambda attrDict: attrDict["msg_length"])
.to_list()
)
elif comm_type == "counts":
# 1 message between the pairs of processes
# for each row in the sender dataframe
message_volumes = np.full(len(sender_dataframe), 1)

for i in range(len(sender_ranks)):
"""
loops through all the communication events and adds the
message volumes to the corresponding entry of the 2d array
using the sender and receiver ranks
"""
communication_matrix[sender_ranks[i], receiver_ranks[i]] += message_volumes[
i
]

return communication_matrix

0 comments on commit 09bf9a5

Please sign in to comment.