Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nonblocking handles with RMA requests #53

Draft
wants to merge 41 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
cfbcf7c
add unified attribute to gmr_t
jeffhammond May 30, 2024
bf18e31
use stdbool; set gmr unified attribute
jeffhammond May 30, 2024
6c7082c
remove redundant print check; return early to avoid unnecessary MPI_D…
jeffhammond May 30, 2024
2012fa5
only call MPI_Win_sync with separate; cleanup code
jeffhammond May 30, 2024
010fa74
cleanup calls to gmr_sync
jeffhammond May 30, 2024
206f922
cleanup calls to gmr_sync
jeffhammond May 30, 2024
18ae892
cleanup comments (remove MPI-2 related)
jeffhammond May 30, 2024
c6760bf
use ARMCII_Is_win_unified
jeffhammond May 30, 2024
0472b1f
suppress printing by default
jeffhammond May 30, 2024
3cd1307
add MPI_Win_sync to msg_barrier
jeffhammond May 31, 2024
4dbf8c5
print value of ARMCI_MSG_BARRIER_SYNCS in ARMCI_VERBOSE
jeffhammond May 31, 2024
f2bb91c
fix race in sync (w.r.t. incoming remote RMA)
jeffhammond Aug 21, 2024
d59cc01
hoist code out of conditional
jeffhammond Sep 27, 2024
6bcf720
start working on request-based RMA (again)
jeffhammond Sep 27, 2024
616b954
(1) do not set aggregate initially (2) fix warning text
jeffhammond Sep 30, 2024
afad304
small changes to design
jeffhammond Sep 30, 2024
25b841a
add feature to use request-based RMA in atomics
jeffhammond Sep 30, 2024
67c5c01
merge gmr-extras.c into gmr.c
jeffhammond Sep 30, 2024
7d81940
add README for ARMCI_USE_REQUEST_ATOMICS
jeffhammond Sep 30, 2024
a1e7438
add a nonblocking request handle to all put/get/acc operations in gmr
jeffhammond Sep 30, 2024
a0bb8ac
more request prep
jeffhammond Sep 30, 2024
2df01b7
implement request-based RMA
jeffhammond Oct 1, 2024
fb33c92
fix strict aliasing rule violation that offends address sanitizer
jeffhammond Oct 1, 2024
77a1165
fix request array append
jeffhammond Oct 3, 2024
57c5b1f
formatting and C99 loops
jeffhammond Oct 3, 2024
350ca30
formatting and C99 loops
jeffhammond Oct 3, 2024
ce040ee
add just-flushall code path for when things are weird
jeffhammond Oct 3, 2024
2aa7679
default to request-based atomics; print whether handles use RMA requests
jeffhammond Oct 3, 2024
6c17513
turn off just_flushall; use Waitall again
jeffhammond Oct 3, 2024
be80b36
remove just_flushall code path; allow inactive handles but warn
jeffhammond Oct 3, 2024
ec3bf2f
whitespace and code motion
jeffhammond Oct 3, 2024
37a77e8
allow OMPI 5 to use DIRECT by default
jeffhammond Aug 22, 2024
fcd0d03
cleanup/finish nonblocking handle stuff
jeffhammond Oct 3, 2024
bc45c07
workaround bug
jeffhammond Oct 3, 2024
03bcfed
remove just_flushall
jeffhammond Oct 3, 2024
4400739
relax assertions in PARMCI_Wait related to uninitialized handles
jeffhammond Oct 3, 2024
6b6758d
fixed many incorrect/spurious warnings
jeffhammond Oct 4, 2024
2f3466e
remove printf
jeffhammond Oct 4, 2024
40aa5f0
these warnings appear to generate false positives
jeffhammond Oct 4, 2024
ca61c6b
reorganize init code so Warning works better
jeffhammond Oct 14, 2024
2d2cbd0
Merge branch 'master' into request-based-rma
jeffhammond Oct 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ libarmci_la_SOURCES = src/buffer.c \
src/internals.c \
src/malloc.c \
src/gmr.c \
src/gmr-extras.c \
src/message.c \
src/message_gop.c \
src/mutex.c \
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ Boolean environment variables are enabled when set to a value beginning with

Argument to `usleep()` to pause the progress polling loop.

`ARMCI_USE_REQUEST_ATOMICS` (boolean)

Switch to request-based RMA (with Rget_accumulate) instead of
Fetch_and_op/Compare_and_swap plus a local flush.

## Noncollective Groups

`ARMCI_NONCOLLECTIVE_GROUPS` (boolean)
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ if test "$ac_cv_prog_cc_c99" = "no" ; then
AC_ERROR([C99 not supported by the compiler])
fi

AC_CHECK_HEADERS([execinfo.h string.h strings.h stdint.h inttypes.h unistd.h errno.h time.h sys/time.h])
AC_CHECK_HEADERS([execinfo.h string.h strings.h stdint.h stdbool.h inttypes.h unistd.h errno.h time.h sys/time.h])
AC_TYPE_UINT8_T

# asynchronous progress
Expand Down
10 changes: 9 additions & 1 deletion src/armci.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#ifndef _ARMCI_H_
#define _ARMCI_H_

// TODO add to build system
#define USE_RMA_REQUESTS 1

#include <mpi.h>

#define ARMCI_MPI 3
Expand Down Expand Up @@ -64,11 +67,16 @@ int ARMCI_PutS_flag(void *src_ptr, int src_stride_ar[/*stride_levels*/],
int count[/*stride_levels+1*/], int stride_levels,
int *flag, int value, int proc);


/*
 * Handle type for ARMCI nonblocking operations.
 *
 * The layout is chosen at compile time by USE_RMA_REQUESTS:
 *   - defined:     the handle carries MPI_Request storage (one inline
 *                  request plus a pointer to an array of requests);
 *   - not defined: the handle carries only the `target` and `aggregate`
 *                  integers.
 * Because the two layouts differ, every translation unit that uses this
 * type must see the same USE_RMA_REQUESTS setting.
 */
typedef struct armci_hdl_s
{
#ifdef USE_RMA_REQUESTS
/* Selects which of the two request fields below is in use (see their
 * comments). NOTE(review): presumably also the number of entries in
 * request_array when > 0 -- confirm against the code that fills it. */
int batch_size;
MPI_Request single_request; // used when batch_size=0 (common case)
/* NOTE(review): allocation and ownership of this array are not visible
 * here -- confirm who allocates and frees it. */
MPI_Request *request_array; // used when batch_size>0
#else
int target; /* we do not actually support individual completion */
/* NOTE(review): presumably marks the handle as an aggregate handle --
 * semantics are not visible in this header; confirm. */
int aggregate;
#endif
}
armci_hdl_t;

Expand Down
11 changes: 8 additions & 3 deletions src/armci_internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,15 @@ typedef struct {
int progress_usleep; /* Argument to usleep() to throttling polling */
#endif
int use_win_allocate; /* Use win_allocate or win_create (or special memory...) */
int msg_barrier_syncs; /* Call MPI_Win_sync in armci_msg_barrier */
int explicit_nb_progress; /* Poke the MPI progress engine at the end of nonblocking (NB) calls */
int use_alloc_shm; /* Pass alloc_shm info to win_allocate / alloc_mem */
int rma_atomicity; /* Use Accumulate and Get_accumulate for Put and Get */
int end_to_end_flush; /* All flush_local calls become flush */
int rma_nocheck; /* Use MPI_MODE_NOCHECK on synchronization calls that take assertion */
int disable_shm_accumulate; /* Set the disable_shm_accumulate window info key to true */
int use_same_op; /* Set accumulate_ops=same_op window info key */
int use_request_atomics; /* Use request-based RMA for atomic operations */
char rma_ordering[20]; /* Set accumulate_ordering=<this> window info key */

size_t memory_limit; /* upper bound on how much memory ARMCI can allocate */
Expand Down Expand Up @@ -201,12 +203,12 @@ void ARMCII_Strided_to_dtype(int stride_array[/*stride_levels*/], int count[/*st
int stride_levels, MPI_Datatype old_type, MPI_Datatype *new_type);

int ARMCII_Iov_op_dispatch(enum ARMCII_Op_e op, void **src, void **dst, int count, int size,
int datatype, int overlapping, int same_alloc, int proc, int blocking);
int datatype, int overlapping, int same_alloc, int proc, int blocking, armci_hdl_t * handle);

int ARMCII_Iov_op_batched(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count,
MPI_Datatype type, int proc, int consrv /* if 1, batched = safe */, int blocking);
MPI_Datatype type, int proc, int consrv /* if 1, batched = safe */, int blocking, armci_hdl_t * handle);
int ARMCII_Iov_op_datatype(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count,
MPI_Datatype type, int proc, int blocking);
MPI_Datatype type, int proc, int blocking, armci_hdl_t * handle);

armcii_iov_iter_t *ARMCII_Strided_to_iov_iter(
void *src_ptr, int src_stride_ar[/*stride_levels*/],
Expand All @@ -230,4 +232,7 @@ void ARMCII_Buf_finish_write_vec(void **orig_bufs, void **new_bufs, int count, i
int ARMCII_Buf_acc_is_scaled(int datatype, void *scale);
void ARMCII_Buf_acc_scale(void *buf_in, void *buf_out, int size, int datatype, void *scale);

int ARMCII_Is_win_unified(MPI_Win win);
void ARMCII_Sync(void);

#endif /* HAVE_ARMCI_INTERNALS_H */
212 changes: 0 additions & 212 deletions src/gmr-extras.c

This file was deleted.

Loading