Skip to content

Commit

Permalink
Merge pull request #36 from mochi-hpc/carns/dev-multi-server
Browse files Browse the repository at this point in the history
add support for multiple servers
  • Loading branch information
carns authored Nov 5, 2024
2 parents daece94 + 96e779d commit 25b3dec
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 27 deletions.
83 changes: 58 additions & 25 deletions src/quintain-benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <abt.h>
#include <quintain-client.h>
#include <flock/flock-group-view.h>
#include <flock/flock-group.h>

#include "quintain-macros.h"
#include "bedrock-c-wrapper.h"
Expand Down Expand Up @@ -69,7 +70,9 @@ int main(int argc, char** argv)
margo_instance_id mid = MARGO_INSTANCE_NULL;
quintain_client_t qcl = QTN_CLIENT_NULL;
quintain_provider_handle_t qph = QTN_PROVIDER_HANDLE_NULL;
flock_group_handle_t fh = FLOCK_GROUP_HANDLE_NULL;
bedrock_client_t bcl = NULL;
flock_client_t fcl = FLOCK_CLIENT_NULL;
bedrock_service_t bsh = NULL;
hg_addr_t svr_addr = HG_ADDR_NULL;
struct options opts;
Expand Down Expand Up @@ -120,6 +123,7 @@ int main(int argc, char** argv)

/* find transport to initialize margo to match provider */
svr_addr_str = group_view.members.data[0].address;
provider_id = group_view.members.data[0].provider_id;
for (int i = 0; i < 64 && svr_addr_str[i] != ':'; ++i)
proto[i] = svr_addr_str[i];

Expand All @@ -138,37 +142,64 @@ int main(int argc, char** argv)
goto err_flock_cleanup;
}

/* get the number of providers */
nproviders = group_view.members.size;
// Note: here we assume that the file we loaded is the current
// representation of the group; if this could not be the case, we should
// create a Flock client, a Flock group handle, and do an update after
// loading the group handle from the file.

if (nproviders == 0) {
fprintf(stderr, "Error: flock group has no members.\n");
goto err_margo_cleanup;
} else if (nproviders > 1) {
fprintf(stderr,
"# Warning: flock group of size %d detected, but the quintain "
"benchmark presently only supports issuing RPCs to the first "
"member.\n",
nproviders);
}

/* refresh view of servers */
MPI_Barrier(MPI_COMM_WORLD);

/* initialize a Flock client and refresh the view in case it diverges
* from what was in the initial group file
*/
ret = margo_addr_lookup(mid, svr_addr_str, &svr_addr);
if (ret != HG_SUCCESS) {
fprintf(stderr, "Error: margo_addr_lookup()\n");
goto err_margo_cleanup;
}

ret = flock_client_init(mid, ABT_POOL_NULL, &fcl);
if (ret != FLOCK_SUCCESS) {
fprintf(stderr, "Error: flock_client_init() failure.\n");
goto err_flock_cleanup;
}

ret = flock_group_handle_create(fcl, svr_addr, provider_id, true, &fh);
if (ret != FLOCK_SUCCESS) {
fprintf(stderr, "Error: flock_group_handle_create() failure.\n");
goto err_flock_cleanup;
}

ret = flock_group_update_view(fh, NULL);
if (ret != FLOCK_SUCCESS) {
fprintf(stderr, "Error: flock_group_update_view() failure.\n");
goto err_flock_cleanup;
}

ret = flock_group_get_view(fh, &group_view);
if (ret != FLOCK_SUCCESS) {
fprintf(stderr, "Error: flock_group_get_view() failure.\n");
goto err_flock_cleanup;
}

/* get the number of providers */
nproviders = flock_group_view_member_count(&group_view);
if (nproviders == 0) {
fprintf(stderr, "Error: flock group has no members.\n");
goto err_flock_cleanup;
}

ret = bedrock_client_init(mid, &bcl);
if (ret != BEDROCK_SUCCESS) {
fprintf(stderr, "Error: bedrock_client_init() failure.\n");
goto err_margo_cleanup;
goto err_flock_cleanup;
}

/* each benchmark process selects exactly one server to contact */
svr_addr_str = group_view.members.data[nranks % nproviders].address;
provider_id = group_view.members.data[nranks % nproviders].provider_id;

/* resolve address to target server */
margo_addr_free(mid, svr_addr);
ret = margo_addr_lookup(mid, svr_addr_str, &svr_addr);
if (ret != HG_SUCCESS) {
fprintf(stderr, "Error: margo_addr_lookup()\n");
goto err_flock_cleanup;
}

ret = bedrock_service_handle_create(bcl, svr_addr_str, 0, &bsh);
Expand Down Expand Up @@ -369,7 +400,7 @@ int main(int argc, char** argv)
/* add new one, derived at run time */
json_object_object_add(json_cfg, "margo", margo_config);

gzprintf(f, "\"quintain-provider\" : %s\n",
gzprintf(f, "\"quintain-provider (first of %d)\" : %s\n", nproviders,
json_object_to_json_string_ext(
svr_config,
JSON_C_TO_STRING_PRETTY | JSON_C_TO_STRING_NOSLASHESCAPE));
Expand Down Expand Up @@ -421,8 +452,8 @@ int main(int argc, char** argv)
svr_stime, svr_alltime);
}
gzprintf(f, "# client_stats\t<rank>\t<utime>\t<stime>\t<alltime>\n");
gzprintf(f, "client_stats\t%d\t%.9f\t%.9f\t%.9f\n", 0, cli_utime, cli_stime,
cli_alltime);
gzprintf(f, "client_stats\t%d\t%.9f\t%.9f\t%.9f\n", my_rank, cli_utime,
cli_stime, cli_alltime);

if (f) {
gzclose(f);
Expand Down Expand Up @@ -472,14 +503,16 @@ int main(int argc, char** argv)
if (samples) munmap(samples, MAX_SAMPLES * sizeof(double));
if (qph != QTN_PROVIDER_HANDLE_NULL) quintain_provider_handle_release(qph);
if (qcl != QTN_CLIENT_NULL) quintain_client_finalize(qcl);
err_flock_cleanup:
flock_group_view_clear(&group_view);
if (fh != FLOCK_GROUP_HANDLE_NULL) flock_group_handle_release(fh);
if (fcl != FLOCK_CLIENT_NULL) flock_client_finalize(fcl);
err_br_cleanup:
if (bsh != NULL) bedrock_service_handle_destroy(bsh);
if (bcl != NULL) bedrock_client_finalize(bcl);
err_margo_cleanup:
if (svr_addr != HG_ADDR_NULL) margo_addr_free(mid, svr_addr);
margo_finalize(mid);
err_flock_cleanup:
flock_group_view_clear(&group_view);
err_mpi_cleanup:
if (json_cfg) json_object_put(json_cfg);
if (svr_config) json_object_put(svr_config);
Expand Down
7 changes: 5 additions & 2 deletions tests/Makefile.subdir
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ TESTS_ENVIRONMENT += \
check_PROGRAMS +=

TESTS += \
tests/basic.sh
tests/basic.sh\
tests/multi.sh

EXTRA_DIST += \
tests/basic.sh \
tests/mochi-quintain-provider.json\
tests/quintain-benchmark-example.json
tests/quintain-benchmark-example.json\
tests/mochi-quintain-provider-2svr-A.json\
tests/mochi-quintain-provider-2svr-B.json

DISTCLEANFILES += \
test-output.gz \
Expand Down
34 changes: 34 additions & 0 deletions tests/mochi-quintain-provider-2svr-A.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"margo" : {
},
"libraries" : [
"libquintain-bedrock.so",
"libflock-bedrock-module.so"
],
"providers" : [
{
"name" : "my_quintain_provider",
"type" : "quintain",
"provider_id" : 1,
"dependencies": {
"pool" : "__primary__"
},
"config" : {}
},
{
"name" : "quintain_group",
"type" : "flock",
"provider_id" : 2,
"dependencies": {
"pool" : "__primary__"
},
"config": {
"bootstrap": "self",
"file": "./quintain.flock.json",
"group": {
"type": "centralized"
}
}
}
]
}
34 changes: 34 additions & 0 deletions tests/mochi-quintain-provider-2svr-B.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"margo" : {
},
"libraries" : [
"libquintain-bedrock.so",
"libflock-bedrock-module.so"
],
"providers" : [
{
"name" : "my_quintain_provider",
"type" : "quintain",
"provider_id" : 1,
"dependencies": {
"pool" : "__primary__"
},
"config" : {}
},
{
"name" : "quintain_group",
"type" : "flock",
"provider_id" : 2,
"dependencies": {
"pool" : "__primary__"
},
"config": {
"bootstrap": "join",
"file": "./quintain.flock.json",
"group": {
"type": "centralized"
}
}
}
]
}
20 changes: 20 additions & 0 deletions tests/multi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

set -e
set -o pipefail

if [ -z $srcdir ]; then
echo srcdir variable not set.
exit 1
fi

export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/src/.libs"

bedrock -c $srcdir/tests/mochi-quintain-provider-2svr-A.json na+sm:// &
sleep 2
bedrock -c $srcdir/tests/mochi-quintain-provider-2svr-B.json na+sm:// &
sleep 2

mpiexec -n 2 src/quintain-benchmark -g quintain.flock.json -j $srcdir/tests/quintain-benchmark-example.json -o test-output

bedrock-shutdown -f quintain.flock.json na+sm://

0 comments on commit 25b3dec

Please sign in to comment.