Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New API: tw_opt_set #163

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64)
SET(CLOCK aarch64)
ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64)

# armv7l: accept the architecture, define _GNU_SOURCE for the build, and set
# CLOCK to armv7l (presumably selects core/clock/armv7l.{c,h}; confirm against
# the rest of this CMakeLists.txt).
IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l)
SET(VALID_ARCH YES)
# Re-assigns CMAKE_C_FLAGS to its current value; no extra flags are added here.
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
ADD_DEFINITIONS(-D_GNU_SOURCE)
SET(CLOCK armv7l)
ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l)

IF(VALID_ARCH)
MESSAGE(STATUS "System architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
Expand All @@ -140,7 +146,7 @@ IF(VALID_ARCH)
ELSE(VALID_ARCH)
MESSAGE(FATAL_ERROR "System architecture not recognized!\n"
"Found: ${CMAKE_SYSTEM_PROCESSOR}\n"
"Expected: i386 | x86_64 | ppc64 | ppc64le | aarch64")
"Expected: i386 | x86_64 | ppc64 | ppc64le | aarch64 | armv7l")
ENDIF(VALID_ARCH)

## MPI
Expand Down
48 changes: 48 additions & 0 deletions core/clock/armv7l.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
This implementation of an ARM v7 clock reader utilizes the
Performance Monitoring Unit (PMU) on Cortex-A7 chips.
Unfortunately, access to the cycle counter from userspace
is disabled by default. A kernel module that enables access
from userspace is required or the system will fault.

An example kernel module that does just that can be found at:
https://github.com/nmcglohon/armv7l-userspace-counter.git

More information can be found at:
http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
*/

#include <ross.h>

#ifndef __GNUC__
# error gcc asm extensions required
#endif
#if ! (defined(__arm__))
# error only 32 bit arm platform supported
#endif

/**
 * Command-line option table for the clock module: a "ROSS Timing" group
 * holding the "clock-rate" option, which is bound to g_tw_clock_rate.
 * The table is terminated by TWOPT_END() and handed out by tw_clock_setup().
 */
static const tw_optdef clock_opts [] =
{
TWOPT_GROUP("ROSS Timing"),
TWOPT_STIME("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
TWOPT_END()
};

/**
 * Expose the clock module's command-line option table.
 *
 * @return pointer to the first entry of the clock_opts table
 */
const tw_optdef *tw_clock_setup(void)
{
    return &clock_opts[0];
}


/**
 * Reset the clock state of a PE.
 *
 * Records the current counter reading as the PE's offset and clears its
 * accumulated clock time, so later readings are relative to this call.
 *
 * @param me PE whose clock fields are initialized
 */
void tw_clock_init(tw_pe * me)
{
    me->clock_offset = tw_clock_read();
    me->clock_time = 0;
}


/**
 * Update and return the PE's clock time.
 *
 * The stored value is the current counter reading minus the offset that
 * tw_clock_init() recorded for this PE.
 *
 * @param me PE whose clock_time is refreshed
 * @return the freshly stored me->clock_time
 */
tw_clock tw_clock_now(tw_pe * me)
{
    return me->clock_time = tw_clock_read() - me->clock_offset;
}
32 changes: 32 additions & 0 deletions core/clock/armv7l.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
This implementation of an ARM v7 clock reader utilizes the
Performance Monitoring Unit (PMU) on Cortex-A7 chips.
Unfortunately, access to the cycle counter from userspace
is disabled by default. A kernel module that enables access
from userspace is required or the system will fault.

An example kernel module that does just that can be found at:
https://github.com/nmcglohon/armv7l-userspace-counter.git

More information can be found at:
http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
*/

#ifndef INC_clock_armv7l
#define INC_clock_armv7l

/** Clock value type: one raw reading of the cycle counter. */
typedef unsigned int tw_clock;

/**
 * Read the cycle counter.
 *
 * When ROSS_timing is defined, the counter is read through the coprocessor
 * register access below (MRC p15, c9, c13), retrying while the value read
 * equals -1. When ROSS_timing is not defined, no hardware access is made
 * and 0 is returned.
 *
 * @return the counter value, or 0 when timing is compiled out
 */
static inline tw_clock tw_clock_read(void)
{
    /* Initialized so the return value is well defined when the read below
       is compiled out; previously an indeterminate value was returned. */
    unsigned int result = 0;
#ifdef ROSS_timing
    do {
        __asm__ __volatile__ ("MRC p15, 0, %0, c9, c13, 0" : "=r"(result));
    } while (__builtin_expect ((int) result == -1, 0));
#endif

    return result;
}

#endif
39 changes: 18 additions & 21 deletions core/network-mpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ int custom_communicator = 0;
/**
* @struct act_q
* @brief Keeps track of posted send or recv operations.
*
* This list structure is used *only* by the network mpi layer (this
* file). Within this file, two lists are used, for MPI Irecv and
* Isend requests. The MPI requests and statuses are linked with an
* event buffer through this struct.
*/
struct act_q
{
const char *name;
const char *name; /**< name of the list, used in error printouts */

tw_event **event_list; /**< list of event pointers in this queue */
MPI_Request *req_list; /**< list of MPI request handles */
Expand All @@ -27,8 +32,8 @@ static struct act_q posted_sends;
static struct act_q posted_recvs;
static tw_eventq outq;

static unsigned int read_buffer = 16;
static unsigned int send_buffer = 1024;
static unsigned int read_buffer = 16; /**< Number of Irecv's to buffer, length of posted_recvs queue */
static unsigned int send_buffer = 1024; /**< Number of Isend's to buffer, length of posted_sends queue */
static int world_size = 1;

static const tw_optdef mpi_opts[] = {
Expand Down Expand Up @@ -85,20 +90,13 @@ tw_net_init(int *argc, char ***argv)
* @param[in] name name of the queue
*/
static void
init_q(struct act_q *q, const char *name)
init_q(struct act_q *q, const char *name, unsigned int size)
{
unsigned int n;

if(q == &posted_sends)
n = send_buffer;
else
n = read_buffer;

q->name = name;
q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(*q->event_list), n);
q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(*q->req_list), n);
q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(*q->idx_list), n);
q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(*q->status_list), n);
q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(*q->event_list), size);
q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(*q->req_list), size);
q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(*q->idx_list), size);
q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(*q->status_list), size);
}

unsigned int
Expand Down Expand Up @@ -140,13 +138,12 @@ tw_net_start(void)
g_tw_pe->hash_t = NULL;
}

if (send_buffer < 1)
tw_error(TW_LOC, "network send buffer must be >= 1");
if (read_buffer < 1)
tw_error(TW_LOC, "network read buffer must be >= 1");
// these values are command line options
if (send_buffer < 1) tw_error(TW_LOC, "network send buffer must be >= 1");
if (read_buffer < 1) tw_error(TW_LOC, "network read buffer must be >= 1");

init_q(&posted_sends, "MPI send queue");
init_q(&posted_recvs, "MPI recv queue");
init_q(&posted_sends, "MPI send queue", send_buffer);
init_q(&posted_recvs, "MPI recv queue", read_buffer);

g_tw_net_device_size = read_buffer;

Expand Down
3 changes: 3 additions & 0 deletions core/ross.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ typedef uint64_t tw_lpid;
#ifdef ROSS_CLOCK_aarch64
# include "clock/aarch64.h"
#endif
#ifdef ROSS_CLOCK_armv7l
# include "clock/armv7l.h"
#endif

#include "tw-timing.h"
#include "ross-types.h"
Expand Down
1 change: 0 additions & 1 deletion core/tw-eventq.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ tw_eventq_alloc(tw_eventq * q, unsigned int cnt)
g_tw_event_msg_sz = event_len;

// compute number of events needed for the network.
g_tw_gvt_threshold = (int) ceil(g_tw_net_device_size / g_tw_event_msg_sz);
g_tw_gvt_threshold = g_tw_net_device_size;
g_tw_events_per_pe += g_tw_gvt_threshold;
cnt += g_tw_gvt_threshold;
Expand Down
37 changes: 34 additions & 3 deletions core/tw-opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ static int is_empty(const tw_optdef *def);
static const tw_optdef *opt_groups[10];
static unsigned int opt_index = 0;

// internally set options registered with tw_opt_set
#define I_ARGV_MAX 16
static int i_argc = 0;
static char * i_argv[I_ARGV_MAX];

void
tw_opt_add(const tw_optdef *options)
{
Expand Down Expand Up @@ -121,7 +126,7 @@ show_help(void)
cnt++;
}
}

// CMake used to pass options by command line flags
fprintf(stderr, "ROSS CMake Configuration Options:\n");
fprintf(stderr, " (See build-dir/core/config.h)\n");
Expand All @@ -130,7 +135,7 @@ show_help(void)
void tw_opt_settings(FILE *outfile) {
const tw_optdef **group = all_groups;
unsigned cnt = 0;

for (; *group; group++){
const tw_optdef *def = *group;
for (; def->type; def++){
Expand Down Expand Up @@ -213,7 +218,7 @@ tw_opt_print(void)
const tw_optdef *def = *group;
for (; def->type; def++)
{
if (def->type == TWOPTTYPE_GROUP ||
if (def->type == TWOPTTYPE_GROUP ||
(def->name && 0 == strcmp(def->name, "help")))
continue;

Expand Down Expand Up @@ -415,6 +420,12 @@ tw_opt_parse(int *argc_p, char ***argv_p)
all_groups[i++] = basic;
all_groups[i] = NULL;

while (i_argc > 0) {
i_argc--;
const char *s = i_argv[i_argc];
match_opt(s);
}

while (argc > 1)
{
const char *s = argv[1];
Expand All @@ -436,3 +447,23 @@ tw_opt_parse(int *argc_p, char ***argv_p)
*argc_p = argc;
*argv_p = argv;
}

/**
 * Register an option internally so that it looks like a command line argument.
 *
 * Builds the string "--<opt>=<value>" on the heap and appends it to i_argv.
 * The stored strings cannot be processed until ross is fully set up and
 * tw_opt_parse is called (from tw_init).
 *
 * @param opt   option name, without the leading "--"; must not be NULL
 * @param value option value as text; must not be NULL
 *
 * Reports an error through tw_error() when an argument is NULL, when more
 * than I_ARGV_MAX options have been registered, or when memory cannot be
 * allocated. In each of these cases nothing is stored.
 */
void tw_opt_set(const char *opt, const char *value) {
    if (opt == NULL || value == NULL) {
        tw_error(TW_LOC, "tw_opt_set requires a non-NULL option and value.");
        return;
    }

    if (i_argc >= I_ARGV_MAX) {
        tw_error(TW_LOC, "Too many internal options, increase I_ARGV_MAX.");
        /* Defensive: never write past i_argv even if tw_error() returns. */
        return;
    }

    /* "--" (2) + opt + "=" (1) + value + terminating NUL (1) */
    unsigned long len = strlen(opt) + strlen(value) + 4;
    char * s = (char *)malloc(len*sizeof(char));
    if (s == NULL) {
        tw_error(TW_LOC, "Out of memory while storing an internal option.");
        return;
    }

    strcpy(s, "--");
    strcat(s, opt);
    strcat(s, "=");
    strcat(s, value);

    i_argv[i_argc] = s;
    i_argc++;
}
2 changes: 2 additions & 0 deletions core/tw-opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct tw_optdef
extern void tw_opt_parse(int *argc, char ***argv);
/** Add an opt group */
extern void tw_opt_add(const tw_optdef *options);
/** Set an option at runtime. Command-line options take precedence. */
extern void tw_opt_set(const char *option, const char *value);
/** Pretty-print the option descriptions (for --help) */
extern void tw_opt_print(void);
/** Pretty-print the option descriptions and current values */
Expand Down