diff --git a/CMakeLists.txt b/CMakeLists.txt
index b827f4162..b9512afbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,6 +132,12 @@ IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64)
     SET(CLOCK aarch64)
 ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64)
 
+IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l)
+    SET(VALID_ARCH YES)
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+    ADD_DEFINITIONS(-D_GNU_SOURCE)
+    SET(CLOCK armv7l)
+ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l)
 
 IF(VALID_ARCH)
     MESSAGE(STATUS "System architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
@@ -140,7 +146,7 @@ IF(VALID_ARCH)
 ELSE(VALID_ARCH)
     MESSAGE(FATAL_ERROR "System architecture not recognized!\n"
         "Found: ${CMAKE_SYSTEM_PROCESSOR}\n"
-        "Expected: i386 | x86_64 | ppc64 | ppc64le | aarch64")
+        "Expected: i386 | x86_64 | ppc64 | ppc64le | aarch64 | armv7l")
 ENDIF(VALID_ARCH)
 
 ## MPI
diff --git a/core/clock/armv7l.c b/core/clock/armv7l.c
new file mode 100644
index 000000000..95c9b612b
--- /dev/null
+++ b/core/clock/armv7l.c
@@ -0,0 +1,48 @@
+/*
+  This implementation of an ARM v7 clock reader utilizes the
+  Performance Monitoring Unit (PMU) on Cortex-A7 chips.
+  Unfortunately, access to the cycle counter from userspace
+  is disabled by default. A kernel module that enables access
+  from userspace is required or the system will fault.
+
+  An example kernel module that does just that can be found:
+  https://github.com/nmcglohon/armv7l-userspace-counter.git
+
+  More information can be found:
+  http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
+ */
+
+#include <ross.h>
+
+#ifndef __GNUC__
+# error gcc asm extensions required
+#endif
+#if ! (defined(__arm__))
+# error only 32 bit arm platform supported
+#endif
+
+static const tw_optdef clock_opts [] =
+{
+    TWOPT_GROUP("ROSS Timing"),
+    TWOPT_STIME("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
+    TWOPT_END()
+};
+
+const tw_optdef *tw_clock_setup(void)
+{
+    return clock_opts;
+}
+
+
+void tw_clock_init(tw_pe * me)
+{
+    me->clock_time = 0;
+    me->clock_offset = tw_clock_read();
+}
+
+
+tw_clock tw_clock_now(tw_pe * me)
+{
+    me->clock_time = tw_clock_read() - me->clock_offset;
+    return me->clock_time;
+}
diff --git a/core/clock/armv7l.h b/core/clock/armv7l.h
new file mode 100644
index 000000000..65433b4cf
--- /dev/null
+++ b/core/clock/armv7l.h
@@ -0,0 +1,32 @@
+/*
+  This implementation of an ARM v7 clock reader utilizes the
+  Performance Monitoring Unit (PMU) on Cortex-A7 chips.
+  Unfortunately, access to the cycle counter from userspace
+  is disabled by default. A kernel module that enables access
+  from userspace is required or the system will fault.
+
+  An example kernel module that does just that can be found:
+  https://github.com/nmcglohon/armv7l-userspace-counter.git
+
+  More information can be found:
+  http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
+ */
+
+#ifndef INC_clock_armv7l
+#define INC_clock_armv7l
+
+typedef unsigned int tw_clock;
+
+static inline tw_clock tw_clock_read(void)
+{
+    unsigned int result = 0; /* stays 0 when ROSS_timing is not defined */
+#ifdef ROSS_timing
+    do {
+        __asm__ __volatile__ ("MRC p15, 0, %0, c9, c13, 0" : "=r"(result));
+    } while (__builtin_expect ((int) result == -1, 0));
+#endif
+
+    return result;
+}
+
+#endif
diff --git a/core/network-mpi.c b/core/network-mpi.c
index 306bffcd3..2169e6cf1 100644
--- a/core/network-mpi.c
+++ b/core/network-mpi.c
@@ -7,10 +7,15 @@ int custom_communicator = 0;
 /**
  * @struct act_q
  * @brief Keeps track of posted send or recv operations.
+ *
+ * This list structure is used *only* by the network mpi layer (this
+ * file). Within this file, two lists are used, for MPI Irecv and
+ * Isend requests. The MPI requests and statuses are linked with an
+ * event buffer through this struct.
  */
 struct act_q
 {
-    const char *name;
+    const char *name; /**< name of the list, used in error printouts */
 
     tw_event **event_list; /**< list of event pointers in this queue */
     MPI_Request *req_list; /**< list of MPI request handles */
@@ -27,8 +32,8 @@ static struct act_q posted_sends;
 static struct act_q posted_recvs;
 static tw_eventq outq;
 
-static unsigned int read_buffer = 16;
-static unsigned int send_buffer = 1024;
+static unsigned int read_buffer = 16; /**< Number of Irecv's to buffer, length of posted_recvs queue */
+static unsigned int send_buffer = 1024; /**< Number of Isend's to buffer, length of posted_sends queue */
 static int world_size = 1;
 
 static const tw_optdef mpi_opts[] = {
@@ -85,20 +90,13 @@ tw_net_init(int *argc, char ***argv)
  * @param[in] name name of the queue
  */
 static void
-init_q(struct act_q *q, const char *name)
+init_q(struct act_q *q, const char *name, unsigned int size)
 {
-    unsigned int n;
-
-    if(q == &posted_sends)
-        n = send_buffer;
-    else
-        n = read_buffer;
-
     q->name = name;
-    q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(*q->event_list), n);
-    q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(*q->req_list), n);
-    q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(*q->idx_list), n);
-    q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(*q->status_list), n);
+    q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(*q->event_list), size);
+    q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(*q->req_list), size);
+    q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(*q->idx_list), size);
+    q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(*q->status_list), size);
 }
 
 unsigned int
@@ -140,13 +138,12 @@ tw_net_start(void)
         g_tw_pe->hash_t = NULL;
     }
 
-    if (send_buffer < 1)
-        tw_error(TW_LOC, "network send buffer must be >= 1");
-    if (read_buffer < 1)
-        tw_error(TW_LOC, "network read buffer must be >= 1");
+    // these values are command line options
+    if (send_buffer < 1) tw_error(TW_LOC, "network send buffer must be >= 1");
+    if (read_buffer < 1) tw_error(TW_LOC, "network read buffer must be >= 1");
 
-    init_q(&posted_sends, "MPI send queue");
-    init_q(&posted_recvs, "MPI recv queue");
+    init_q(&posted_sends, "MPI send queue", send_buffer);
+    init_q(&posted_recvs, "MPI recv queue", read_buffer);
 
     g_tw_net_device_size = read_buffer;
 
diff --git a/core/ross.h b/core/ross.h
index 3aa236d94..459cdc0d8 100644
--- a/core/ross.h
+++ b/core/ross.h
@@ -185,6 +185,9 @@ typedef uint64_t tw_lpid;
 #ifdef ROSS_CLOCK_aarch64
 # include "clock/aarch64.h"
 #endif
+#ifdef ROSS_CLOCK_armv7l
+# include "clock/armv7l.h"
+#endif
 
 #include "tw-timing.h"
 #include "ross-types.h"
diff --git a/core/tw-eventq.h b/core/tw-eventq.h
index 6404a68fb..870041b8d 100644
--- a/core/tw-eventq.h
+++ b/core/tw-eventq.h
@@ -198,7 +198,6 @@ tw_eventq_alloc(tw_eventq * q, unsigned int cnt)
     g_tw_event_msg_sz = event_len;
 
     // compute number of events needed for the network.
-    g_tw_gvt_threshold = (int) ceil(g_tw_net_device_size / g_tw_event_msg_sz);
     g_tw_gvt_threshold = g_tw_net_device_size;
     g_tw_events_per_pe += g_tw_gvt_threshold;
     cnt += g_tw_gvt_threshold;
diff --git a/core/tw-opts.c b/core/tw-opts.c
index cf440d53a..5ea877c35 100644
--- a/core/tw-opts.c
+++ b/core/tw-opts.c
@@ -10,6 +10,11 @@ static int is_empty(const tw_optdef *def);
 static const tw_optdef *opt_groups[10];
 static unsigned int opt_index = 0;
 
+// internally set options registered with tw_opt_set
+#define I_ARGV_MAX 16
+static int i_argc = 0;
+static char * i_argv[I_ARGV_MAX];
+
 void
 tw_opt_add(const tw_optdef *options)
 {
@@ -121,7 +126,7 @@ show_help(void)
             cnt++;
         }
     }
-	
+
     // CMake used to pass options by command line flags
     fprintf(stderr, "ROSS CMake Configuration Options:\n");
     fprintf(stderr, " (See build-dir/core/config.h)\n");
@@ -130,7 +135,7 @@ show_help(void)
 void tw_opt_settings(FILE *outfile) {
     const tw_optdef **group = all_groups;
     unsigned cnt = 0;
-	
+
     for (; *group; group++){
         const tw_optdef *def = *group;
         for (; def->type; def++){
@@ -213,7 +218,7 @@ tw_opt_print(void)
         const tw_optdef *def = *group;
         for (; def->type; def++)
         {
-            if (def->type == TWOPTTYPE_GROUP || 
+            if (def->type == TWOPTTYPE_GROUP ||
                 (def->name && 0 == strcmp(def->name, "help")))
                 continue;
 
@@ -415,6 +420,12 @@ tw_opt_parse(int *argc_p, char ***argv_p)
     all_groups[i++] = basic;
     all_groups[i] = NULL;
 
+    while (i_argc > 0) {
+        i_argc--;
+        const char *s = i_argv[i_argc];
+        match_opt(s);
+    }
+
     while (argc > 1)
     {
         const char *s = argv[1];
@@ -436,3 +447,23 @@ tw_opt_parse(int *argc_p, char ***argv_p)
     *argc_p = argc;
     *argv_p = argv;
 }
+
+/**
+ * construct an internal argument, stored as "--opt=value", so it looks like a command line argument
+ * these cannot be processed until ross is fully set up and tw_opt_parse is called (from tw_init)
+ */
+void tw_opt_set(const char *opt, const char *value) {
+    if (i_argc >= I_ARGV_MAX) {
+        tw_error(TW_LOC, "Too many internal options, increase I_ARGV_MAX.");
+    }
+
+    size_t len = strlen(opt) + strlen(value) + 4; // "--" + "=" + terminating NUL
+    char * s = (char *)malloc(len);
+    if (s == NULL) {
+        tw_error(TW_LOC, "Out of memory while storing an internal option.");
+    }
+    snprintf(s, len, "--%s=%s", opt, value);
+
+    i_argv[i_argc] = s;
+    i_argc++;
+}
diff --git a/core/tw-opts.h b/core/tw-opts.h
index 5337d9c06..c81d0bd34 100644
--- a/core/tw-opts.h
+++ b/core/tw-opts.h
@@ -36,6 +36,8 @@ struct tw_optdef
 extern void tw_opt_parse(int *argc, char ***argv);
 /** Add an opt group */
 extern void tw_opt_add(const tw_optdef *options);
+/** Set an option at runtime. Command line options take precedence. */
+extern void tw_opt_set(const char *option, const char *value);
 /** Pretty-print the option descriptions (for --help) */
 extern void tw_opt_print(void);
 /** Pretty-print the option descriptions and current values */