Skip to content

Commit

Permalink
Add parsec_advise_data_on_device for zpotrf_L
Browse files Browse the repository at this point in the history
  • Loading branch information
Qinglei Cao committed May 17, 2024
1 parent 36d60da commit eeb82ec
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 10 deletions.
54 changes: 45 additions & 9 deletions src/zpotrf_L.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,45 @@ PRI_MAX [type = "int" hidden = on default = "(descA->mt * ( 6 + descA->mt
smallnb [type = "int" hidden = on default = "descA->mb" ]
CuHandlesID [type = "int" hidden = on default = -1 ]
POWorkspaceID [type = "int" hidden = on default = -1 ]
/* Number of entries in cuda_device_index; potrf_bind_A only issues device
 * advice when this is > 0 and uses it as the modulus of its tile-to-device map. */
nb_gpu_devices [ type = "int" hidden = on default = 0 ]
/* Array of device indices, indexed by potrf_bind_A with a value in
 * [0, nb_gpu_devices). NOTE(review): despite the name, the wrapper fills it
 * with both CUDA and HIP devices. */
cuda_device_index [ type = "int *" hidden = on default = "NULL"]
/* Divisor applied to the tile row m in potrf_bind_A before the modulo; the
 * wrapper sets it from the matrix descriptor's grid.rows. Must be non-zero. */
grid_rows [ type = "int" hidden = on default = 1]


/**************************************************
* potrf_bind_A *
**************************************************/
/*
 * One task per tile (m, n) of the lower triangle of descA (n <= m).
 * The task reads the tile from ddescA and hands it to the first task that
 * consumes it; the body does not touch the tile contents. When GPU devices
 * are registered (nb_gpu_devices > 0), the body also advises the runtime of
 * a preferred device for the tile.
 *
 * Routing of flow A:
 *   tile (0, 0)            -> potrf_zpotrf(0)
 *   tile (m, 0)            -> potrf_ztrsm(m, 0)
 *   tile (m, m), m > 0     -> potrf_zherk(0, m)
 *   tile (m, n), 0 < n < m -> potrf_zgemm(m, n, 0)
 *
 * NOTE(review): the (n == 0) guard is also true for tile (0, 0), so that tile
 * names potrf_ztrsm(0, 0) as a successor in addition to potrf_zpotrf(0).
 * Confirm that potrf_ztrsm(0, 0) is outside potrf_ztrsm's execution space and
 * is therefore discarded, or tighten the guard to (n == 0 && m > 0).
 */
potrf_bind_A(m, n)

// Execution space
m = 0 .. descA->mt-1
n = 0 .. m

loc_A = %{ return LOC(descA, m, n); %}

// Parallel partitioning
:descA(m, n)

// Parameters: A is read-only in this task
READ A <- ddescA(m, n) [ type = %{ return ADTT_READ(ddescA, loc_A, DEFAULT, TILED); %}
type_data = %{ return ADTT_READ(ddescA, loc_A, DEFAULT, LAPACK); %} ]
-> (m == 0 && n == 0) ? T potrf_zpotrf(0)
-> (n == 0)? C potrf_ztrsm(m, n)
-> (m == n && n > 0) ? T potrf_zherk(0, m)
-> (m != n && n > 0) ? C potrf_zgemm(m, n, 0)

BODY
{
/* The advice is compiled in only when PaRSEC has CUDA or HIP support. */
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
if( nb_gpu_devices > 0 ) {
/* '/' and '%' associate left to right, so this is (m / grid_rows) % nb_gpu_devices:
 * every tile of tile-row m gets the same g, and each run of grid_rows
 * consecutive tile rows moves on to the next entry of cuda_device_index. */
int g = m / grid_rows % nb_gpu_devices;
/* Presumably a placement hint to the runtime for this tile's data --
 * TODO confirm exact semantics in the PaRSEC documentation. */
parsec_advise_data_on_device( _f_A->original,
cuda_device_index[g],
PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE );
}
#endif
}
END


/**************************************************
* potrf_zpotrf *
Expand All @@ -106,8 +145,7 @@ loc_T = %{ return LOC(descA, k, k); %}

// Parameters

RW T <- (k == 0) ? ddescA(k, k) [ type = %{ return ADTT_READ(ddescA, loc_T, DEFAULT, TILED); %}
type_data = %{ return ADTT_READ(ddescA, loc_T, DEFAULT, LAPACK); %} ]
RW T <- (k == 0) ? A potrf_bind_A(k, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_T, DEFAULT, TILED); %} ]
<- (k != 0) ? T potrf_zherk(k-1, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_T, DEFAULT, TILED); %} ]
-> T potrf_ztrsm(k+1..descA->mt-1, k) /* dep OUT: rely on datacopy dtt for sending */
-> ddescA(k, k) [ type = %{ return ADTT_CP(_f_T, ddescA, loc_T, DEFAULT); %}
Expand All @@ -117,6 +155,7 @@ RW T <- (k == 0) ? ddescA(k, k) [ type = %{ return ADTT_READ(d

BODY [type=CUDA]
{
printf("%d\n", k);
int tempkm = k == descA->mt-1 ? descA->m - k*descA->mb : descA->mb;
int ldak = LDA(ddescA, T);

Expand Down Expand Up @@ -207,8 +246,7 @@ loc_C = %{ return LOC(descA, m, k); %}

// Parameters
READ T <- T potrf_zpotrf(k) [ type_remote = %{ return ADTT_DC(ddescA, loc_T, DEFAULT, TILED); %} ]
RW C <- (k == 0) ? ddescA(m, k) [ type = %{ return ADTT_READ(ddescA, loc_C, DEFAULT, TILED); %}
type_data = %{ return ADTT_READ(ddescA, loc_C, DEFAULT, LAPACK); %} ]
RW C <- (k == 0) ? A potrf_bind_A(m, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_C, DEFAULT, TILED); %} ]
<- (k != 0) ? C potrf_zgemm(m, k, k-1) [ type_remote = %{ return ADTT_DC(ddescA, loc_C, DEFAULT, TILED); %} ]
-> A potrf_zherk(k, m) /* dep OUT: rely on datacopy dtt for sending */
-> A potrf_zgemm(m, k+1..m-1, k) /* dep OUT: rely on datacopy dtt for sending */
Expand Down Expand Up @@ -319,9 +357,8 @@ loc_T = %{ return LOC(descA, m, m); %}

//Parameters
READ A <- C potrf_ztrsm(m, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_A, DEFAULT, TILED); %} ]
RW T <- (k == 0) ? ddescA(m, m) [ type = %{ return ADTT_READ(ddescA, loc_T, DEFAULT, TILED); %}
type_data = %{ return ADTT_READ(ddescA, loc_T, DEFAULT, LAPACK); %} ]
<- (k != 0) ? T potrf_zherk(k-1, m) [ type_remote = %{ return ADTT_DC(ddescA, loc_T, DEFAULT, TILED); %} ]
RW T <- (k == 0) ? A potrf_bind_A(m, m) [ type_remote = %{ return ADTT_DC(ddescA, loc_T, DEFAULT, TILED); %} ]
<- (k != 0) ? T potrf_zherk(k-1, m) /* dep OUT: rely on datacopy dtt for sending */
-> (m == k+1) ? T potrf_zpotrf(m) : T potrf_zherk(k+1, m) /* dep OUT: rely on datacopy dtt for sending */

; (m >= (descA->mt - PRI_CHANGE)) ? (descA->mt - m) * (descA->mt - m) * (descA->mt - m) + 3 * (m - k) : PRI_MAX
Expand Down Expand Up @@ -422,8 +459,7 @@ loc_C = %{ return LOC(descA, m, n); %}
// Parameters
READ A <- C potrf_ztrsm(m, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_A, DEFAULT, TILED); %} ]
READ B <- C potrf_ztrsm(n, k) [ type_remote = %{ return ADTT_DC(ddescA, loc_B, DEFAULT, TILED); %} ]
RW C <- (k == 0) ? ddescA(m, n) [ type = %{ return ADTT_READ(ddescA, loc_C, DEFAULT, TILED); %}
type_data = %{ return ADTT_READ(ddescA, loc_C, DEFAULT, LAPACK); %} ]
RW C <- (k == 0) ? A potrf_bind_A(m, n) [ type_remote = %{ return ADTT_DC(ddescA, loc_C, DEFAULT, TILED); %} ]
<- (k != 0) ? C potrf_zgemm(m, n, k-1) [ type_remote = %{ return ADTT_DC(ddescA, loc_C, DEFAULT, TILED); %} ]
-> (n == k+1) ? C potrf_ztrsm(m, n) : C potrf_zgemm(m, n, k+1) /* dep OUT: rely on datacopy dtt for sending */

Expand Down
33 changes: 33 additions & 0 deletions src/zpotrf_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "zpotrf_U.h"
#include "zpotrf_L.h"
#include "cores/dplasma_plasmatypes.h"
#include "parsec/data_dist/matrix/sym_two_dim_rectangle_cyclic.h"

#define MAX_SHAPES 1

Expand Down Expand Up @@ -104,6 +105,33 @@ static void destroy_workspace(void *_ws, void *_n)
free(ws);
(void)_n;
}

/**
 * Collect the device_index of every CUDA or HIP device known to PaRSEC.
 *
 * Both outputs are always written, so the caller does not need to
 * pre-initialize them.
 *
 * @param[out] dev_index  Set to a malloc'ed array holding the device_index of
 *                        each matching device, in enumeration order, or to
 *                        NULL when no device matches or the allocation fails.
 *                        The caller owns the array and releases it with free().
 * @param[out] nb         Set to the number of entries stored in *dev_index
 *                        (0 when *dev_index is NULL).
 */
static void parsec_find_nb_devices(int **dev_index, int *nb) {
    int count = 0;

    /* Initialize the outputs here rather than trusting the caller to do so. */
    *dev_index = NULL;
    *nb = 0;

    /* First pass: count the accelerators so the array can be sized exactly. */
    for(int i = 0; i < (int)parsec_nb_devices; i++) {
        parsec_device_module_t *device = parsec_mca_device_get(i);
        if( PARSEC_DEV_CUDA == device->type || PARSEC_DEV_HIP == device->type ) {
            count++;
        }
    }

    if( 0 == count ) {
#if defined(DPLASMA_DEBUG)
        /* No PaRSEC context is available in this helper, so only the host
         * name is reported. */
        char hostname[256];
        if( 0 != gethostname(hostname, sizeof hostname) ) {
            hostname[0] = '\0';
        }
        hostname[sizeof hostname - 1] = '\0'; /* gethostname may truncate without a NUL */
        fprintf(stderr, "No CUDA or HIP device found on %s\n", hostname);
#endif
        /* Avoid malloc(0), whose result is implementation-defined. */
        return;
    }

    int *indices = malloc((size_t)count * sizeof *indices);
    if( NULL == indices ) {
        /* Degrade gracefully: reporting zero devices disables the placement advice. */
        fprintf(stderr, "parsec_find_nb_devices: cannot allocate %d device indices\n", count);
        return;
    }

    /* Second pass: record the indices; never write past the counted size. */
    int found = 0;
    for(int i = 0; i < (int)parsec_nb_devices && found < count; i++) {
        parsec_device_module_t *device = parsec_mca_device_get(i);
        if( PARSEC_DEV_CUDA == device->type || PARSEC_DEV_HIP == device->type ) {
            indices[found++] = device->device_index;
        }
    }

    *dev_index = indices;
    *nb = found;
}

#endif

/**
Expand Down Expand Up @@ -210,6 +238,11 @@ dplasma_zpotrf_New( dplasma_enum_t uplo,
destroy_workspace, NULL,
zpotrf_create_workspace, parsec_zpotrf,
NULL);
int nb = 0, *dev_index;
parsec_find_nb_devices(&dev_index, &nb);
parsec_zpotrf->_g_nb_gpu_devices = nb;
parsec_zpotrf->_g_cuda_device_index = dev_index;
parsec_zpotrf->_g_grid_rows = ((parsec_matrix_sym_block_cyclic_t *)A)->grid.rows;
#else
parsec_zpotrf->_g_CuHandlesID = PARSEC_INFO_ID_UNDEFINED;
parsec_zpotrf->_g_POWorkspaceID = PARSEC_INFO_ID_UNDEFINED;
Expand Down
3 changes: 2 additions & 1 deletion tests/testing_zpotrf.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ int main(int argc, char ** argv)
{
parsec_context_t* parsec;
int iparam[IPARAM_SIZEOF];
dplasma_enum_t uplo = dplasmaUpper;
//dplasma_enum_t uplo = dplasmaUpper;
dplasma_enum_t uplo = dplasmaLower;
int info = 0;
int ret = 0;

Expand Down

0 comments on commit eeb82ec

Please sign in to comment.