Skip to content

Commit

Permalink
Merge pull request trilinos#11600 from trilinos/kokkos-promotion
Browse files Browse the repository at this point in the history
Kokkos + KokkosKernels Promotion To 4.0.0
  • Loading branch information
ndellingwood authored Mar 2, 2023
2 parents c6ad735 + 612650d commit 8262023
Show file tree
Hide file tree
Showing 2,822 changed files with 94,743 additions and 103,904 deletions.
129 changes: 40 additions & 89 deletions cmake/std/atdm/contributed/weaver/environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,12 @@

# Handle compiler defaults

if [[ "$ATDM_CONFIG_COMPILER" == "GNU-7.2.0" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "GNU" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "DEFAULT" ]] \
if [[ "$ATDM_CONFIG_COMPILER" == "CUDA" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA-11.2" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA-11.2_GNU-8.3.0" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA-11.2_GNU-8.3.0-OPENMPI-4.1.1" ]] \
; then
export ATDM_CONFIG_COMPILER=GNU-7.2.0-OPENMPI-2.1.2

elif [[ "$ATDM_CONFIG_COMPILER" == "GNU-7.2.0-OPENMPI-4.0.1" ]] \
; then
export ATDM_CONFIG_COMPILER=GNU-7.2.0-OPENMPI-4.0.1

elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA-9.2_GNU-7.2.0" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA-9.2" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA" ]] \
; then
export ATDM_CONFIG_COMPILER=CUDA-9.2-GNU-7.2.0-OPENMPI-2.1.2

elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1_GNU-7.2.0" ]] \
|| [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1" ]] \
; then
export ATDM_CONFIG_COMPILER=CUDA-10.1-GNU-7.2.0-OPENMPI-4.0.1
export ATDM_CONFIG_COMPILER=CUDA-11.2-GNU-8.3.0-OPENMPI-4.1.1

else
echo
Expand All @@ -36,30 +22,14 @@ else
echo "***"
echo "*** Suppoted compilers include:"
echo "***"
echo "*** gnu-7.2.0 (default and default gnu)"
echo "*** cuda-9.2-gnu-7.2.0 (default cuda, cuda-9.2)"
echo "*** cuda-10.1-gnu-7.2.0 (cuda-10.1)"
echo "*** cuda-11.2-gnu-8.3.0 (cuda-11.2 default)"
echo "***"
return
fi

# Handle KOKKOS_ARCH

if [[ "$ATDM_CONFIG_COMPILER" == "GNU"* ]]; then
if [[ "$ATDM_CONFIG_KOKKOS_ARCH" == "DEFAULT" ]] ; then
export ATDM_CONFIG_KOKKOS_ARCH=Power9
elif [[ "$ATDM_CONFIG_KOKKOS_ARCH" == "Power9" ]] ; then
export ATDM_CONFIG_KOKKOS_ARCH=Power9
else
echo
echo "***"
echo "*** ERROR: KOKKOS_ARCH=$ATDM_CONFIG_KOKKOS_ARCH is not a valid option"
echo "*** for the compiler GNU. Replace '$ATDM_CONFIG_KOKKOS_ARCH' in the"
echo "*** job name with 'Power9'"
echo "***"
return
fi
elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then
if [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then
if [[ "$ATDM_CONFIG_KOKKOS_ARCH" == "DEFAULT" ]] ; then
export ATDM_CONFIG_KOKKOS_ARCH=Power9,Volta70
elif [[ "$ATDM_CONFIG_KOKKOS_ARCH" == "Power9" ]] ; then
Expand Down Expand Up @@ -87,8 +57,8 @@ fi

echo "Using weaver compiler stack $ATDM_CONFIG_COMPILER to build $ATDM_CONFIG_BUILD_TYPE code with Kokkos node type $ATDM_CONFIG_NODE_TYPE and KOKKOS_ARCH=$ATDM_CONFIG_KOKKOS_ARCH"

export ATDM_CONFIG_ENABLE_SPARC_SETTINGS=ON
export ATDM_CONFIG_USE_NINJA=ON
export ATDM_CONFIG_ENABLE_SPARC_SETTINGS=OFF
export ATDM_CONFIG_USE_NINJA=OFF

if [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] \
&& [[ "${ATDM_CONFIG_CUDA_RDC}" == "ON" ]] \
Expand All @@ -106,9 +76,10 @@ else
export ATDM_CONFIG_BUILD_COUNT=64
fi

source /etc/profile.d/modules.sh
module purge

module load git/2.10.1
module load git/2.10.1 python/3.7.3
# NOTE: Must load a git module since /usr/bin/git does not exist on the
# compute nodes.

Expand All @@ -119,35 +90,10 @@ else
export ATDM_CONFIG_CTEST_PARALLEL_LEVEL=40
fi

if [ "$ATDM_CONFIG_COMPILER" == "GNU-7.2.0-OPENMPI-2.1.2" ]; then
if [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then

module load devpack/20180517/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88
module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0
export OMPI_CXX=`which g++`
export OMPI_CC=`which gcc`
export OMPI_FC=`which gfortran`
export ATDM_CONFIG_LAPACK_LIBS="-L${LAPACK_ROOT}/lib;-llapack;-lgfortran;-lgomp"
export ATDM_CONFIG_BLAS_LIBS="-L${BLAS_ROOT}/lib;-lblas;-lgfortran;-lgomp;-lm"

elif [ "$ATDM_CONFIG_COMPILER" == "GNU-7.2.0-OPENMPI-4.0.1" ]; then

module load devpack/20190814/openmpi/4.0.1/gcc/7.2.0/cuda/10.1.105
module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0
export OMPI_CXX=`which g++`
export OMPI_CC=`which gcc`
export OMPI_FC=`which gfortran`
export ATDM_CONFIG_LAPACK_LIBS="-L${LAPACK_ROOT}/lib;-llapack;-lgfortran;-lgomp"
export ATDM_CONFIG_BLAS_LIBS="-L${BLAS_ROOT}/lib;-lblas;-lgfortran;-lgomp;-lm"

elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then

if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-9.2-GNU-7.2.0-OPENMPI-2.1.2" ]] ; then
module load devpack/20180517/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88
module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0

elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA-10.1-GNU-7.2.0-OPENMPI-4.0.1" ]] ; then
module load devpack/20190814/openmpi/4.0.1/gcc/7.2.0/cuda/10.1.105
module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0
if [[ "$ATDM_CONFIG_COMPILER" == "CUDA-11.2-GNU-8.3.0-OPENMPI-4.1.1" ]] ; then
module load cmake/3.24.2 cuda/11.2.2/gcc/8.3.1 openmpi/4.1.1/gcc/8.3.1/cuda/11.2.2 openblas/0.3.18/gcc/8.3.1 boost/1.70.0/gcc/8.3.1 metis/5.1.0/gcc/8.3.1 zlib/1.2.11/gcc/8.3.1 hdf5/1.10.7/gcc/8.3.1/openmpi/4.1.1 netcdf-c/4.8.1/gcc/8.3.1/openmpi/4.1.1 parallel-netcdf/1.12.2/gcc/8.3.1/openmpi/4.1.1

else
echo
Expand All @@ -165,53 +111,58 @@ elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then

export OMPI_CC=`which gcc`
export OMPI_FC=`which gfortran`
export ATDM_CONFIG_LAPACK_LIBS="-L${LAPACK_ROOT}/lib;-llapack;-lgfortran;-lgomp"
export ATDM_CONFIG_BLAS_LIBS="-L${BLAS_ROOT}/lib;-lblas;-lgfortran;-lgomp;-lm"

export ATDM_CONFIG_LAPACK_LIBS="-L${OPENBLAS_ROOT}/lib;-lopenblas"
export ATDM_CONFIG_BLAS_LIBS="-L${OPENBLAS_ROOT}/lib;-lopenblas"
# export ATDM_CONFIG_LAPACK_LIBS="-L${LAPACK_ROOT}/lib;-llapack;-lgfortran;-lgomp"
# export ATDM_CONFIG_BLAS_LIBS="-L${BLAS_ROOT}/lib;-lblas;-lgfortran;-lgomp;-lm"

export ATDM_CONFIG_USE_CUDA=ON
export CUDA_LAUNCH_BLOCKING=1
export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1
# export CUDA_LAUNCH_BLOCKING=1
# export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1
# export KOKKOS_NUM_DEVICES=2
export ATDM_CONFIG_CTEST_PARALLEL_LEVEL=2
# Avoids timeouts due to not running on separate GPUs (e.g. see #2446)

fi

# Ninja
#module load ninja/1.7.2

# CMake
#module swap cmake/3.6.2 cmake/3.12.3
module unload cmake/3.6.2
module load cmake/3.19.3
module load cmake/3.24.2

# HWLOC

export ATDM_CONFIG_USE_HWLOC=OFF

# Let's see if the TPLs loaded by devpack/20180517/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88 work for SPARC?

export ATDM_CONFIG_BINUTILS_LIBS="${BINUTILS_ROOT}/lib/libbfd.a;${BINUTILS_ROOT}/lib/libiberty.a"
#export ATDM_CONFIG_BINUTILS_LIBS="${BINUTILS_ROOT}/lib/libbfd.a;${BINUTILS_ROOT}/lib/libiberty.a"

# HDF5 and Netcdf

# NOTE: HDF5_ROOT and NETCDF_ROOT should already be set in env from above
# NOTE: HDF5_ROOT and NETCDF_C_ROOT should already be set in env from above
# module loads!

# However, set the direct libs for HDF5 and NetCDF in case we use that option
# for building (see env var ATDM_CONFIG_USE_SPARC_TPL_FIND_SETTINGS).
if [[ "${PNETCDF_ROOT}" == "" ]] ; then
export PNETCDF_ROOT=${NETCDF_ROOT}
fi

export ATDM_CONFIG_HDF5_LIBS="-L${HDF5_ROOT}/lib;${HDF5_ROOT}/lib/libhdf5_hl.a;${HDF5_ROOT}/lib/libhdf5.a;-lz;-ldl"

export ATDM_CONFIG_NETCDF_LIBS="-L${NETCDF_ROOT}/lib;${NETCDF_ROOT}/lib/libnetcdf.a;${PNETCDF_ROOT}/lib/libpnetcdf.a;${ATDM_CONFIG_HDF5_LIBS}"
if [[ "${PARALLEL_NETCDF_ROOT}" == "" ]] ; then
export NETCDF_ROOT=${NETCDF_C_ROOT}
export PNETCDF_ROOT=${NETCDF_C_ROOT}
export ATDM_CONFIG_NETCDF_LIBS="-L${NETCDF_C_ROOT}/lib;${NETCDF_C_ROOT}/lib/libnetcdf.a;${ATDM_CONFIG_HDF5_LIBS}"
else
export NETCDF_ROOT=${NETCDF_C_ROOT}
export PNETCDF_ROOT=${PARALLEL_NETCDF_ROOT}
export ATDM_CONFIG_NETCDF_LIBS="-L${NETCDF_C_ROOT}/lib;${NETCDF_C_ROOT}/lib/libnetcdf.a;${PNETCDF_ROOT}/lib/libpnetcdf.a;${ATDM_CONFIG_HDF5_LIBS}"
fi


# SuperLUDist

if [[ "${ATDM_CONFIG_SUPERLUDIST_INCLUDE_DIRS}" == "" ]] ; then
export ATDM_CONFIG_SUPERLUDIST_INCLUDE_DIRS=${SUPERLUDIST_ROOT}/include
export ATDM_CONFIG_SUPERLUDIST_LIBS="${SUPERLUDIST_ROOT}/lib/libsuperlu_dist.a;${METIS_ROOT}/lib/libmetis.a"
fi
#if [[ "${ATDM_CONFIG_SUPERLUDIST_INCLUDE_DIRS}" == "" ]] ; then
# export ATDM_CONFIG_SUPERLUDIST_INCLUDE_DIRS=${SUPERLUDIST_ROOT}/include
# export ATDM_CONFIG_SUPERLUDIST_LIBS="${SUPERLUDIST_ROOT}/lib/libsuperlu_dist.a;${METIS_ROOT}/lib/libmetis.a"
#fi

# Set MPI wrappers
export MPICC=`which mpicc`
Expand Down
8 changes: 4 additions & 4 deletions packages/amesos2/src/KLU2/Include/klu2_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,12 @@
#define MIN(a,b) (((a) < (b)) ? (a) : (b))

/* FLIP is a "negation about -1", and is used to mark an integer i that is
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY
* normally non-negative. FLIP (AMESOS2_KLU2_EMPTY) is AMESOS2_KLU2_EMPTY. FLIP of a number > AMESOS2_KLU2_EMPTY
* is negative, and FLIP of a number < AMESOS2_KLU2_EMPTY is positive. FLIP (FLIP (i)) = i
* for all integers i. UNFLIP (i) is >= EMPTY. */
#define EMPTY (-1)
* for all integers i. UNFLIP (i) is >= AMESOS2_KLU2_EMPTY. */
#define AMESOS2_KLU2_EMPTY (-1)
#define FLIP(i) (-(i)-2)
#define UNFLIP(i) (((i) < EMPTY) ? FLIP (i) : (i))
#define UNFLIP(i) (((i) < AMESOS2_KLU2_EMPTY) ? FLIP (i) : (i))

template <typename Entry, typename Int>
size_t KLU_kernel /* final size of LU on output */
Expand Down
30 changes: 15 additions & 15 deletions packages/amesos2/src/KLU2/Source/klu2_analyze.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
#ifndef NDEBUGKLU2
for (k = 0 ; k < n ; k++)
{
P [k] = EMPTY ;
Q [k] = EMPTY ;
Pinv [k] = EMPTY ;
P [k] = AMESOS2_KLU2_EMPTY ;
Q [k] = AMESOS2_KLU2_EMPTY ;
Pinv [k] = AMESOS2_KLU2_EMPTY ;
}
#endif
for (k = 0 ; k < n ; k++)
Expand All @@ -106,13 +106,13 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
Pinv [Pbtf [k]] = k ;
}
#ifndef NDEBUGKLU2
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
#endif
nzoff = 0 ;
lnz = 0 ;
maxnz = 0 ;
flops = 0 ;
Symbolic->symmetry = EMPTY ; /* only computed by AMD */
Symbolic->symmetry = AMESOS2_KLU2_EMPTY ; /* only computed by AMD */

/* ---------------------------------------------------------------------- */
/* order each block */
Expand All @@ -134,7 +134,7 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
/* construct the kth block, C */
/* ------------------------------------------------------------------ */

Lnz [block] = EMPTY ;
Lnz [block] = AMESOS2_KLU2_EMPTY ;
pc = 0 ;
for (k = k1 ; k < k2 ; k++)
{
Expand Down Expand Up @@ -226,8 +226,8 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
/*ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;*/
ok = KLU_OrdinalTraits<Int>::colamd (nk, nk, Cilen, Ci, Cp,
NULL, cstats) ;
lnz1 = EMPTY ;
flops1 = EMPTY ;
lnz1 = AMESOS2_KLU2_EMPTY ;
flops1 = AMESOS2_KLU2_EMPTY ;

/* copy the permutation from Cp to Pblk */
for (k = 0 ; k < nk ; k++)
Expand All @@ -244,7 +244,7 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
/* -------------------------------------------------------------- */

lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
flops1 = EMPTY ;
flops1 = AMESOS2_KLU2_EMPTY ;
ok = (lnz1 != 0) ;
}

Expand All @@ -258,8 +258,8 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
/* ------------------------------------------------------------------ */

Lnz [block] = lnz1 ;
lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ;
flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ;
lnz = (lnz == AMESOS2_KLU2_EMPTY || lnz1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (lnz + lnz1) ;
flops = (flops == AMESOS2_KLU2_EMPTY || flops1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (flops + flops1) ;

/* ------------------------------------------------------------------ */
/* combine the preordering with the BTF ordering */
Expand All @@ -284,10 +284,10 @@ static Int analyze_worker /* returns KLU_OK or < 0 if error */
ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;

/* return estimates of # of nonzeros in L including diagonal */
Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */
Symbolic->lnz = lnz ; /* AMESOS2_KLU2_EMPTY if COLAMD used */
Symbolic->unz = lnz ;
Symbolic->nzoff = nzoff ;
Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */
Symbolic->est_flops = flops ; /* AMESOS2_KLU2_EMPTY if COLAMD or user-ordering used */
return (KLU_OK) ;
}

Expand Down Expand Up @@ -383,7 +383,7 @@ static KLU_symbolic<Entry, Int> *order_and_analyze /* returns NULL if error, or
do_btf = (do_btf) ? TRUE : FALSE ;
Symbolic->ordering = ordering ;
Symbolic->do_btf = do_btf ;
Symbolic->structural_rank = EMPTY ;
Symbolic->structural_rank = AMESOS2_KLU2_EMPTY ;

/* ---------------------------------------------------------------------- */
/* find the block triangular form (if requested) */
Expand Down Expand Up @@ -524,7 +524,7 @@ KLU_symbolic<Entry, Int> *KLU_analyze /* returns NULL if error, or a valid
return (NULL) ;
}
Common->status = KLU_OK ;
Common->structural_rank = EMPTY ;
Common->structural_rank = AMESOS2_KLU2_EMPTY ;

/* ---------------------------------------------------------------------- */
/* order and analyze */
Expand Down
10 changes: 5 additions & 5 deletions packages/amesos2/src/KLU2/Source/klu2_analyze_given.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ KLU_symbolic<Entry, Int> *KLU_alloc_symbolic
}
for (i = 0 ; i < n ; i++)
{
P [i] = EMPTY ;
P [i] = AMESOS2_KLU2_EMPTY ;
}
for (j = 0 ; j < n ; j++)
{
Expand Down Expand Up @@ -357,7 +357,7 @@ KLU_symbolic<Entry, Int> *KLU_analyze_given /* returns NULL if error, or a v
}

/* fill-in not estimated */
Lnz [block] = EMPTY ;
Lnz [block] = AMESOS2_KLU2_EMPTY ;
}

/* ------------------------------------------------------------------ */
Expand All @@ -384,7 +384,7 @@ KLU_symbolic<Entry, Int> *KLU_analyze_given /* returns NULL if error, or a v
maxblock = n ;
R [0] = 0 ;
R [1] = n ;
Lnz [0] = EMPTY ;
Lnz [0] = AMESOS2_KLU2_EMPTY ;

/* ------------------------------------------------------------------ */
/* P = Puser, or identity if Puser is NULL */
Expand All @@ -402,8 +402,8 @@ KLU_symbolic<Entry, Int> *KLU_analyze_given /* returns NULL if error, or a v

Symbolic->nblocks = nblocks ;
Symbolic->maxblock = maxblock ;
Symbolic->lnz = EMPTY ;
Symbolic->unz = EMPTY ;
Symbolic->lnz = AMESOS2_KLU2_EMPTY ;
Symbolic->unz = AMESOS2_KLU2_EMPTY ;
Symbolic->nzoff = nzoff ;

return (Symbolic) ;
Expand Down
14 changes: 7 additions & 7 deletions packages/amesos2/src/KLU2/Source/klu2_defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ Int KLU_defaults
/* statistics */
Common->status = KLU_OK ;
Common->nrealloc = 0 ;
Common->structural_rank = EMPTY ;
Common->numerical_rank = EMPTY ;
Common->noffdiag = EMPTY ;
Common->flops = EMPTY ;
Common->rcond = EMPTY ;
Common->condest = EMPTY ;
Common->rgrowth = EMPTY ;
Common->structural_rank = AMESOS2_KLU2_EMPTY ;
Common->numerical_rank = AMESOS2_KLU2_EMPTY ;
Common->noffdiag = AMESOS2_KLU2_EMPTY ;
Common->flops = AMESOS2_KLU2_EMPTY ;
Common->rcond = AMESOS2_KLU2_EMPTY ;
Common->condest = AMESOS2_KLU2_EMPTY ;
Common->rgrowth = AMESOS2_KLU2_EMPTY ;
Common->work = 0 ; /* work done by btf_order */

Common->memusage = 0 ;
Expand Down
2 changes: 1 addition & 1 deletion packages/amesos2/src/KLU2/Source/klu2_diagnostics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ Int KLU_flops /* return TRUE if successful, FALSE otherwise */
{
return (FALSE) ;
}
Common->flops = EMPTY ;
Common->flops = AMESOS2_KLU2_EMPTY ;
if (Numeric == NULL || Symbolic == NULL)
{
Common->status = KLU_INVALID ;
Expand Down
Loading

0 comments on commit 8262023

Please sign in to comment.