Skip to content

Commit

Permalink
Merge pull request #77 from tarudoodi/release/ccl_2021.6
Browse files Browse the repository at this point in the history
Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.6
  • Loading branch information
tarudoodi authored Aug 26, 2022
2 parents b7d66de + 36bab17 commit 0ab9bf9
Show file tree
Hide file tree
Showing 65 changed files with 2,402 additions and 1,502 deletions.
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ option(ENABLE_OFI_HMEM "Enable support OFI HMEM support" FALSE)
option(ENABLE_OFI_OOT_PROV "Enable OFI out-of-tree providers support" FALSE)
option(ENABLE_ITT "Enable ITT profiling support" TRUE)
option(ENABLE_STUB_BACKEND "Enable stub backend" TRUE)
option(ENABLE_LINKER_RUNPATH "Enable linker runpath flags" FALSE)

option(USE_CODECOV_FLAGS "Calculate code coverage" FALSE)
option(WITH_ASAN "Use address sanitizer, can only be used in Debug build" FALSE)
Expand All @@ -80,7 +81,8 @@ message(STATUS "Enable SYCL interop event support: ${ENABLE_SYCL_INTEROP_EVENT}"
message(STATUS "Enable OFI HMEM support: ${ENABLE_OFI_HMEM}")
message(STATUS "Enable OFI out-of-tree providers support: ${ENABLE_OFI_OOT_PROV}")
message(STATUS "Enable ITT profiling support: ${ENABLE_ITT}")
message(STATUS "Enable stub backend" ${ENABLE_STUB_BACKEND})
message(STATUS "Enable stub backend: ${ENABLE_STUB_BACKEND}")
message(STATUS "Enable linker rpath flags: ${ENABLE_LINKER_RUNPATH}")

add_definitions(-DCCL_C_COMPILER="${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
add_definitions(-DCCL_CXX_COMPILER="${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
Expand Down Expand Up @@ -226,8 +228,8 @@ enable_testing()

set(EXTERNAL_LIBS "")

set(EXAMPLES_INC_DIRS ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/examples/include ${MPI_INCLUDE_DIR} ${LEVEL_ZERO_INCLUDE_DIR})
set(EXAMPLES_LIB_DIRS ${MPI_LIB_DIR} ${LIBFABRIC_LIB_DIR})
set(EXAMPLES_INC_DIRS ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/examples/include ${MPI_INCLUDE_DIR})
set(EXAMPLES_LIB_DIRS ${MPI_LIB_DIR})

# allow `deprecated`
set(CMAKE_CLANG_FLAGS "${CMAKE_CLANG_FLAGS}")
Expand All @@ -254,8 +256,8 @@ file(GLOB spv_kernels "${PROJECT_SOURCE_DIR}/src/kernels/kernels.spv")
endif()

set(CCL_MAJOR_VERSION "2021")
set(CCL_MINOR_VERSION "5")
set(CCL_UPDATE_VERSION "2")
set(CCL_MINOR_VERSION "6")
set(CCL_UPDATE_VERSION "0")
set(CCL_PRODUCT_STATUS "Gold")
string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ")
get_vcs_properties("git")
Expand Down
7 changes: 7 additions & 0 deletions deps/itt/include/ittnotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -3985,6 +3985,13 @@ ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* hist, size_t length,
* @return collection state as a enum __itt_collection_state
*/
__itt_collection_state __itt_get_collection_state(void);

/**
* @brief function releases resources allocated by ITT API static part
* this API should be called from the library destructor
* @return void
*/
void __itt_release_resources(void);
/** @endcond */

#ifdef __cplusplus
Expand Down
Binary file modified deps/itt/lib64/libittnotify.a
Binary file not shown.
Binary file added deps/itt/lib64/tracing_functions.so
Binary file not shown.
12 changes: 10 additions & 2 deletions deps/mpi/bin/mpigcc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ fi
# Determined by a combination of environment variables and tests within
# configure (e.g., determining whehter -lsocket is needee)
CC="gcc"
MPICH_VERSION="3.4a2"
MPICH_VERSION="3.3"
CFLAGS=""
CPPFLAGS=""
LDFLAGS=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
LIBS="-lm -lpthread -lfabric -lrt "
MPIVERSION="2021.5"
MPILIBNAME="mpi"

Expand Down Expand Up @@ -590,6 +594,10 @@ fi
final_cppflags=" "
final_ldflags=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
final_libs="-lpthread -lrt "
if test "no" = "no" -o "${interlib_deps}" = "no" ; then
final_ldflags="${final_ldflags} -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl"
final_libs="${final_libs} -lm -lpthread -lfabric -lrt "
fi

# -----------------------------------------------------------------------
#
Expand All @@ -614,7 +622,7 @@ if [ "$linking" = yes ] ; then
$Show $CC ${final_cppflags} $PROFILE_INCPATHS ${final_cflags} ${final_ldflags} $allargs -I\"${includedir}\"
rc=$?
else
$Show $CC $CPPFLAGS $CFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $mpilibs $I_MPI_OTHERLIBS ${final_ldflags}
$Show $CC $CPPFLAGS $CFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $mpilibs $I_MPI_OTHERLIBS $LDFLAGS
rc=$?

if [ $rc -eq 0 -a "x$strip_debug_info" = "xyes" ] ; then
Expand Down
11 changes: 9 additions & 2 deletions deps/mpi/bin/mpigxx
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ fi

# Default settings for compiler, flags, and libraries
CXX="g++"
MPICH_VERSION="3.4a2"
MPICH_VERSION="3.3"
CXXFLAGS=""
LDFLAGS=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
LIBS="-lm -lpthread -lfabric -lrt "
MPIVERSION="2021.5"
MPILIBNAME="mpi"
MPICXXLIBNAME="mpicxx"
Expand Down Expand Up @@ -603,6 +606,10 @@ fi
final_cppflags=" "
final_ldflags=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
final_libs="-lpthread -lrt "
if test "no" = "no" -o "${interlib_deps}" = "no" ; then
final_ldflags="${final_ldflags} -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl"
final_libs="${final_libs} -lm -lpthread -lfabric -lrt "
fi

# A temporary statement to invoke the compiler
# Place the -L before any args incase there are any mpi libraries in there.
Expand All @@ -618,7 +625,7 @@ if [ "$linking" = yes ] ; then
$Show $CXX ${final_cppflags} $PROFILE_INCPATHS ${final_cxxflags} ${final_ldflags} $allargs -I\"${includedir}\"
rc=$?
else
$Show $CXX $CXXFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $shllibpath $cxxlibs $mpilibs $I_MPI_OTHERLIBS ${final_ldflags}
$Show $CXX $CXXFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $shllibpath $cxxlibs $mpilibs $I_MPI_OTHERLIBS $LDFLAGS
rc=$?
if [ $rc -eq 0 -a "x$strip_debug_info" = "xyes" ] ; then
$Show objcopy --only-keep-debug ${executable} ${executable}.dbg
Expand Down
2 changes: 0 additions & 2 deletions examples/benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ foreach(src ${sources})
target_link_libraries(${executable} PUBLIC dl)
target_link_libraries(${executable} PUBLIC -L${I_MPI_ROOT}/lib/release/)
target_link_libraries(${executable} PUBLIC mpi)
target_link_libraries(${executable} PUBLIC -L${LIBFABRIC_LIB_DIR})
target_link_libraries(${executable} PUBLIC fabric)
target_link_libraries(${executable} PUBLIC ${COMPUTE_BACKEND_TARGET_NAME})
install(TARGETS ${executable} RUNTIME DESTINATION ${CCL_INSTALL_EXAMPLES}/benchmark OPTIONAL)
endforeach()
11 changes: 10 additions & 1 deletion examples/benchmark/include/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,11 @@ void adjust_user_options(user_options_t& options) {
adjust_elem_counts(options);
}

bool is_inplace_supported(const std::string& coll,
const std::initializer_list<std::string>& supported_colls) {
return std::find(supported_colls.begin(), supported_colls.end(), coll) != supported_colls.end();
}

int parse_user_options(int& argc, char**(&argv), user_options_t& options) {
int ch;
int errors = 0;
Expand Down Expand Up @@ -716,8 +721,12 @@ int parse_user_options(int& argc, char**(&argv), user_options_t& options) {
}

if (options.inplace) {
//TODO: "allgatherv"
std::initializer_list<std::string> supported_colls = { "allreduce",
"alltoall",
"alltoallv" };
for (auto name : options.coll_names) {
if (name != "allreduce") {
if (!is_inplace_supported(name, supported_colls)) {
PRINT("inplace is not supported for %s yet", name.c_str());
errors++;
break;
Expand Down
1 change: 0 additions & 1 deletion examples/benchmark/src/allgatherv/sycl_allgatherv_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ struct sycl_allgatherv_coll : sycl_base_coll<Dtype, allgatherv_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count; e_idx++) {
value = host_send_buf[e_idx];
if (value != sbuf_expected) {
Expand Down
3 changes: 1 addition & 2 deletions examples/benchmark/src/alltoall/sycl_alltoall_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ struct sycl_alltoall_coll : sycl_base_coll<Dtype, alltoall_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count * comm_size; e_idx++) {
value = host_send_buf[e_idx];
Dtype rbuf_expected = get_val<Dtype>(static_cast<float>(e_idx / elem_count));
if (value != sbuf_expected) {
if (!base_coll::get_inplace() && value != sbuf_expected) {
std::cout << this->name() << " send_bufs: buf_idx " << b_idx << ", rank_idx "
<< rank_idx << ", elem_idx " << e_idx << ", expected "
<< sbuf_expected << ", got " << value << std::endl;
Expand Down
3 changes: 1 addition & 2 deletions examples/benchmark/src/alltoallv/sycl_alltoallv_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ struct sycl_alltoallv_coll : sycl_base_coll<Dtype, alltoallv_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count * comm_size; e_idx++) {
value = host_send_buf[e_idx];
Dtype rbuf_expected = get_val<Dtype>(static_cast<float>(e_idx / elem_count));
if (value != sbuf_expected) {
if (!base_coll::get_inplace() && value != sbuf_expected) {
std::cout << this->name() << " send_bufs: buf_idx " << b_idx << ", rank_idx "
<< rank_idx << ", elem_idx " << e_idx << ", expected "
<< sbuf_expected << ", got " << value << std::endl;
Expand Down
4 changes: 0 additions & 4 deletions examples/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,5 @@ foreach(src ${sources})
target_link_libraries(${executable} PUBLIC rt)
target_link_libraries(${executable} PUBLIC m)
target_link_libraries(${executable} PUBLIC dl)
target_link_libraries(${executable} PUBLIC -L${I_MPI_ROOT}/lib/release/)
target_link_libraries(${executable} PUBLIC mpi)
target_link_libraries(${executable} PUBLIC -L${LIBFABRIC_LIB_DIR})
target_link_libraries(${executable} PUBLIC fabric)
install(TARGETS ${executable} RUNTIME DESTINATION ${CCL_INSTALL_EXAMPLES}/common OPTIONAL)
endforeach()
2 changes: 0 additions & 2 deletions examples/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ foreach(src ${sources})
target_link_libraries(${executable} PUBLIC stdc++)
target_link_libraries(${executable} PUBLIC -L${I_MPI_ROOT}/lib/release/)
target_link_libraries(${executable} PUBLIC mpi)
target_link_libraries(${executable} PUBLIC -L${LIBFABRIC_LIB_DIR})
target_link_libraries(${executable} PUBLIC fabric)
target_link_libraries(${executable} PUBLIC ${COMPUTE_BACKEND_TARGET_NAME})
install(TARGETS ${executable} RUNTIME DESTINATION ${CCL_INSTALL_EXAMPLES}/cpu OPTIONAL)
endforeach()
Expand Down
2 changes: 0 additions & 2 deletions examples/external_launcher/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ foreach(src ${sources})
target_link_libraries(${executable} PRIVATE ccl)
target_link_libraries(${executable} PUBLIC -L${I_MPI_ROOT}/lib/release/)
target_link_libraries(${executable} PUBLIC mpi)
target_link_libraries(${executable} PUBLIC -L${LIBFABRIC_LIB_DIR})
target_link_libraries(${executable} PUBLIC fabric)
target_link_libraries(${executable} PUBLIC ${COMPUTE_BACKEND_TARGET_NAME})
install(TARGETS ${executable} RUNTIME DESTINATION ${CCL_INSTALL_EXAMPLES}/external_launcher OPTIONAL)
endforeach()
Expand Down
33 changes: 0 additions & 33 deletions examples/include/base_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,39 +160,6 @@ inline void str_to_array(const char* input, std::vector<T>& output, char delimit
}
}
}
template <>
inline void str_to_array(const char* input, std::vector<std::string>& output, char delimiter) {
std::string processes_input(input);

processes_input.erase(std::remove_if(processes_input.begin(),
processes_input.end(),
[](unsigned char x) {
return std::isspace(x);
}),
processes_input.end());

std::replace(processes_input.begin(), processes_input.end(), delimiter, ' ');
std::stringstream ss(processes_input);

while (ss >> processes_input) {
output.push_back(processes_input);
}
}

template <typename T>
void str_to_mset(const char* input, std::multiset<T>& output, char delimiter) {
if (!input) {
return;
}
std::stringstream ss(input);
T temp{};
while (ss >> temp) {
output.insert(temp);
if (ss.peek() == delimiter) {
ss.ignore();
}
}
}

template <class Container>
std::string vec_to_string(Container& elems) {
Expand Down
2 changes: 0 additions & 2 deletions examples/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ foreach(src ${sources})
target_link_libraries(${executable} PRIVATE ccl)
target_link_libraries(${executable} PUBLIC -L${I_MPI_ROOT}/lib/release/)
target_link_libraries(${executable} PUBLIC mpi)
target_link_libraries(${executable} PUBLIC -L${LIBFABRIC_LIB_DIR})
target_link_libraries(${executable} PUBLIC fabric)
target_link_libraries(${executable} PRIVATE ${COMPUTE_BACKEND_TARGET_NAME})
install(TARGETS ${executable} RUNTIME DESTINATION ${CCL_INSTALL_EXAMPLES}/sycl OPTIONAL)
endforeach()
125 changes: 125 additions & 0 deletions examples/sycl/sycl_alltoallv_inplace_usm_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
Copyright 2016-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "sycl_base.hpp"

using namespace std;
using namespace sycl;

int main(int argc, char *argv[]) {
const size_t count = 10 * 1024 * 1024;

int size = 0;
int rank = 0;

ccl::init();

MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

atexit(mpi_finalize);

queue q;
if (!create_sycl_queue(argc, argv, rank, q)) {
return -1;
}

buf_allocator<int> allocator(q);

auto usm_alloc_type = usm::alloc::shared;
if (argc > 2) {
usm_alloc_type = usm_alloc_type_from_string(argv[2]);
}

if (!check_sycl_usm(q, usm_alloc_type)) {
return -1;
}

/* create kvs */
ccl::shared_ptr_class<ccl::kvs> kvs;
ccl::kvs::address_type main_addr;
if (rank == 0) {
kvs = ccl::create_main_kvs();
main_addr = kvs->get_address();
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD);
kvs = ccl::create_kvs(main_addr);
}

/* create communicator */
auto dev = ccl::create_device(q.get_device());
auto ctx = ccl::create_context(q.get_context());
auto comm = ccl::create_communicator(size, rank, dev, ctx, kvs);

/* create stream */
auto stream = ccl::create_stream(q);

/* create buffers */
auto recv_buf = allocator.allocate(count * size, usm_alloc_type);

vector<size_t> recv_counts(size, count);

/* open buffers and modify them on the device side */
auto e = q.submit([&](auto &h) {
h.parallel_for(count * size, [=](auto id) {
recv_buf[id] = id / count + 1;
});
});

/* do not wait completion of kernel and provide it as dependency for operation */
vector<ccl::event> deps;
deps.push_back(ccl::create_event(e));

/* invoke alltoallv */
auto attr = ccl::create_operation_attr<ccl::alltoallv_attr>();
ccl::alltoallv(recv_buf, recv_counts, recv_buf, recv_counts, comm, stream, attr, deps).wait();

/* open recv_buf recv_buf and check its correctness on the device side */
buffer<int> check_buf(count * size);
q.submit([&](auto &h) {
accessor check_buf_acc(check_buf, h, write_only);
h.parallel_for(count * size, [=](auto id) {
if (recv_buf[id] != rank + 1) {
check_buf_acc[id] = -1;
}
else {
check_buf_acc[id] = 0;
}
});
});

if (!handle_exception(q))
return -1;

/* print out the result of the test on the host side */
{
host_accessor check_buf_acc(check_buf, read_only);
size_t i;
for (i = 0; i < size * count; i++) {
if (check_buf_acc[i] == -1) {
cout << "FAILED\n";
break;
}
}
if (i == size * count) {
cout << "PASSED\n";
}
}

return 0;
}
Loading

0 comments on commit 0ab9bf9

Please sign in to comment.