Skip to content

Commit

Permalink
Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.6
Browse files Browse the repository at this point in the history
  • Loading branch information
Taru Doodi committed Aug 24, 2022
1 parent b7d66de commit 03c500c
Show file tree
Hide file tree
Showing 59 changed files with 2,393 additions and 1,485 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ file(GLOB spv_kernels "${PROJECT_SOURCE_DIR}/src/kernels/kernels.spv")
endif()

set(CCL_MAJOR_VERSION "2021")
set(CCL_MINOR_VERSION "5")
set(CCL_UPDATE_VERSION "2")
set(CCL_MINOR_VERSION "6")
set(CCL_UPDATE_VERSION "0")
set(CCL_PRODUCT_STATUS "Gold")
string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ")
get_vcs_properties("git")
Expand Down
7 changes: 7 additions & 0 deletions deps/itt/include/ittnotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -3985,6 +3985,13 @@ ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* hist, size_t length,
* @return collection state as a enum __itt_collection_state
*/
__itt_collection_state __itt_get_collection_state(void);

/**
* @brief function releases resources allocated by ITT API static part
* this API should be called from the library destructor
* @return void
*/
void __itt_release_resources(void);
/** @endcond */

#ifdef __cplusplus
Expand Down
Binary file modified deps/itt/lib64/libittnotify.a
Binary file not shown.
Binary file added deps/itt/lib64/tracing_functions.so
Binary file not shown.
12 changes: 10 additions & 2 deletions deps/mpi/bin/mpigcc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ fi
# Determined by a combination of environment variables and tests within
# configure (e.g., determining whehter -lsocket is needee)
CC="gcc"
MPICH_VERSION="3.4a2"
MPICH_VERSION="3.3"
CFLAGS=""
CPPFLAGS=""
LDFLAGS=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
LIBS="-lm -lpthread -lfabric -lrt "
MPIVERSION="2021.5"
MPILIBNAME="mpi"

Expand Down Expand Up @@ -590,6 +594,10 @@ fi
final_cppflags=" "
final_ldflags=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
final_libs="-lpthread -lrt "
if test "no" = "no" -o "${interlib_deps}" = "no" ; then
final_ldflags="${final_ldflags} -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl"
final_libs="${final_libs} -lm -lpthread -lfabric -lrt "
fi

# -----------------------------------------------------------------------
#
Expand All @@ -614,7 +622,7 @@ if [ "$linking" = yes ] ; then
$Show $CC ${final_cppflags} $PROFILE_INCPATHS ${final_cflags} ${final_ldflags} $allargs -I\"${includedir}\"
rc=$?
else
$Show $CC $CPPFLAGS $CFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $mpilibs $I_MPI_OTHERLIBS ${final_ldflags}
$Show $CC $CPPFLAGS $CFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $mpilibs $I_MPI_OTHERLIBS $LDFLAGS
rc=$?

if [ $rc -eq 0 -a "x$strip_debug_info" = "xyes" ] ; then
Expand Down
11 changes: 9 additions & 2 deletions deps/mpi/bin/mpigxx
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ fi

# Default settings for compiler, flags, and libraries
CXX="g++"
MPICH_VERSION="3.4a2"
MPICH_VERSION="3.3"
CXXFLAGS=""
LDFLAGS=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
LIBS="-lm -lpthread -lfabric -lrt "
MPIVERSION="2021.5"
MPILIBNAME="mpi"
MPICXXLIBNAME="mpicxx"
Expand Down Expand Up @@ -603,6 +606,10 @@ fi
final_cppflags=" "
final_ldflags=" -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl "
final_libs="-lpthread -lrt "
if test "no" = "no" -o "${interlib_deps}" = "no" ; then
final_ldflags="${final_ldflags} -Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Xlinker --enable-new-dtags -ldl"
final_libs="${final_libs} -lm -lpthread -lfabric -lrt "
fi

# A temporary statement to invoke the compiler
# Place the -L before any args incase there are any mpi libraries in there.
Expand All @@ -618,7 +625,7 @@ if [ "$linking" = yes ] ; then
$Show $CXX ${final_cppflags} $PROFILE_INCPATHS ${final_cxxflags} ${final_ldflags} $allargs -I\"${includedir}\"
rc=$?
else
$Show $CXX $CXXFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $shllibpath $cxxlibs $mpilibs $I_MPI_OTHERLIBS ${final_ldflags}
$Show $CXX $CXXFLAGS $allargs -I\"${includedir}\" -L\"${libdir}${MPILIBDIR}\" -L\"${libdir}\" $rpath_opt $shllibpath $cxxlibs $mpilibs $I_MPI_OTHERLIBS $LDFLAGS
rc=$?
if [ $rc -eq 0 -a "x$strip_debug_info" = "xyes" ] ; then
$Show objcopy --only-keep-debug ${executable} ${executable}.dbg
Expand Down
11 changes: 10 additions & 1 deletion examples/benchmark/include/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,11 @@ void adjust_user_options(user_options_t& options) {
adjust_elem_counts(options);
}

bool is_inplace_supported(const std::string& coll,
const std::initializer_list<std::string>& supported_colls) {
return std::find(supported_colls.begin(), supported_colls.end(), coll) != supported_colls.end();
}

int parse_user_options(int& argc, char**(&argv), user_options_t& options) {
int ch;
int errors = 0;
Expand Down Expand Up @@ -716,8 +721,12 @@ int parse_user_options(int& argc, char**(&argv), user_options_t& options) {
}

if (options.inplace) {
//TODO: "allgatherv"
std::initializer_list<std::string> supported_colls = { "allreduce",
"alltoall",
"alltoallv" };
for (auto name : options.coll_names) {
if (name != "allreduce") {
if (!is_inplace_supported(name, supported_colls)) {
PRINT("inplace is not supported for %s yet", name.c_str());
errors++;
break;
Expand Down
1 change: 0 additions & 1 deletion examples/benchmark/src/allgatherv/sycl_allgatherv_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ struct sycl_allgatherv_coll : sycl_base_coll<Dtype, allgatherv_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count; e_idx++) {
value = host_send_buf[e_idx];
if (value != sbuf_expected) {
Expand Down
3 changes: 1 addition & 2 deletions examples/benchmark/src/alltoall/sycl_alltoall_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ struct sycl_alltoall_coll : sycl_base_coll<Dtype, alltoall_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count * comm_size; e_idx++) {
value = host_send_buf[e_idx];
Dtype rbuf_expected = get_val<Dtype>(static_cast<float>(e_idx / elem_count));
if (value != sbuf_expected) {
if (!base_coll::get_inplace() && value != sbuf_expected) {
std::cout << this->name() << " send_bufs: buf_idx " << b_idx << ", rank_idx "
<< rank_idx << ", elem_idx " << e_idx << ", expected "
<< sbuf_expected << ", got " << value << std::endl;
Expand Down
3 changes: 1 addition & 2 deletions examples/benchmark/src/alltoallv/sycl_alltoallv_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ struct sycl_alltoallv_coll : sycl_base_coll<Dtype, alltoallv_strategy_impl> {
}

Dtype value;

for (size_t e_idx = 0; e_idx < elem_count * comm_size; e_idx++) {
value = host_send_buf[e_idx];
Dtype rbuf_expected = get_val<Dtype>(static_cast<float>(e_idx / elem_count));
if (value != sbuf_expected) {
if (!base_coll::get_inplace() && value != sbuf_expected) {
std::cout << this->name() << " send_bufs: buf_idx " << b_idx << ", rank_idx "
<< rank_idx << ", elem_idx " << e_idx << ", expected "
<< sbuf_expected << ", got " << value << std::endl;
Expand Down
33 changes: 0 additions & 33 deletions examples/include/base_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,39 +160,6 @@ inline void str_to_array(const char* input, std::vector<T>& output, char delimit
}
}
}
template <>
inline void str_to_array(const char* input, std::vector<std::string>& output, char delimiter) {
std::string processes_input(input);

processes_input.erase(std::remove_if(processes_input.begin(),
processes_input.end(),
[](unsigned char x) {
return std::isspace(x);
}),
processes_input.end());

std::replace(processes_input.begin(), processes_input.end(), delimiter, ' ');
std::stringstream ss(processes_input);

while (ss >> processes_input) {
output.push_back(processes_input);
}
}

template <typename T>
void str_to_mset(const char* input, std::multiset<T>& output, char delimiter) {
if (!input) {
return;
}
std::stringstream ss(input);
T temp{};
while (ss >> temp) {
output.insert(temp);
if (ss.peek() == delimiter) {
ss.ignore();
}
}
}

template <class Container>
std::string vec_to_string(Container& elems) {
Expand Down
125 changes: 125 additions & 0 deletions examples/sycl/sycl_alltoallv_inplace_usm_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
Copyright 2016-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "sycl_base.hpp"

using namespace std;
using namespace sycl;

int main(int argc, char *argv[]) {
const size_t count = 10 * 1024 * 1024;

int size = 0;
int rank = 0;

ccl::init();

MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

atexit(mpi_finalize);

queue q;
if (!create_sycl_queue(argc, argv, rank, q)) {
return -1;
}

buf_allocator<int> allocator(q);

auto usm_alloc_type = usm::alloc::shared;
if (argc > 2) {
usm_alloc_type = usm_alloc_type_from_string(argv[2]);
}

if (!check_sycl_usm(q, usm_alloc_type)) {
return -1;
}

/* create kvs */
ccl::shared_ptr_class<ccl::kvs> kvs;
ccl::kvs::address_type main_addr;
if (rank == 0) {
kvs = ccl::create_main_kvs();
main_addr = kvs->get_address();
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast((void *)main_addr.data(), main_addr.size(), MPI_BYTE, 0, MPI_COMM_WORLD);
kvs = ccl::create_kvs(main_addr);
}

/* create communicator */
auto dev = ccl::create_device(q.get_device());
auto ctx = ccl::create_context(q.get_context());
auto comm = ccl::create_communicator(size, rank, dev, ctx, kvs);

/* create stream */
auto stream = ccl::create_stream(q);

/* create buffers */
auto recv_buf = allocator.allocate(count * size, usm_alloc_type);

vector<size_t> recv_counts(size, count);

/* open buffers and modify them on the device side */
auto e = q.submit([&](auto &h) {
h.parallel_for(count * size, [=](auto id) {
recv_buf[id] = id / count + 1;
});
});

/* do not wait completion of kernel and provide it as dependency for operation */
vector<ccl::event> deps;
deps.push_back(ccl::create_event(e));

/* invoke alltoallv */
auto attr = ccl::create_operation_attr<ccl::alltoallv_attr>();
ccl::alltoallv(recv_buf, recv_counts, recv_buf, recv_counts, comm, stream, attr, deps).wait();

/* open recv_buf recv_buf and check its correctness on the device side */
buffer<int> check_buf(count * size);
q.submit([&](auto &h) {
accessor check_buf_acc(check_buf, h, write_only);
h.parallel_for(count * size, [=](auto id) {
if (recv_buf[id] != rank + 1) {
check_buf_acc[id] = -1;
}
else {
check_buf_acc[id] = 0;
}
});
});

if (!handle_exception(q))
return -1;

/* print out the result of the test on the host side */
{
host_accessor check_buf_acc(check_buf, read_only);
size_t i;
for (i = 0; i < size * count; i++) {
if (check_buf_acc[i] == -1) {
cout << "FAILED\n";
break;
}
}
if (i == size * count) {
cout << "PASSED\n";
}
}

return 0;
}
14 changes: 4 additions & 10 deletions src/atl/ofi/atl_ofi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1026,8 +1026,7 @@ void atl_ofi::mr_cache::get(fid_domain* domain, void* buf, size_t bytes, fid_mr*
mr_attr.iface = FI_HMEM_SYSTEM;
mr_attr.device.ze = 0;

CCL_THROW_IF_NOT(ccl::global_data::get().ze_data->context_list.at(0), "ze context is null");
ze_context_handle_t context = ccl::global_data::get().ze_data->context_list[0];
ze_context_handle_t context = ccl::global_data::get().ze_data->contexts[0];
ze_memory_allocation_properties_t alloc_props = ccl::ze::default_alloc_props;
ze_device_handle_t alloc_dev = nullptr;
ZE_CALL(zeMemGetAllocProperties, (context, buf, &alloc_props, &alloc_dev));
Expand All @@ -1044,14 +1043,9 @@ void atl_ofi::mr_cache::get(fid_domain* domain, void* buf, size_t bytes, fid_mr*
ZE_CALL(zeDeviceGetProperties, (alloc_dev, &alloc_dev_props));

int dev_idx = -1;
int device_count = static_cast<int>(ccl::global_data::get().ze_data->device_list.size());
for (int idx = 0; idx < device_count; idx++) {
ze_device_properties_t dev_props = ccl::ze::default_device_props;
ze_device_handle_t device = ccl::global_data::get().ze_data->device_list[idx].device;
ZE_CALL(zeDeviceGetProperties, (device, &dev_props));

if (!std::memcmp(&dev_props.uuid, &alloc_dev_props.uuid, sizeof(ze_device_uuid_t))) {
dev_idx = ccl::global_data::get().ze_data->device_list[idx].parent_idx;
for (const auto& dev : ccl::global_data::get().ze_data->devices) {
if (ccl::ze::is_same_dev_uuid(dev.uuid, alloc_dev_props.uuid)) {
dev_idx = dev.parent_idx;
LOG_DEBUG("buf ", buf, " corresponds to ze device idx ", dev_idx);
break;
}
Expand Down
Loading

0 comments on commit 03c500c

Please sign in to comment.