Merge branch 'release-v0.96'
============================== Release Notes: v0.96 ==============================
Support for new layers:
 - Log softmax
 - Basic math functions
 - Weights layer, which outputs a weights tensor
 - L2 norm squared
 - Binary cross entropy loss and sigmoid binary cross entropy loss
 - Boolean accuracy, Boolean false negative rate, Boolean false positive rate
 - Bilinear resize
 - Variance and covariance
 - Dilated and grouped convolution (GPU only)
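
For two of these layers, the underlying math is standard; below is a minimal NumPy sketch of the usual numerically stable formulations (illustrative only, not LBANN's actual kernels, and the function names are ours):

import numpy as np

def log_softmax(x):
    # log_softmax(x)_i = x_i - log(sum_j exp(x_j)); subtracting the max
    # first keeps exp() from overflowing without changing the result.
    shifted = x - np.max(x)
    return shifted - np.log(np.sum(np.exp(shifted)))

def sigmoid_bce_with_logits(z, y):
    # Fused sigmoid + binary cross entropy on logits z with labels y:
    # -[y*log(sigmoid(z)) + (1-y)*log(1-sigmoid(z))]
    # rearranges to the stable form max(z, 0) - z*y + log(1 + exp(-|z|)).
    return np.maximum(z, 0) - z * y + np.log1p(np.exp(-np.abs(z)))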

Performance optimizations:
 - Optimized GPU model-parallel softmax layer

Model portability & usability:
 - Option for weight initialization with a user-provided list of values
 - Callback to save any layer output as an image

Internal features:
 - Provide a compile-time option to selectively disable OpenMP for the data-fetching loop
 - Thrust calls no longer involve the default CUDA stream
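
(Background, not a description of this exact patch: Thrust algorithms accept an explicit-stream execution policy, e.g. thrust::cuda::par.on(stream), which avoids implicit synchronization on the default stream 0.)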

I/O & data readers:
 - Reworked jag_conduit data reader:
   - Support the updated JAG simulation data output format
   - Use direct HDF5 I/O for on-demand data loading with Conduit
   - Ingest a unique set of data files per instance
   - Allow exclusive data partitioning among multiple trainers
   - Multi-channel images
   - Normalization of JAG data
   - Interface to select images of specific views and time indices
   - Interface to describe how to slice JAG data
   - Avoid redundant fetching and incoherent random number pulls in the group of local data readers
 - Improved threading performance by preallocating scratch space for loading samples

Build system:
 - Support cross-compilation configurations in superbuild and SetupProtobuf
bvanessen committed Nov 14, 2018
2 parents ffecbef + 07d6f36 commit cd7350e
Showing 296 changed files with 20,185 additions and 10,130 deletions.
118 changes: 76 additions & 42 deletions CMakeLists.txt
@@ -37,26 +37,45 @@ endif ()
 # Version setup
 #
 
 set(LBANN_VERSION_MAJOR 0)
 set(LBANN_VERSION_MINOR 96)
 
 set(LBANN_VERSION "${LBANN_VERSION_MAJOR}.${LBANN_VERSION_MINOR}")
 
 # Check to see if we are in a git repo
-execute_process(
-  COMMAND git rev-parse --is-inside-work-tree
-  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-  OUTPUT_VARIABLE GIT_REPO
-  OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-if (GIT_REPO)
-  # Get the git version so that we can embed it into the executable
+find_program(__GIT_EXECUTABLE git)
+mark_as_advanced(__GIT_EXECUTABLE)
+if (__GIT_EXECUTABLE)
+
   execute_process(
-    COMMAND git --git-dir .git describe --abbrev=7 --dirty --always --tags
-    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-    OUTPUT_VARIABLE GIT_VERSION
+    COMMAND ${__GIT_EXECUTABLE} rev-parse --is-inside-work-tree
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE __BUILDING_FROM_GIT_SOURCES
     OUTPUT_STRIP_TRAILING_WHITESPACE)
-  set(${UPPER_PROJECT_NAME}_VERSION ${GIT_VERSION}
-    CACHE STRING "LBANN's version string")
-else ()
-  set(${UPPER_PROJECT_NAME}_VERSION v0.95
-    CACHE STRING "LBANN's version string")
-endif (GIT_REPO)
+
+  if (__BUILDING_FROM_GIT_SOURCES)
+    # Get the git version so that we can embed it into the executable
+    execute_process(
+      COMMAND ${__GIT_EXECUTABLE} rev-parse --show-toplevel
+      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+      OUTPUT_VARIABLE __GIT_TOPLEVEL_DIR
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(
+      COMMAND ${__GIT_EXECUTABLE} rev-parse --git-dir
+      WORKING_DIRECTORY "${__GIT_TOPLEVEL_DIR}"
+      OUTPUT_VARIABLE __GIT_GIT_DIR
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    execute_process(
+      COMMAND ${__GIT_EXECUTABLE} --git-dir "${__GIT_GIT_DIR}" describe
+        --abbrev=7 --always --dirty --tags
+      WORKING_DIRECTORY "${__GIT_TOPLEVEL_DIR}"
+      OUTPUT_VARIABLE __GIT_DESCRIBE_VERSION
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    set(LBANN_GIT_VERSION "${__GIT_DESCRIBE_VERSION}"
+      CACHE STRING "LBANN's version string as told by git.")
+  endif (__BUILDING_FROM_GIT_SOURCES)
+endif (__GIT_EXECUTABLE)
 
 if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux")
   set(LBANN_GNU_LINUX TRUE)
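
(For reference: with the flags above, git describe reports the nearest reachable tag plus the commit distance and an abbreviated hash, e.g. v0.96-3-gcd7350e, appending -dirty when the work tree has local modifications; --always makes it fall back to a bare abbreviated hash when no tag is reachable.)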
@@ -214,7 +233,7 @@ endif (LBANN_HAS_CUDA)
 # guarantee. There's no harm including it multiple times.
 find_library(DL_LIBRARY dl DOC "The dynamic loader library.")
 if (DL_LIBRARY)
-  message("Found dl: ${DL_LIBRARY}")
+  message(STATUS "Found dl: ${DL_LIBRARY}")
 else ()
   message(FATAL_ERROR
     "dl library not found! This is a required library.\n"
@@ -401,32 +420,47 @@ get_directory_property( DirDefs COMPILE_DEFINITIONS )
 # Configuration summary
 ################################################################
 
-message("== Configuration Summary ==")
-message(" PROJECT_SOURCE_DIR: ${PROJECT_SOURCE_DIR}")
-message(" PROJECT_BINARY_DIR: ${PROJECT_BINARY_DIR}")
-message(" CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
-message(" CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+# NOTE: message() outputs to stderr by default. We now use a string to
+# maintain this information and then have cmake echo it to stdout. The
+# only side effects are that if you use the CMake GUI, you won't see
+# this output anymore (they only report stderr) and that if you add
+# something to the list, you must remember your newline!
+set(_str "== Configuration Summary ==\n")
+string(APPEND _str " PROJECT_SOURCE_DIR: ${PROJECT_SOURCE_DIR}\n"
+  " PROJECT_BINARY_DIR: ${PROJECT_BINARY_DIR}\n"
+  " CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}\n"
+  " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}\n")
 if (CMAKE_BUILD_TYPE MATCHES None)
-  message(" CXX FLAGS: ${CMAKE_CXX_FLAGS}")
+  string(APPEND _str
+    " CXX FLAGS: ${CMAKE_CXX_FLAGS}\n")
 elseif (CMAKE_BUILD_TYPE MATCHES Release)
-  message(" CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+  string(APPEND _str
+    " CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}\n")
 elseif (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
-  message(" CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
+  string(APPEND _str
+    " CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}\n")
 elseif (CMAKE_BUILD_TYPE MATCHES Debug)
-  message(" CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
+  string(APPEND _str
+    " CXX FLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}\n")
 endif ()
-message(" LBANN_GNU_LINUX: ${LBANN_GNU_LINUX}")
-message(" LBANN_HAS_HYDROGEN: ${LBANN_HAS_HYDROGEN}")
-message(" LBANN_HAS_OPENCV: ${LBANN_HAS_OPENCV}")
-message(" LBANN_HAS_CUDA: ${LBANN_HAS_CUDA}")
-message(" LBANN_HAS_CUDNN: ${LBANN_HAS_CUDNN}")
-message(" LBANN_HAS_NCCL2: ${LBANN_HAS_NCCL2}")
-message(" LBANN_HAS_PROTOBUF: ${LBANN_HAS_PROTOBUF}")
-message(" LBANN_HAS_CNPY: ${LBANN_HAS_CNPY}")
-message(" LBANN_HAS_TBINF: ${LBANN_HAS_TBINF}")
-message(" LBANN_HAS_VTUNE: ${LBANN_HAS_VTUNE}")
-message(" LBANN_NVPROF: ${LBANN_NVPROF}")
-message(" LBANN_HAS_DOXYGEN: ${LBANN_HAS_DOXYGEN}")
-message(" LBANN_HAS_LBANN_PROTO:${LBANN_HAS_LBANN_PROTO}")
-message(" LBANN_HAS_ALUMINUM: ${LBANN_HAS_ALUMINUM}")
-message(" LBANN_HAS_CONDUIT: ${LBANN_HAS_CONDUIT}")
+string(APPEND _str
+  " LBANN_GNU_LINUX: ${LBANN_GNU_LINUX}\n"
+  " LBANN_HAS_HYDROGEN: ${LBANN_HAS_HYDROGEN}\n"
+  " LBANN_HAS_OPENCV: ${LBANN_HAS_OPENCV}\n"
+  " LBANN_HAS_CUDA: ${LBANN_HAS_CUDA}\n"
+  " LBANN_HAS_CUDNN: ${LBANN_HAS_CUDNN}\n"
+  " LBANN_HAS_NCCL2: ${LBANN_HAS_NCCL2}\n"
+  " LBANN_HAS_PROTOBUF: ${LBANN_HAS_PROTOBUF}\n"
+  " LBANN_HAS_CNPY: ${LBANN_HAS_CNPY}\n"
+  " LBANN_HAS_TBINF: ${LBANN_HAS_TBINF}\n"
+  " LBANN_HAS_VTUNE: ${LBANN_HAS_VTUNE}\n"
+  " LBANN_NVPROF: ${LBANN_NVPROF}\n"
+  " LBANN_HAS_DOXYGEN: ${LBANN_HAS_DOXYGEN}\n"
+  " LBANN_HAS_LBANN_PROTO:${LBANN_HAS_LBANN_PROTO}\n"
+  " LBANN_HAS_ALUMINUM: ${LBANN_HAS_ALUMINUM}\n"
+  " LBANN_HAS_CONDUIT: ${LBANN_HAS_CONDUIT}\n"
+  " LBANN_NO_OMP_FOR_DATA_READERS: ${LBANN_NO_OMP_FOR_DATA_READERS}\n")
+
+# Output to stdout
+execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${_str}")
+set(_str)
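
(A practical consequence: cmake -E echo writes to stdout while plain message() writes to stderr, so the summary can now be captured with an ordinary redirect such as cmake .. > cmake-summary.txt.)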
56 changes: 56 additions & 0 deletions ReleaseNotes.txt
@@ -1,3 +1,59 @@
============================== (Pending) Release Notes: v0.97 ==============================
Support for new training algorithms:

Support for new network structures:

Support for new layers:

Performance optimizations:

Model portability & usability:

Internal features:

I/O & data readers:

Build system:

============================== Release Notes: v0.96 ==============================
Support for new layers:
- Log softmax
- Basic math functions
- Weights layer, which outputs a weights tensor
- L2 norm squared
- Binary cross entropy loss and sigmoid binary cross entropy loss
- Boolean accuracy, Boolean false negative rate, Boolean false positive rate
- Bilinear resize
- Variance and covariance
- Dilated and grouped convolution (GPU only)

Performance optimizations:
- Optimized GPU model-parallel softmax layer

Model portability & usability:
- Option for weight initialization with a user-provided list of values
- Callback to save any layer output as an image

Internal features:
- Provide a compile-time option to selectively disable OpenMP for the data-fetching loop
- Thrust calls no longer involve the default CUDA stream

I/O & data readers:
- Reworked jag_conduit data reader:
- Support the updated JAG simulation data output format
- Use direct HDF5 I/O for on-demand data loading with Conduit
- Ingest a unique set of data files per instance
- Allow exclusive data partitioning among multiple trainers
- Multi-channel images
- Normalization of JAG data
- Interface to select images of specific views and time indices
- Interface to describe how to slice JAG data
- Avoid redundant fetching and incoherent random number pulls in the group of local data readers
- Improved threading performance by preallocating scratch space for loading samples

Build system:
- Support cross-compilation configurations in superbuild and SetupProtobuf

============================== Release Notes: v0.95 ==============================
Support for new training algorithms:
- Generative Adversarial Networks (GAN)
1 change: 1 addition & 0 deletions bamboo/unit_tests/.gitignore
@@ -0,0 +1 @@
.cache
3 changes: 3 additions & 0 deletions bamboo/unit_tests/error/.gitignore
@@ -0,0 +1,3 @@
*
!.gitignore
!README.md
3 changes: 3 additions & 0 deletions bamboo/unit_tests/output/.gitignore
@@ -0,0 +1,3 @@
*
!.gitignore
!README.md
43 changes: 0 additions & 43 deletions bamboo/unit_tests/test_unit_conv_graph.py

This file was deleted.

41 changes: 41 additions & 0 deletions bamboo/unit_tests/test_unit_layer_covariance.py
@@ -0,0 +1,41 @@
import sys
sys.path.insert(0, '../common_python')
import tools
import pytest
import os

def skeleton_layer_covariance(cluster, executables, dir_name, compiler_name):
    # Build and run an LBANN experiment exercising the covariance layer;
    # fail the test if the run exits nonzero.
    if compiler_name not in executables:
        pytest.skip('default_exes[%s] does not exist' % compiler_name)
    output_file_name = '%s/bamboo/unit_tests/output/layer_covariance_%s_output.txt' % (dir_name, compiler_name)
    error_file_name = '%s/bamboo/unit_tests/error/layer_covariance_%s_error.txt' % (dir_name, compiler_name)
    command = tools.get_command(
        cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name,
        data_filedir_default='', data_reader_name='synthetic',
        model_folder='tests/layer_tests', model_name='covariance', optimizer_name='sgd',
        output_file_name=output_file_name, error_file_name=error_file_name)
    return_code = os.system(command)
    assert return_code == 0

def test_unit_layer_covariance_clang4(cluster, exes, dirname):
    skeleton_layer_covariance(cluster, exes, dirname, 'clang4')

def test_unit_layer_covariance_gcc4_check(cluster, exes, dirname):
    if cluster in ['surface']:
        pytest.skip('FIXME')
        # Surface Errors:
        # assert 34304 == 0
    skeleton_layer_covariance(cluster, exes, dirname, 'gcc4')

def test_unit_layer_covariance_gcc7(cluster, exes, dirname):
    skeleton_layer_covariance(cluster, exes, dirname, 'gcc7')

def test_unit_layer_covariance_intel18(cluster, exes, dirname):
    skeleton_layer_covariance(cluster, exes, dirname, 'intel18')

# Run with python -m pytest -s test_unit_layer_covariance.py -k 'test_unit_layer_covariance_exe' --exe=<executable>
def test_unit_layer_covariance_exe(cluster, dirname, exe):
    if exe is None:
        pytest.skip('Non-local testing')
    exes = {'exe': exe}
    skeleton_layer_covariance(cluster, exes, dirname, 'exe')
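
The cluster, exes, dirname, and exe arguments in these tests are pytest fixtures supplied by the suite's shared conftest, which is not part of this commit. A minimal, hypothetical sketch of how the --exe option used above could be wired up with stock pytest hooks (names illustrative, not the repository's actual conftest):

# conftest.py -- hypothetical sketch, not the real bamboo conftest
import pytest

def pytest_addoption(parser):
    # Register the --exe flag consumed by the *_exe tests above.
    parser.addoption('--exe', action='store', default=None,
                     help='path to a locally built LBANN executable')

@pytest.fixture
def exe(request):
    # Returns None when --exe is not passed, so the *_exe tests skip.
    return request.config.getoption('--exe')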
41 changes: 41 additions & 0 deletions bamboo/unit_tests/test_unit_layer_l2_norm2.py
@@ -0,0 +1,41 @@
import sys
sys.path.insert(0, '../common_python')
import tools
import pytest
import os

def skeleton_layer_l2_norm2(cluster, executables, dir_name, compiler_name):
    # Build and run an LBANN experiment exercising the L2 norm squared layer;
    # fail the test if the run exits nonzero.
    if compiler_name not in executables:
        pytest.skip('default_exes[%s] does not exist' % compiler_name)
    output_file_name = '%s/bamboo/unit_tests/output/layer_l2_norm2_%s_output.txt' % (dir_name, compiler_name)
    error_file_name = '%s/bamboo/unit_tests/error/layer_l2_norm2_%s_error.txt' % (dir_name, compiler_name)
    command = tools.get_command(
        cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name,
        data_filedir_default='', data_reader_name='synthetic',
        model_folder='tests/layer_tests', model_name='l2_norm2', optimizer_name='sgd',
        output_file_name=output_file_name, error_file_name=error_file_name)
    return_code = os.system(command)
    assert return_code == 0

def test_unit_layer_l2_norm2_clang4(cluster, exes, dirname):
    skeleton_layer_l2_norm2(cluster, exes, dirname, 'clang4')

def test_unit_layer_l2_norm2_gcc4_check(cluster, exes, dirname):
    if cluster in ['surface']:
        pytest.skip('FIXME')
        # Surface Errors:
        # assert 34304 == 0
    skeleton_layer_l2_norm2(cluster, exes, dirname, 'gcc4')

def test_unit_layer_l2_norm2_gcc7(cluster, exes, dirname):
    skeleton_layer_l2_norm2(cluster, exes, dirname, 'gcc7')

def test_unit_layer_l2_norm2_intel18(cluster, exes, dirname):
    skeleton_layer_l2_norm2(cluster, exes, dirname, 'intel18')

# Run with python -m pytest -s test_unit_layer_l2_norm2.py -k 'test_unit_layer_l2_norm2_exe' --exe=<executable>
def test_unit_layer_l2_norm2_exe(cluster, dirname, exe):
    if exe is None:
        pytest.skip('Non-local testing')
    exes = {'exe': exe}
    skeleton_layer_l2_norm2(cluster, exes, dirname, 'exe')
41 changes: 41 additions & 0 deletions bamboo/unit_tests/test_unit_layer_log_softmax.py
@@ -0,0 +1,41 @@
import sys
sys.path.insert(0, '../common_python')
import tools
import pytest
import os

def skeleton_layer_log_softmax(cluster, executables, dir_name, compiler_name):
    # Build and run an LBANN experiment exercising the log softmax layer;
    # fail the test if the run exits nonzero.
    if compiler_name not in executables:
        pytest.skip('default_exes[%s] does not exist' % compiler_name)
    output_file_name = '%s/bamboo/unit_tests/output/layer_log_softmax_%s_output.txt' % (dir_name, compiler_name)
    error_file_name = '%s/bamboo/unit_tests/error/layer_log_softmax_%s_error.txt' % (dir_name, compiler_name)
    command = tools.get_command(
        cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name,
        data_filedir_default='', data_reader_name='synthetic',
        model_folder='tests/layer_tests', model_name='log_softmax', optimizer_name='sgd',
        output_file_name=output_file_name, error_file_name=error_file_name)
    return_code = os.system(command)
    assert return_code == 0

def test_unit_layer_log_softmax_clang4(cluster, exes, dirname):
    skeleton_layer_log_softmax(cluster, exes, dirname, 'clang4')

def test_unit_layer_log_softmax_gcc4_check(cluster, exes, dirname):
    if cluster in ['surface']:
        pytest.skip('FIXME')
        # Surface Errors:
        # assert 34304 == 0
    skeleton_layer_log_softmax(cluster, exes, dirname, 'gcc4')

def test_unit_layer_log_softmax_gcc7(cluster, exes, dirname):
    skeleton_layer_log_softmax(cluster, exes, dirname, 'gcc7')

def test_unit_layer_log_softmax_intel18(cluster, exes, dirname):
    skeleton_layer_log_softmax(cluster, exes, dirname, 'intel18')

# Run with python -m pytest -s test_unit_layer_log_softmax.py -k 'test_unit_layer_log_softmax_exe' --exe=<executable>
def test_unit_layer_log_softmax_exe(cluster, dirname, exe):
    if exe is None:
        pytest.skip('Non-local testing')
    exes = {'exe': exe}
    skeleton_layer_log_softmax(cluster, exes, dirname, 'exe')