Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lag transforms #6

Merged
merged 46 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6aee2c9
initial lag transforms
jmoralez Oct 26, 2023
79060b1
add more rolling operations
jmoralez Oct 27, 2023
3a30724
include remaining transforms
jmoralez Oct 28, 2023
e3f9ad7
add exponentially weighted mean
jmoralez Nov 1, 2023
8fa0eb0
increase build verbosity
jmoralez Nov 1, 2023
cd11e60
update for apple
jmoralez Nov 1, 2023
6414f3f
link openmp for clang
jmoralez Nov 1, 2023
3a47729
disable omp for arm64
jmoralez Nov 1, 2023
41571ff
set correct script name
jmoralez Nov 1, 2023
e0abcac
move logic to cmake
jmoralez Nov 1, 2023
7e167c3
Revert "move logic to cmake"
jmoralez Nov 1, 2023
6f24a8b
use env variable and add tests
jmoralez Nov 1, 2023
68e87c7
stop trolling
jmoralez Nov 1, 2023
2b60fe4
install tomli
jmoralez Nov 1, 2023
98fe593
use toml
jmoralez Nov 1, 2023
22e7f4f
update env
jmoralez Nov 1, 2023
7a8ba90
install libomp for macos
jmoralez Nov 1, 2023
42492f3
ensure contigous
jmoralez Nov 2, 2023
c66ad79
install libomp for macos
jmoralez Nov 2, 2023
0239828
set linker flags
jmoralez Nov 3, 2023
2591072
restore c stuff
jmoralez Nov 3, 2023
70efd7c
restrict ci to debug
jmoralez Nov 3, 2023
8074b2f
restore link
jmoralez Nov 3, 2023
f09b01a
remove repair wheel for macos
jmoralez Nov 3, 2023
5c59c05
build wheel on macos-11
jmoralez Nov 3, 2023
1ad6a62
set macos deployment target
jmoralez Nov 3, 2023
515e456
use xcode11
jmoralez Nov 3, 2023
a06becf
remove repair wheel for macos
jmoralez Nov 3, 2023
18820f7
test with pip
jmoralez Nov 3, 2023
8389d0f
use pip3
jmoralez Nov 3, 2023
8de3b25
restore repair wheel
jmoralez Nov 3, 2023
aaa43f4
set correct name
jmoralez Nov 3, 2023
9498ff5
._.
jmoralez Nov 3, 2023
3cea72c
maybe
jmoralez Nov 3, 2023
a41d6c8
maybe2
jmoralez Nov 3, 2023
671fb7d
remove cache
jmoralez Nov 3, 2023
6ddfca5
bash shell
jmoralez Nov 3, 2023
b60473a
restore ci
jmoralez Nov 3, 2023
db32b6f
add importlib_resources
jmoralez Nov 3, 2023
0688f2b
split api by dtype
jmoralez Nov 3, 2023
6029122
improve scalers naming
jmoralez Nov 3, 2023
7f80c85
parametrize dtype in lag_tfms tests
jmoralez Nov 3, 2023
e4273a7
increase rtol
jmoralez Nov 3, 2023
253d8fd
increase rtol again
jmoralez Nov 3, 2023
2c99fd2
rtol again
jmoralez Nov 3, 2023
1339322
rtol again
jmoralez Nov 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:

defaults:
run:
shell: bash -l {0}
shell: bash

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -27,7 +27,7 @@ jobs:
platform-id: manylinux_x86_64
- os: windows-2019
platform-id: win_amd64
- os: macos-latest
- os: macos-11
platform-id: macosx_x86_64
- os: macos-latest
platform-id: macosx_arm64
Expand All @@ -41,6 +41,11 @@ jobs:
CIBW_BUILD: cp38-${{ matrix.platform-id }}
CIBW_ARCHS: all
CIBW_TEST_SKIP: '*-macosx_arm64'
CIBW_BUILD_VERBOSITY: 3
CIBW_BEFORE_BUILD_MACOS: |
sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer
pip install toml
python scripts/disable_omp_arm64.py

- uses: actions/upload-artifact@v3
with:
Expand All @@ -59,12 +64,13 @@ jobs:
- name: Clone repo
uses: actions/checkout@v3

- name: Set up environment
uses: mamba-org/setup-micromamba@v1
- uses: actions/setup-python@v4
with:
environment-file: environment.yml
create-args: python=${{ matrix.python-version }}
cache-environment: true
python-version: ${{ matrix.python-version }}
cache: pip

- name: Install dependencies
run: pip install -r requirements-test.txt

- name: Download wheels
uses: actions/download-artifact@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*.so
*.dylib
*.dll
*.exp

# Fortran module files
*.mod
Expand Down
28 changes: 26 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@
cmake_minimum_required(VERSION 3.25)
project(coreforecast)

option(USE_OPENMP "Enable OpenMP" ON)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

set(CMAKE_CXX_STANDARD 17)
if(USE_OPENMP)
if(APPLE)
find_package(OpenMP)
if(NOT OpenMP_FOUND)
# libomp 15.0+ from brew is keg-only, so have to search in other locations.
# See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
execute_process(COMMAND brew --prefix libomp
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_C_LIB_NAMES omp)
set(OpenMP_CXX_LIB_NAMES omp)
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
endif()
endif()
find_package(OpenMP REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

if(APPLE)
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
Expand All @@ -14,10 +35,9 @@ endif()
if(UNIX)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -O3 -Wall -Wextra -Wpedantic")
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ot /Oy /W4")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ob2 /Ot /Oy /W4")
endif()


if(SKBUILD)
set(LIBRARY_OUTPUT_PATH ${SKBUILD_PLATLIB_DIR}/coreforecast/lib)
else()
Expand All @@ -29,3 +49,7 @@ add_library(coreforecast SHARED src/coreforecast.cpp)
if(MSVC)
set_target_properties(coreforecast PROPERTIES OUTPUT_NAME "libcoreforecast")
endif()

if(USE_OPENMP AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_libraries(coreforecast PUBLIC OpenMP::OpenMP_CXX)
endif()
163 changes: 143 additions & 20 deletions coreforecast/grouped_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
from importlib.resources import files


DTYPE_FLOAT32 = ctypes.c_int(0)
DTYPE_FLOAT64 = ctypes.c_int(1)

if platform.system() in ("Windows", "Microsoft"):
prefix = "Release"
extension = "dll"
Expand All @@ -30,63 +27,189 @@ def _data_as_void_ptr(arr: np.ndarray):


class GroupedArray:
def __init__(self, data: np.ndarray, indptr: np.ndarray):
def __init__(self, data: np.ndarray, indptr: np.ndarray, num_threads: int = 1):
data = np.ascontiguousarray(data, dtype=data.dtype)
if data.dtype == np.float32:
self.dtype = DTYPE_FLOAT32
self.prefix = "GroupedArrayFloat32"
elif data.dtype == np.float64:
self.dtype = DTYPE_FLOAT64
self.prefix = "GroupedArrayFloat64"
else:
self.dtype = DTYPE_FLOAT32
self.prefix = "GroupedArrayFloat32"
data = data.astype(np.float32)
self.data = data
if indptr.dtype != np.int32:
indptr = indptr.astype(np.int32)
self.indptr = indptr
self._handle = ctypes.c_void_p()
_LIB.GroupedArray_CreateFromArrays(
_LIB[f"{self.prefix}_Create"](
_data_as_void_ptr(data),
ctypes.c_int32(data.size),
indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(indptr.size),
self.dtype,
ctypes.c_int(indptr.size),
ctypes.c_int(num_threads),
ctypes.byref(self._handle),
)

def __del__(self):
_LIB.GroupedArray_Delete(self._handle, self.dtype)
_LIB[f"{self.prefix}_Delete"](self._handle)

def __len__(self):
return self.indptr.size - 1

def __getitem__(self, i):
return self.data[self.indptr[i] : self.indptr[i + 1]]

def scaler_fit(self, stats_fn_name: str) -> np.ndarray:
stats = np.full((len(self), 2), np.nan, dtype=self.data.dtype)
stats_fn = _LIB[stats_fn_name]
stats_fn(
def scaler_fit(self, scaler_type: str) -> np.ndarray:
stats = np.full_like(self.data, np.nan, shape=(len(self), 2))
_LIB[f"{self.prefix}_{scaler_type}ScalerStats"](
self._handle,
self.dtype,
_data_as_void_ptr(stats),
)
return stats

def scaler_transform(self, stats: np.ndarray) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB.GroupedArray_ScalerTransform(
_LIB[f"{self.prefix}_ScalerTransform"](
self._handle,
_data_as_void_ptr(stats),
self.dtype,
_data_as_void_ptr(out),
)
return out

def scaler_inverse_transform(self, stats: np.ndarray) -> np.ndarray:
out = np.empty_like(self.data)
_LIB.GroupedArray_ScalerInverseTransform(
_LIB[f"{self.prefix}_ScalerInverseTransform"](
self._handle,
_data_as_void_ptr(stats),
self.dtype,
_data_as_void_ptr(out),
)
return out

def take_from_groups(self, k: int) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_TakeFromGroups"](
self._handle,
ctypes.c_int(k),
_data_as_void_ptr(out),
)
return out

def lag_transform(self, lag: int) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_LagTransform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
)
return out

def rolling_transform(
self, stat_name: str, lag: int, window_size: int, min_samples: int
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Rolling{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def rolling_update(
self, stat_name: str, lag: int, window_size: int, min_samples: int
) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_Rolling{stat_name}Update"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def seasonal_rolling_transform(
self,
stat_name: str,
lag: int,
season_length: int,
window_size: int,
min_samples: int,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_SeasonalRolling{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(season_length),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def seasonal_rolling_update(
self,
stat_name: str,
lag: int,
season_length: int,
window_size: int,
min_samples: int,
) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_SeasonalRolling{stat_name}Update"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(season_length),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def expanding_transform_with_aggs(
self,
stat_name: str,
lag: int,
aggs: np.ndarray,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Expanding{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
_data_as_void_ptr(aggs),
)
return out

def expanding_transform(
self,
stat_name: str,
lag: int,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Expanding{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
)
return out

def exponentially_weighted_transform(
self,
stat_name: str,
lag: int,
alpha: float,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
if self.prefix == "GroupedArrayFloat32":
alpha = ctypes.c_float(alpha)
else:
alpha = ctypes.c_double(alpha)
_LIB[f"{self.prefix}_ExponentiallyWeighted{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
alpha,
_data_as_void_ptr(out),
)
return out
Loading