add single array transformations (#32)

Nixtla · May 3, 2024 · e83483c · e83483c
1 parent 3d20922
commit e83483c
Show file tree

Hide file tree

Showing 21 changed files with 515 additions and 69 deletions.
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
@@ -4,7 +4,8 @@
             "name": "CMake deps",
             "includePath": [
                 "${workspaceFolder}/include",
-                "${workspaceFolder}/build/_deps/**/include"
+                "${workspaceFolder}/build/_deps/**/include",
+                "${workspaceFolder}/build/_deps/**/cpp",
             ]
         }
     ],

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -9,22 +9,25 @@ if(NOT CMAKE_BUILD_TYPE)
 endif()
 
 set(CMAKE_CXX_STANDARD 17)
+
 if(USE_OPENMP)
     if(APPLE)
         find_package(OpenMP)
+
         if(NOT OpenMP_FOUND)
             # libomp 15.0+ from brew is keg-only, so have to search in other locations.
             # See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
             execute_process(COMMAND brew --prefix libomp
-                            OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
-                            OUTPUT_STRIP_TRAILING_WHITESPACE)
+                OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+                OUTPUT_STRIP_TRAILING_WHITESPACE)
             set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
             set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
             set(OpenMP_C_LIB_NAMES omp)
             set(OpenMP_CXX_LIB_NAMES omp)
             set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
         endif()
     endif()
+
     find_package(OpenMP REQUIRED)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 endif()
@@ -36,14 +39,16 @@ endif()
 if(UNIX)
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fPIC -O0 -g -Wall -Wextra -Wpedantic")
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -O3 -Wall -Wextra -Wpedantic")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas")
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ob2 /Ot /Oy /W4")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068")
 endif()
 
 if(SKBUILD)
     set(LIBRARY_OUTPUT_PATH ${SKBUILD_PLATLIB_DIR}/coreforecast/lib)
 else()
-    set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/coreforecast/lib)
+    set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/python/coreforecast/lib)
 endif()
 
 include_directories(include)
@@ -52,10 +57,28 @@ FetchContent_Declare(
     GIT_REPOSITORY https://github.com/jmoralez/stl-cpp.git
     GIT_TAG 13d26c0d0653ddcdbf853de3f92f56faa831a330
 )
-FetchContent_MakeAvailable(stl-cpp)
-include_directories(${stl-cpp_SOURCE_DIR}/include)
-file(GLOB SOURCES src/*.cpp)
+FetchContent_GetProperties(stl-cpp)
+
+if(NOT stl-cpp_POPULATED)
+    FetchContent_Populate(stl-cpp)
+    include_directories(${stl-cpp_SOURCE_DIR}/include)
+endif()
+
+FetchContent_Declare(
+    skiplist
+    GIT_REPOSITORY https://github.com/paulross/skiplist.git
+    GIT_TAG aace571a8564067820ff7a5e34ba68d0a9782153
+)
+FetchContent_GetProperties(skiplist)
+
+if(NOT skiplist_POPULATED)
+    FetchContent_Populate(skiplist)
+    include_directories(${skiplist_SOURCE_DIR}/src/cpp)
+endif()
+
+file(GLOB SOURCES src/*.cpp ${skiplist_SOURCE_DIR}/src/cpp/SkipList.cpp)
 add_library(coreforecast SHARED ${SOURCES})
+
 if(MSVC)
     set_target_properties(coreforecast PROPERTIES OUTPUT_NAME "libcoreforecast")
 endif()

diff --git a/include/rolling.h b/include/rolling.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "SkipList.h"
+
 #include "grouped_array.h"
 #include "stats.h"
 
@@ -111,56 +113,20 @@ template <typename T>
 inline void RollingQuantileTransform(const T *data, int n, T *out,
                                      int window_size, int min_samples, T p) {
   int upper_limit = std::min(window_size, n);
-  T *buffer = new T[upper_limit];
-  int *positions = new int[upper_limit];
-  min_samples = std::min(min_samples, upper_limit);
-  for (int i = 0; i < min_samples - 1; ++i) {
-    buffer[i] = data[i];
-    positions[i] = i;
-    out[i] = std::numeric_limits<T>::quiet_NaN();
-  }
-  if (min_samples > 2) {
-    std::sort(buffer, buffer + min_samples - 2);
-  }
-  for (int i = min_samples - 1; i < upper_limit; ++i) {
-    int idx = std::lower_bound(buffer, buffer + i, data[i]) - buffer;
-    for (int j = 0; j < i - idx; ++j) {
-      buffer[i - j] = buffer[i - j - 1];
-      positions[i - j] = positions[i - j - 1];
+  OrderedStructs::SkipList::HeadNode<T> sl;
+  for (int i = 0; i < upper_limit; ++i) {
+    sl.insert(data[i]);
+    if (i + 1 < min_samples) {
+      out[i] = std::numeric_limits<T>::quiet_NaN();
+    } else {
+      out[i] = SortedQuantile(sl, p, i + 1);
     }
-    buffer[idx] = data[i];
-    positions[idx] = i;
-    out[i] = SortedQuantile(buffer, p, i + 1);
   }
   for (int i = window_size; i < n; ++i) {
-    int remove_idx =
-        std::min_element(positions, positions + window_size) - positions;
-    int idx;
-    if (data[i] <= buffer[remove_idx]) {
-      idx = std::lower_bound(buffer, buffer + remove_idx, data[i]) - buffer;
-      for (int j = 0; j < remove_idx - idx; ++j) {
-        buffer[remove_idx - j] = buffer[remove_idx - j - 1];
-        positions[remove_idx - j] = positions[remove_idx - j - 1];
-      }
-    } else {
-      idx = (std::lower_bound(buffer + remove_idx - 1, buffer + window_size,
-                              data[i]) -
-             buffer) -
-            1;
-      if (idx == window_size) {
-        --idx;
-      }
-      for (int j = 0; j < idx - remove_idx; ++j) {
-        buffer[remove_idx + j] = buffer[remove_idx + j + 1];
-        positions[remove_idx + j] = positions[remove_idx + j + 1];
-      }
-    }
-    buffer[idx] = data[i];
-    positions[idx] = i;
-    out[i] = SortedQuantile(buffer, p, window_size);
+    sl.remove(data[i - window_size]);
+    sl.insert(data[i]);
+    out[i] = SortedQuantile(sl, p, window_size);
   }
-  delete[] buffer;
-  delete[] positions;
 }
 
 template <typename Func, typename T, typename... Args>

diff --git a/include/stats.h b/include/stats.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "SkipList.h"
+
 #include <algorithm>
 #include <numeric>
 
@@ -10,19 +12,21 @@ template <typename T> inline T Quantile(T *data, T p, int n) {
   std::nth_element(data, data + i, data + n);
   T out = data[i];
   if (g > 0.0) {
-    std::nth_element(data, data + i + 1, data + n);
-    out += g * (data[i + 1] - out);
+    auto it = std::min_element(data + i + 1, data + n);
+    out += g * (*it - out);
   }
   return out;
 }
 
-template <typename T> inline T SortedQuantile(T *data, T p, int n) {
+template <typename T>
+inline T SortedQuantile(OrderedStructs::SkipList::HeadNode<T> &data, T p,
+                        int n) {
   T i_plus_g = p * (n - 1);
   int i = static_cast<int>(i_plus_g);
   T g = i_plus_g - i;
-  T out = data[i];
+  T out = data.at(i);
   if (g > 0.0) {
-    out += g * (data[i + 1] - out);
+    out += g * (data.at(i + 1) - out);
   }
   return out;
 }

diff --git a/pyproject.toml b/pyproject.toml
@@ -18,9 +18,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
 ]
 description = "Fast implementations of common forecasting routines"
-authors = [
-    {name = "José Morales", email = "[email protected]"},
-]
+authors = [{ name = "José Morales", email = "[email protected]" }]
 readme = "README.md"
 keywords = ["forecasting", "time-series"]
 
@@ -39,15 +37,11 @@ logging.level = "INFO"
 sdist.exclude = ["tests", "*.yml"]
 sdist.reproducible = true
 wheel.install-dir = "coreforecast"
-wheel.packages = ["coreforecast"]
+wheel.packages = ["python/coreforecast"]
 wheel.py-api = "py3"
 
 [tool.cibuildwheel]
 archs = "all"
 build-verbosity = 3
-macos.before-build = [
-    "brew install libomp",
-    "./scripts/switch_xcode",
-]
-test-requires = "pandas pytest window-ops"
-test-command = "pytest {project}/tests -k correct"
+macos.before-build = ["brew install libomp", "./scripts/switch_xcode"]
+test-command = 'python -c "import coreforecast._lib"'
diff --git a/coreforecast/__init__.py → python/coreforecast/__init__.py b/coreforecast/__init__.py → python/coreforecast/__init__.py
diff --git a/coreforecast/_lib.py → python/coreforecast/_lib.py b/coreforecast/_lib.py → python/coreforecast/_lib.py
diff --git a/coreforecast/differences.py → python/coreforecast/differences.py b/coreforecast/differences.py → python/coreforecast/differences.py
diff --git a/python/coreforecast/expanding.py b/python/coreforecast/expanding.py
@@ -0,0 +1,91 @@
+__all__ = [
+    "expanding_mean",
+    "expanding_std",
+    "expanding_min",
+    "expanding_max",
+    "expanding_quantile",
+]
+
+import ctypes
+from typing import Callable
+
+import numpy as np
+
+from ._lib import _LIB
+from .utils import (
+    _data_as_void_ptr,
+    _ensure_float,
+    _float_arr_to_prefix,
+    _pyfloat_to_np_c,
+)
+
+
+def _expanding_stat(x: np.ndarray, stat: str) -> np.ndarray:
+    x = _ensure_float(x)
+    prefix = _float_arr_to_prefix(x)
+    out = np.empty_like(x)
+    _LIB[f"{prefix}_Expanding{stat}Transform"](
+        _data_as_void_ptr(x),
+        ctypes.c_int(x.size),
+        _data_as_void_ptr(out),
+    )
+    return out
+
+
+def _expanding_docstring(*args, **kwargs) -> Callable:
+    base_docstring = """Compute the {} of the input array.
+
+    Args:
+        x (np.ndarray): Input array.
+
+    Returns:
+        np.ndarray: Array with the expanding statistic
+    """
+
+    def docstring_decorator(function: Callable):
+        function.__doc__ = base_docstring.format(function.__name__)
+        return function
+
+    return docstring_decorator(*args, **kwargs)
+
+
+@_expanding_docstring
+def expanding_mean(x: np.ndarray) -> np.ndarray:
+    return _expanding_stat(x, "Mean")
+
+
+@_expanding_docstring
+def expanding_std(x: np.ndarray) -> np.ndarray:
+    return _expanding_stat(x, "Std")
+
+
+@_expanding_docstring
+def expanding_min(x: np.ndarray) -> np.ndarray:
+    return _expanding_stat(x, "Min")
+
+
+@_expanding_docstring
+def expanding_max(x: np.ndarray) -> np.ndarray:
+    return _expanding_stat(x, "Max")
+
+
+def expanding_quantile(x: np.ndarray, p: float) -> np.ndarray:
+    """Compute the expanding_quantile of the input array.
+
+    Args:
+        x (np.ndarray): Input array.
+        p (float): Quantile to compute.
+
+    Returns:
+        np.ndarray: Array with the expanding statistic
+    """
+    x = _ensure_float(x)
+    prefix = _float_arr_to_prefix(x)
+    out = np.empty_like(x)
+    _LIB[f"{prefix}_ExpandingQuantileTransform"](
+        _data_as_void_ptr(x),
+        ctypes.c_int(x.size),
+        _pyfloat_to_np_c(p, x.dtype),
+        _data_as_void_ptr(out),
+    )
+    return out
diff --git a/python/coreforecast/exponentially_weighted.py b/python/coreforecast/exponentially_weighted.py
@@ -0,0 +1,35 @@
+__all__ = ["exponentially_weighted_mean"]
+
+import ctypes
+
+import numpy as np
+
+from ._lib import _LIB
+from .utils import (
+    _ensure_float,
+    _float_arr_to_prefix,
+    _data_as_void_ptr,
+    _pyfloat_to_np_c,
+)
+
+
+def exponentially_weighted_mean(x: np.ndarray, alpha: float) -> np.ndarray:
+    """Compute the exponentially weighted mean of the input array.
+
+    Args:
+        x (np.ndarray): Input array.
+        alpha (float): Weight parameter.
+
+    Returns:
+        np.ndarray: Array with the exponentially weighted mean.
+    """
+    x = _ensure_float(x)
+    prefix = _float_arr_to_prefix(x)
+    out = np.empty_like(x)
+    _LIB[f"{prefix}_ExponentiallyWeightedMeanTransform"](
+        _data_as_void_ptr(x),
+        ctypes.c_int(x.size),
+        _pyfloat_to_np_c(alpha, x.dtype),
+        _data_as_void_ptr(out),
+    )
+    return out
diff --git a/coreforecast/grouped_array.py → python/coreforecast/grouped_array.py b/coreforecast/grouped_array.py → python/coreforecast/grouped_array.py
diff --git a/coreforecast/lag_transforms.py → python/coreforecast/lag_transforms.py b/coreforecast/lag_transforms.py → python/coreforecast/lag_transforms.py