From c95422d0f9c24abf9dbd2b50e8b5902389ea24a6 Mon Sep 17 00:00:00 2001
From: Mikkel Paltorp Schmitt <mpasc@elektro.dtu.dk>
Date: Wed, 18 Oct 2023 18:34:45 +0200
Subject: [PATCH] Adding smoothing spline example

---
 README.md                   |  3 ++
 examples/smoothingspline.py | 66 +++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 examples/smoothingspline.py

diff --git a/README.md b/README.md
index 91eddc7..2636c75 100644
--- a/README.md
+++ b/README.md
@@ -17,5 +17,8 @@ All implemented algorithms (multiplication, Cholesky factorization, forward/back
 
 A more in-depth descriptions of the algorithms can be found in [1] or [here](https://github.com/mipals/SmoothingSplines.jl/blob/master/mt_mikkel_paltorp.pdf).
 
+### Example application: Smoothing Splines
+An application of EGRSS matrices is smoothing splines as the so-called spline kernel matrix is an EGRSS matrix. An example implementation can be found in `examples/smoothingsplines.py`.
+
 ## References
 [1] M. S. Andersen and T. Chen, “Smoothing Splines and Rank Structured Matrices: Revisiting the Spline Kernel,” SIAM Journal on Matrix Analysis and Applications, 2020.
diff --git a/examples/smoothingspline.py b/examples/smoothingspline.py
new file mode 100644
index 0000000..4e1cb22
--- /dev/null
+++ b/examples/smoothingspline.py
@@ -0,0 +1,66 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from math import factorial
+from scipy import optimize
+from egrssmatrices.spline_kernel import spline_kernel
+from egrssmatrices.egrssmatrix import egrssmatrix
+
+# Evaluating polynomial basis
+def evaluate_polynomial_basis(t,p):
+    n = len(t)
+    H = np.ones((n,p),t.dtype)
+    for i in range(2,p+1):
+        H[:,i-1] = t**(i - 1)/factorial(i-1)
+    return H
+
+# Computing coeffieints and log-generalized-maximum-likelihood
+def compute_coefficients(K, H, y, alpha):
+    n, p = H.shape
+    chol = egrssmatrix(K.Ut,K.Vt,n*alpha)
+    chol.cholesky()
+    KinvH = np.zeros((n,p))
+    # For now solve only works with vectors, so we have to loop over the columns of H
+    for k in range(p):
+        KinvH[:,k] = chol.solve(H[:,k])
+    A = H.T@KinvH
+    v = chol.solve(y)
+    d = np.linalg.solve(A,H.T@v) # Dense computation, but A is p x p, so its not a problem
+    c = chol.solve(y - H@d)      # Efficient solve using the O(p^2n) algorithm.
+    log_gml = np.log(np.dot(y,c)) + chol.logdet()/(n - p) + np.linalg.slogdet(A)[1]/(n - p)
+    return c,d,log_gml
+
+# In the following we will fit the Forrester et al. (2008) Function
+f = lambda t: (6*t - 2)**2 * np.sin(12*t - 4)
+
+# Defining data
+n       = 100                            # Number of data points
+sigma   = 2.0                            # Noise standard deviation
+t = np.sort(np.random.rand(n))           # Generating random x-data in the interval
+y = f(t) + sigma*np.random.randn(len(t)) # Adding noise to the y-data
+
+# Defining the exact spline
+p = 2                               # Defining spline order
+Ut,Vt = spline_kernel(t,p)          # Computing the generators
+K = egrssmatrix(Ut,Vt)              # Defining the EGRSS matrix
+H = evaluate_polynomial_basis(t,p)  # Computing polynomial basis
+
+# Log generalized maximum likelihood
+def log_gml(v):
+    _,_,log_gml = compute_coefficients(K,H,y,10**(v))
+    return log_gml
+
+# Minimizing log-gml 
+minimizer = optimize.minimize_scalar(log_gml,method="brent")
+# Recomputing coefficients
+alpha = 10**(minimizer.x)
+c,d,_ = compute_coefficients(K,H,y,alpha)
+# Computing fit
+smoothed_fit = K@c + H@d
+
+plt.plot(t,f(t), label="True function")
+plt.plot(t,smoothed_fit, label="Spline fit")
+plt.scatter(t,y, label = "Data")
+plt.xlabel("t")
+plt.ylabel("y")
+plt.legend()
+plt.show()
\ No newline at end of file