Skip to content

Commit

Permalink
add simulation feature
Browse files Browse the repository at this point in the history
  • Loading branch information
andre_ramos committed Aug 17, 2024
1 parent 04b695a commit 91da16e
Show file tree
Hide file tree
Showing 13 changed files with 197 additions and 71 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ authors = ["andreramosfc <[email protected]>"]
version = "0.1.2"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand Down
22 changes: 16 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
Create_X = output.Create_X # The function utilized to build the regression matrix.
X = output.X # High Dimension Regression utilized in the estimation.
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
ϵ = output.ϵ # Residuals of the model.
ε = output.ε # Residuals of the model.
fitted = output.fitted # Fit in Sample of the model.
components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X").
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
Expand Down Expand Up @@ -52,7 +52,7 @@ Current features include:

## Quick Examples

### Fitting and forecasting
### Fitting, forecasting and simulating
Quick example of fit and forecast for the air passengers time-series.

```julia
Expand All @@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
steps_ahead = 30

output = StateSpaceLearning.fit_model(log_air_passengers)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction = exp.(prediction_log)

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")

N_scenarios = 1000
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
for s in 1:N_scenarios-1
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
end
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenarios Paths", α = 0.1 , color = "red")

```
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
Expand Down Expand Up @@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))

Selected_exogenous = output.components["Exogenous_X"]["Selected"]

Expand All @@ -138,12 +147,13 @@ using Plots
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)

airpassengers = Float64.(airp.passengers)
log_air_passengers[60:72] .= NaN

output = StateSpaceLearning.fit_model(log_air_passengers)

fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
airpassengers[60:72] .= NaN

plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
Expand Down
22 changes: 16 additions & 6 deletions docs/src/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
Create_X = output.Create_X # The function utilized to build the regression matrix.
X = output.X # High Dimension Regression utilized in the estimation.
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
ϵ = output.ϵ # Residuals of the model.
ε = output.ε # Residuals of the model.
fitted = output.fitted # Fit in Sample of the model.
components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X").
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
Expand Down Expand Up @@ -52,7 +52,7 @@ Current features include:

## Quick Examples

### Fitting and forecasting
### Fitting, forecasting and simulating
Quick example of fit and forecast for the air passengers time-series.

```julia
Expand All @@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
steps_ahead = 30

output = StateSpaceLearning.fit_model(log_air_passengers)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction = exp.(prediction_log)

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")

N_scenarios = 1000
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
for s in 1:N_scenarios-1
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
end
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenarios Paths", α = 0.1 , color = "red")

```
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
Expand Down Expand Up @@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))

Selected_exogenous = output.components["Exogenous_X"]["Selected"]

Expand All @@ -138,12 +147,13 @@ using Plots
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)

airpassengers = Float64.(airp.passengers)
log_air_passengers[60:72] .= NaN

output = StateSpaceLearning.fit_model(log_air_passengers)

fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
airpassengers[60:72] .= NaN

plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
Expand Down
67 changes: 66 additions & 1 deletion src/StateSpaceLearning.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module StateSpaceLearning

using LinearAlgebra, Statistics, GLMNet
using LinearAlgebra, Statistics, GLMNet, Distributions

include("structs.jl")
include("models/default_model.jl")
Expand Down Expand Up @@ -106,4 +106,69 @@ function forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Matrix
return complete_matrix[end-steps_ahead+1:end, :]*output.coefs
end

"""
simulate(output::Output, steps_ahead::Int64; N_scenarios::Int64 = 1000, simulate_outliers::Bool = true, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl
Generate simulations for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data.
# Arguments
- `output::Output`: Output object obtained from model fitting.
- `steps_ahead::Int64`: Number of steps ahead for simulation.
- `N_scenarios::Int64`: Number of scenarios to simulate (default: 1000).
- `simulate_outliers::Bool`: If true, simulate outliers (default: true).
- `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0))
# Returns
- `Matrix{Float64}`: Matrix containing simulated values.
"""
function simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true,
innovation_functions::Dict = Dict("stochastic_level" => Dict("create_X" => create_ξ, "component" => "ξ", "args" => (length(output.ε) + steps_ahead + 1, 0)),
"stochastic_trend" => Dict("create_X" => create_ζ, "component" => "ζ", "args" => (length(output.ε) + steps_ahead + 1, 0, 1)),
"stochastic_seasonal" => Dict("create_X" => create_ω, "component" => "ω", "args" => (length(output.ε) + steps_ahead + 1, output.model_input["freq_seasonal"], 0, 1))),
Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl

prediction = forecast(output, steps_ahead; Exogenous_Forecast = Exogenous_Forecast)

T = length(output.ε)
simulation_X = zeros(steps_ahead, 0)
components_matrix = zeros(length(output.valid_indexes), 0)
N_components = 1

for innovation in keys(innovation_functions)
if output.model_input[innovation]
innov_dict = innovation_functions[innovation]
simulation_X = hcat(simulation_X, innov_dict["create_X"](innov_dict["args"]...)[end-steps_ahead:end-1, end-steps_ahead+1:end])
comp = fill_innovation_coefs(T, innov_dict["component"], output)
components_matrix = hcat(components_matrix, comp[output.valid_indexes])
N_components += 1
end
end

components_matrix = hcat(components_matrix, output.ε[output.valid_indexes])
simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead))
components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite

= cov(components_matrix)
MV_dist = MvNormal(zeros(N_components), ∑)
o_noises = simulate_outliers && output.model_input["outlier"] ? rand(Normal(0, std(output.components["o"]["Coefs"])), steps_ahead, N_scenarios) : zeros(steps_ahead, N_scenarios)

simulation = hcat([prediction for _ in 1:N_scenarios]...)
for s in 1:N_scenarios
sim_coefs = ones(size(simulation_X, 2)) .* NaN

for i in 1:steps_ahead
rand_inovs = rand(MV_dist)

for comp in eachindex(rand_inovs)
sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp]
end
end

simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s])
end

return simulation

end

end # module StateSpaceLearning
16 changes: 8 additions & 8 deletions src/estimation_procedure/default_estimation_procedure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,16 @@ function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, L
method_vec = Vector{Float64}(undef, path_size)
for i in 1:path_size
fit = Lasso_X*model.betas[:, i] .+ model.a0[i]
ϵ = Lasso_y - fit
ε = Lasso_y - fit

method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria)
end

best_model_idx = argmin(method_vec)
coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs
ϵ = Lasso_y - fit
return coefs, ϵ
ε = Lasso_y - fit
return coefs, ε
end

"""
Expand Down Expand Up @@ -157,19 +157,19 @@ function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float
end

if hasintercept
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
else
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
end
return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
return rm_average ? (vcat(mean_y, coefs), ε) : (coefs, ε)

end

"""
fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64,
information_criteria::String,
components_indexes::Dict{String, Vector{Int64}},
ϵ::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
ε::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals.
Expand Down
12 changes: 6 additions & 6 deletions src/information_criteria.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
"""
get_information(T::Int64, K::Int64, ϵ::Vector{Float64};
get_information(T::Int64, K::Int64, ε::Vector{Float64};
information_criteria::String = "bic", p::Int64 = 0)::Float64
Calculates information criterion value based on the provided parameters and residuals.
# Arguments
- `T::Int64`: Number of observations.
- `K::Int64`: Number of selected predictors.
- `ϵ::Vector{Float64}`: Vector of residuals.
- `ε::Vector{Float64}`: Vector of residuals.
- `information_criteria::String`: Method for hyperparameter selection (default: "aic").
- `p::Int64`: Number of total predictors (default: 0).
# Returns
- `Float64`: Information criterion value.
"""
function get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; information_criteria::String = "aic")::Float64
function get_information(T::Int64, K::Int64, ε::Vector{Float64}; information_criteria::String = "aic")::Float64
if information_criteria == "bic"
return T*log(var(ϵ)) + K*log(T)
return T*log(var(ε)) + K*log(T)
elseif information_criteria == "aic"
return 2*K + T*log(var(ϵ))
return 2*K + T*log(var(ε))
elseif information_criteria == "aicc"
return 2*K + T*log(var(ϵ)) + ((2*K^2 +2*K)/(T - K - 1))
return 2*K + T*log(var(ε)) + ((2*K^2 +2*K)/(T - K - 1))
end
end
2 changes: 1 addition & 1 deletion src/models/default_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic
end

"""
get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
Calculates variances for each innovation component and for the residuals.
Expand Down
4 changes: 2 additions & 2 deletions src/structs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
- `Create_X::Function`: Function used to create the StateSpaceLearning Matrix.
- `X::Matrix`: StateSpaceLearning Matrix data used in the model.
- `coefs::Vector`: Coefficients obtained from the model.
- `ϵ::Vector`: Residuals of the model.
- `ε::Vector`: Residuals of the model.
- `fitted::Vector`: Fitted values from the model.
- `components::Dict`: Dictionary containing different components.
- `residuals_variances::Dict`: Dictionary storing variances of residuals for different components.
Expand All @@ -24,7 +24,7 @@ mutable struct Output
Create_X::Function
X::Matrix
coefs::Vector
ϵ::Vector
ε::Vector
fitted::Vector
components::Dict
residuals_variances::Dict
Expand Down
Loading

0 comments on commit 91da16e

Please sign in to comment.