From 91da16e8797641af9dbc298c6f415802cbcdf977 Mon Sep 17 00:00:00 2001
From: andre_ramos
Date: Sat, 17 Aug 2024 12:50:52 -0400
Subject: [PATCH] add simulation feature

---
 Project.toml                            |  1 +
 README.md                               | 22 ++--
 docs/src/manual.md                      | 22 ++--
 src/StateSpaceLearning.jl               | 67 ++++++++++++++++++-
 .../default_estimation_procedure.jl     | 16 ++--
 src/information_criteria.jl             | 12 ++--
 src/models/default_model.jl             |  2 +-
 src/structs.jl                          |  4 +-
 src/utils.jl                            | 32 +++++++--
 test/StateSpaceLearning.jl              | 24 ++++++-
 .../default_estimation_procedure.jl     | 44 ++++++------
 test/information_criteria.jl            |  8 +--
 test/utils.jl                           | 14 ++--
 13 files changed, 197 insertions(+), 71 deletions(-)

diff --git a/Project.toml b/Project.toml
index 95d5ee6..c9a5218 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["andreramosfc "]
 version = "0.1.2"
 
 [deps]
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/README.md b/README.md
index 51276da..df03ce1 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
 Create_X = output.Create_X # The function utilized to build the regression matrix.
 X = output.X # High Dimension Regression utilized in the estimation.
 coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
-ϵ = output.ϵ # Residuals of the model.
+ε = output.ε # Residuals of the model.
 fitted = output.fitted # Fit in Sample of the model.
 components = output.components # Dictionary containing information about each component of the model; each component has the keys: "Values" (the value of the component at each timestamp), "Coefs" (the coefficients estimated for each element of the component) and "Indexes" (the indexes of the elements of the component in the high dimension regression "X").
 residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovation components (that is the information that can be utilized to initialize the state space model).
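The `components` dictionary described above is the main entry point for inspecting a fitted decomposition. A minimal access sketch (illustrative only: it assumes a fitted `output` whose model includes a stochastic trend, and uses the trend-innovation key "ζ" from the package's component naming):

```julia
trend_values  = output.components["ζ"]["Values"]   # value of the ζ component at each timestamp
trend_coefs   = output.components["ζ"]["Coefs"]    # coefficients estimated for each element of ζ
trend_indexes = output.components["ζ"]["Indexes"]  # columns of the regression matrix X belonging to ζ
innovation_variances = output.residuals_variances  # variances usable to initialize a state space model
```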
@@ -52,7 +52,7 @@ Current features include:
 
 ## Quick Examples
 
-### Fitting and forecasting
+### Fitting, forecasting and simulating
 
 Quick example of fit and forecast for the air passengers time-series.
 ```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
 steps_ahead = 30
 
 output = StateSpaceLearning.fit_model(log_air_passengers)
-prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
+prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments: the fitted model output and the number of steps ahead to forecast
 prediction = exp.(prediction_log)
 
 plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
-plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
+plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")
+
+N_scenarios = 1000
+simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments: the fitted model output, the number of steps ahead and the number of scenario paths
+
+plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
+for s in 1:N_scenarios-1
+    plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
+end
+plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenario Paths", α = 0.1 , color = "red")
 ```
 
 ![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
 
 y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.
 
-output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
+output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
 
 Selected_exogenous = output.components["Exogenous_X"]["Selected"]
 
@@ -138,12 +147,13 @@ using Plots
 
 airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
 log_air_passengers = log.(airp.passengers)
+airpassengers = Float64.(airp.passengers)
 log_air_passengers[60:72] .= NaN
 
 output = StateSpaceLearning.fit_model(log_air_passengers)
 
 fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
-real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
+real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
 airpassengers[60:72] .= NaN
 
 plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
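A common follow-up to the scenario example above is to turn the simulated paths into empirical forecast intervals. A sketch, reusing `simulation`, `steps_ahead` and `log_air_passengers` from the README example (`quantile` comes from the Statistics stdlib):

```julia
using Statistics

# pointwise 95% empirical interval across the scenario paths (back-transformed from logs)
lower = [quantile(exp.(simulation[t, :]), 0.025) for t in 1:steps_ahead]
upper = [quantile(exp.(simulation[t, :]), 0.975) for t in 1:steps_ahead]
plot!(vcat(fill(NaN, length(log_air_passengers)), lower), lab = "2.5% quantile", w = 2, color = "red")
plot!(vcat(fill(NaN, length(log_air_passengers)), upper), lab = "97.5% quantile", w = 2, color = "red")
```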
diff --git a/docs/src/manual.md b/docs/src/manual.md
index 51276da..df03ce1 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
 Create_X = output.Create_X # The function utilized to build the regression matrix.
 X = output.X # High Dimension Regression utilized in the estimation.
 coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
-ϵ = output.ϵ # Residuals of the model.
+ε = output.ε # Residuals of the model.
 fitted = output.fitted # Fit in Sample of the model.
 components = output.components # Dictionary containing information about each component of the model; each component has the keys: "Values" (the value of the component at each timestamp), "Coefs" (the coefficients estimated for each element of the component) and "Indexes" (the indexes of the elements of the component in the high dimension regression "X").
 residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovation components (that is the information that can be utilized to initialize the state space model).
@@ -52,7 +52,7 @@ Current features include:
 
 ## Quick Examples
 
-### Fitting and forecasting
+### Fitting, forecasting and simulating
 
 Quick example of fit and forecast for the air passengers time-series.
 ```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
 steps_ahead = 30
 
 output = StateSpaceLearning.fit_model(log_air_passengers)
-prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
+prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments: the fitted model output and the number of steps ahead to forecast
 prediction = exp.(prediction_log)
 
 plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
-plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
+plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")
+
+N_scenarios = 1000
+simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments: the fitted model output, the number of steps ahead and the number of scenario paths
+
+plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
+for s in 1:N_scenarios-1
+    plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
+end
+plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenario Paths", α = 0.1 , color = "red")
 ```
 
 ![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
 
 y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.
 
-output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
+output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
 
 Selected_exogenous = output.components["Exogenous_X"]["Selected"]
 
@@ -138,12 +147,13 @@ using Plots
 
 airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
 log_air_passengers = log.(airp.passengers)
+airpassengers = Float64.(airp.passengers)
 log_air_passengers[60:72] .= NaN
 
 output = StateSpaceLearning.fit_model(log_air_passengers)
 
 fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
-real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
+real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
 airpassengers[60:72] .= NaN
 
 plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
diff --git a/src/StateSpaceLearning.jl b/src/StateSpaceLearning.jl
index bac448a..07d8b4b 100644
--- a/src/StateSpaceLearning.jl
+++ b/src/StateSpaceLearning.jl
@@ -1,6 +1,6 @@
 module StateSpaceLearning
 
-using LinearAlgebra, Statistics, GLMNet
+using LinearAlgebra, Statistics, GLMNet, Distributions
 
 include("structs.jl")
 include("models/default_model.jl")
@@ -106,4 +106,69 @@ function forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Matrix
     return complete_matrix[end-steps_ahead+1:end, :]*output.coefs
 end
 
+"""
+simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true,
+         innovation_functions::Dict = ..., Exogenous_Forecast::Matrix{Fl} = zeros(steps_ahead, 0))::Matrix{Float64} where Fl
+
+    Generate simulations for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data.
+
+    # Arguments
+    - `output::Output`: Output object obtained from model fitting.
+    - `steps_ahead::Int64`: Number of steps ahead for simulation.
+    - `N_scenarios::Int64`: Number of scenario paths to simulate.
+    - `simulate_outliers::Bool`: If true, simulate outliers (default: true).
+    - `innovation_functions::Dict`: Builders for the innovation regressors of each stochastic component (default: the ξ, ζ and ω builders shown in the method definition).
+    - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0)).
+
+    # Returns
+    - `Matrix{Float64}`: Matrix containing simulated values.
+""" +function simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true, + innovation_functions::Dict = Dict("stochastic_level" => Dict("create_X" => create_ξ, "component" => "ξ", "args" => (length(output.ε) + steps_ahead + 1, 0)), + "stochastic_trend" => Dict("create_X" => create_ζ, "component" => "ζ", "args" => (length(output.ε) + steps_ahead + 1, 0, 1)), + "stochastic_seasonal" => Dict("create_X" => create_ω, "component" => "ω", "args" => (length(output.ε) + steps_ahead + 1, output.model_input["freq_seasonal"], 0, 1))), + Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl + + prediction = forecast(output, steps_ahead; Exogenous_Forecast = Exogenous_Forecast) + + T = length(output.ε) + simulation_X = zeros(steps_ahead, 0) + components_matrix = zeros(length(output.valid_indexes), 0) + N_components = 1 + + for innovation in keys(innovation_functions) + if output.model_input[innovation] + innov_dict = innovation_functions[innovation] + simulation_X = hcat(simulation_X, innov_dict["create_X"](innov_dict["args"]...)[end-steps_ahead:end-1, end-steps_ahead+1:end]) + comp = fill_innovation_coefs(T, innov_dict["component"], output) + components_matrix = hcat(components_matrix, comp[output.valid_indexes]) + N_components += 1 + end + end + + components_matrix = hcat(components_matrix, output.ε[output.valid_indexes]) + simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead)) + components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite + + ∑ = cov(components_matrix) + MV_dist = MvNormal(zeros(N_components), ∑) + o_noises = simulate_outliers && output.model_input["outlier"] ? rand(Normal(0, std(output.components["o"]["Coefs"])), steps_ahead, N_scenarios) : zeros(steps_ahead, N_scenarios) + + simulation = hcat([prediction for _ in 1:N_scenarios]...) + for s in 1:N_scenarios + sim_coefs = ones(size(simulation_X, 2)) .* NaN + + for i in 1:steps_ahead + rand_inovs = rand(MV_dist) + + for comp in eachindex(rand_inovs) + sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp] + end + end + + simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s]) + end + + return simulation + +end + end # module StateSpaceLearning diff --git a/src/estimation_procedure/default_estimation_procedure.jl b/src/estimation_procedure/default_estimation_procedure.jl index c8c52a8..29e8e17 100644 --- a/src/estimation_procedure/default_estimation_procedure.jl +++ b/src/estimation_procedure/default_estimation_procedure.jl @@ -76,16 +76,16 @@ function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, L method_vec = Vector{Float64}(undef, path_size) for i in 1:path_size fit = Lasso_X*model.betas[:, i] .+ model.a0[i] - ϵ = Lasso_y - fit + ε = Lasso_y - fit - method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria) + method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria) end best_model_idx = argmin(method_vec) coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx] fit = intercept ? 
diff --git a/src/estimation_procedure/default_estimation_procedure.jl b/src/estimation_procedure/default_estimation_procedure.jl
index c8c52a8..29e8e17 100644
--- a/src/estimation_procedure/default_estimation_procedure.jl
+++ b/src/estimation_procedure/default_estimation_procedure.jl
@@ -76,16 +76,16 @@ function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, L
     method_vec = Vector{Float64}(undef, path_size)
     for i in 1:path_size
         fit = Lasso_X*model.betas[:, i] .+ model.a0[i]
-        ϵ = Lasso_y - fit
+        ε = Lasso_y - fit
 
-        method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
+        method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria)
     end
     best_model_idx = argmin(method_vec)
     coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
     fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs
-    ϵ = Lasso_y - fit
-    return coefs, ϵ
+    ε = Lasso_y - fit
+    return coefs, ε
 end
 
 """
@@ -157,11 +157,11 @@ function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float
     end
 
     if hasintercept
-        coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
+        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
     else
-        coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
+        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
     end
-    return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
+    return rm_average ? (vcat(mean_y, coefs), ε) : (coefs, ε)
 end
 
 """
@@ -169,7 +169,7 @@ end
     fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64,
                  information_criteria::String,
                  components_indexes::Dict{String, Vector{Int64}},
-                 ϵ::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
+                 ε::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
 
     Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals.
diff --git a/src/information_criteria.jl b/src/information_criteria.jl
index d554f6e..8594a34 100644
--- a/src/information_criteria.jl
+++ b/src/information_criteria.jl
@@ -1,5 +1,5 @@
 """
-    get_information(T::Int64, K::Int64, ϵ::Vector{Float64};
+    get_information(T::Int64, K::Int64, ε::Vector{Float64};
                     information_criteria::String = "bic", p::Int64 = 0)::Float64
 
     Calculates information criterion value based on the provided parameters and residuals.
@@ -7,7 +7,7 @@
 # Arguments
 - `T::Int64`: Number of observations.
 - `K::Int64`: Number of selected predictors.
-- `ϵ::Vector{Float64}`: Vector of residuals.
+- `ε::Vector{Float64}`: Vector of residuals.
 - `information_criteria::String`: Method for hyperparameter selection (default: "aic").
 - `p::Int64`: Number of total predictors (default: 0).
@@ -15,12 +15,12 @@
 - `Float64`: Information criterion value.
 
 """
-function get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; information_criteria::String = "aic")::Float64
+function get_information(T::Int64, K::Int64, ε::Vector{Float64}; information_criteria::String = "aic")::Float64
     if information_criteria == "bic"
-        return T*log(var(ϵ)) + K*log(T)
+        return T*log(var(ε)) + K*log(T)
     elseif information_criteria == "aic"
-        return 2*K + T*log(var(ϵ))
+        return 2*K + T*log(var(ε))
     elseif information_criteria == "aicc"
-        return 2*K + T*log(var(ϵ)) + ((2*K^2 +2*K)/(T - K - 1))
+        return 2*K + T*log(var(ε)) + ((2*K^2 +2*K)/(T - K - 1))
     end
 end
\ No newline at end of file
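For reference, the three criteria above reduce to closed-form expressions in T, K and var(ε); the residual vector below reproduces the expected values used in test/information_criteria.jl:

```julia
using Statistics

ε = [1.1, 2.2, 3.3, 4.4, 5.5]; T = 5; K = 3
bic  = T * log(var(ε)) + K * log(T)      # ≈ 10.36287
aic  = 2K + T * log(var(ε))              # ≈ 11.53456
aicc = aic + (2K^2 + 2K) / (T - K - 1)   # ≈ 35.53456
```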
diff --git a/src/models/default_model.jl b/src/models/default_model.jl
index f1f5939..09bd76e 100644
--- a/src/models/default_model.jl
+++ b/src/models/default_model.jl
@@ -259,7 +259,7 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic
 end
 
 """
-    get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
+    get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
 
     Calculates variances for each innovation component and for the residuals.
diff --git a/src/structs.jl b/src/structs.jl
index 2faa391..fe0929c 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -8,7 +8,7 @@
     - `Create_X::Function`: Function used to create the StateSpaceLearning Matrix.
     - `X::Matrix`: StateSpaceLearning Matrix data used in the model.
     - `coefs::Vector`: Coefficients obtained from the model.
-    - `ϵ::Vector`: Residuals of the model.
+    - `ε::Vector`: Residuals of the model.
     - `fitted::Vector`: Fitted values from the model.
     - `components::Dict`: Dictionary containing different components.
     - `residuals_variances::Dict`: Dictionary storing variances of residuals for different components.
@@ -24,7 +24,7 @@ mutable struct Output
     Create_X::Function
     X::Matrix
     coefs::Vector
-    ϵ::Vector
+    ε::Vector
     fitted::Vector
     components::Dict
     residuals_variances::Dict
diff --git a/src/utils.jl b/src/utils.jl
index 00dc661..e5cc991 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -30,12 +30,12 @@ function build_components(X::Matrix{Tl}, coefs::Vector{Float64}, components_inde
 end
 
 """
-get_fit_and_residuals(estimation_ϵ::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64) -> Tuple{Vector{Float64}, Vector{Float64}} where Tl
+get_fit_and_residuals(estimation_ε::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64) -> Tuple{Vector{Float64}, Vector{Float64}} where Tl
 
     Builds complete residuals and fit in sample. Residuals will contain NaN values for non-valid indexes. Fit in Sample will be a vector of fitted values computed from input data and coefficients (non-valid indexes will also be calculated via interpolation).
 
 # Arguments
-    - `estimation_ϵ::Vector{Float64}`: Vector of estimation errors.
+    - `estimation_ε::Vector{Float64}`: Vector of estimation errors.
     - `coefs::Vector{Float64}`: Coefficients.
     - `X::Matrix{Tl}`: Input matrix.
     - `valid_indexes::Vector{Int64}`: Valid indexes.
@@ -43,14 +43,14 @@
 # Returns
 - Tuple containing:
-    - `ϵ::Vector{Float64}`: Vector containing NaN values filled with estimation errors at valid indexes.
+    - `ε::Vector{Float64}`: Vector containing NaN values filled with estimation errors at valid indexes.
     - `fitted::Vector{Float64}`: Vector of fitted values computed from input data and coefficients.
 
 """
-function get_fit_and_residuals(estimation_ϵ::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64)::Tuple{Vector{Float64}, Vector{Float64}} where Tl
-    ϵ = fill(NaN, T); ϵ[valid_indexes] = estimation_ϵ
+function get_fit_and_residuals(estimation_ε::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64)::Tuple{Vector{Float64}, Vector{Float64}} where Tl
+    ε = fill(NaN, T); ε[valid_indexes] = estimation_ε
     fitted = X*coefs
-    return ϵ, fitted
+    return ε, fitted
 end
 
 """
@@ -120,4 +120,24 @@ has_intercept(X::Matrix{Tl})::Bool where Tl
 """
 function has_intercept(X::Matrix{Tl})::Bool where Tl
     return any([all(X[:, i] .== 1) for i in 1:size(X, 2)])
+end
+
+"""
+fill_innovation_coefs(T::Int64, component::String, output::Output)::Vector{Float64}
+
+    Builds the innovation coefficients for a given component as a vector with the same length as the original time series, placing each estimated coefficient at the first observation it is associated with.
+
+    # Arguments
+    - `T::Int64`: Length of the original time series.
+    - `component::String`: Component name.
+    - `output::Output`: Output object obtained from model fitting.
+
+    # Returns
+    - `Vector{Float64}`: Vector containing innovation coefficients for the given component.
+"""
+function fill_innovation_coefs(T::Int64, component::String, output::Output)::Vector{Float64}
+    innov_comp = zeros(T)
+    for (i, idx) in enumerate(output.components[component]["Indexes"])
+        innov_comp[findfirst(x -> x != 0, output.X[:, idx])] = output.components[component]["Coefs"][i]
+    end
+    return innov_comp
+end
\ No newline at end of file
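Each innovation column of X becomes non-zero at a single first observation, which is what lets `fill_innovation_coefs` map every coefficient back to the timestamp it originates from. A toy illustration of that mapping (hypothetical 4-observation X with two ξ-style columns):

```julia
X = [0 0; 1 0; 1 1; 1 1]      # columns become active at t = 2 and t = 3
coefs = [0.7, -0.2]

innov = zeros(size(X, 1))
for (i, col) in enumerate(eachcol(X))
    innov[findfirst(!=(0), col)] = coefs[i]   # place each coefficient at its first active row
end
innov                          # [0.0, 0.7, -0.2, 0.0]
```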
diff --git a/test/StateSpaceLearning.jl b/test/StateSpaceLearning.jl
index 187b635..c369d92 100644
--- a/test/StateSpaceLearning.jl
+++ b/test/StateSpaceLearning.jl
@@ -3,14 +3,14 @@
     y2 = rand(100); y2[10:20] .= NaN
 
     output1 = StateSpaceLearning.fit_model(y1)
-    @test length(output1.ϵ) == 100
+    @test length(output1.ε) == 100
     @test length(output1.fitted) == 100
     @test size(output1.X, 1) == 100
     @test size(output1.X, 2) == length(output1.coefs)
 
     output2 = StateSpaceLearning.fit_model(y2)
     @test output2.valid_indexes == setdiff(1:100, 10:20)
-    @test all(isnan.(output2.ϵ[10:20]))
+    @test all(isnan.(output2.ε[10:20]))
     @test !all(isnan.(output2.fitted[10:20]))
 
     output3 = StateSpaceLearning.fit_model(y1; model_input = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, "ζ_ω_threshold" => 1))
@@ -42,4 +42,24 @@ end
     forecast3 = trunc.(StateSpaceLearning.forecast(output3, 18); digits = 3)
     @assert forecast3 == [6.11, 6.082, 6.221, 6.19, 6.197, 6.328, 6.447, 6.44, 6.285, 6.163, 6.026, 6.142, 6.166, 6.138, 6.278, 6.246, 6.253, 6.384]
+end
+
+@testset "Function: simulate" begin
+    y1 = rand(100)
+    y2 = rand(100); y2[10:20] .= NaN
+
+    output1 = StateSpaceLearning.fit_model(y1)
+    @test size(StateSpaceLearning.simulate(output1, 10, 100)) == (10, 100)
+
+    output2 = StateSpaceLearning.fit_model(y2; Exogenous_X = rand(100, 3))
+    @test size(StateSpaceLearning.simulate(output2, 10, 100; Exogenous_Forecast = rand(10, 3))) == (10, 100)
+
+    y3 = [4.718, 4.77, 4.882, 4.859, 4.795, 4.905, 4.997, 4.997, 4.912, 4.779, 4.644, 4.77, 4.744, 4.836, 4.948, 4.905, 4.828, 5.003, 5.135, 5.135, 5.062, 4.89, 4.736, 4.941, 4.976, 5.01, 5.181, 5.093, 5.147, 5.181, 5.293, 5.293, 5.214, 5.087, 4.983, 5.111, 5.141, 5.192, 5.262, 5.198, 5.209, 5.384, 5.438, 5.488, 5.342, 5.252, 5.147, 5.267, 5.278, 5.278, 5.463, 5.459, 5.433, 5.493, 5.575, 5.605, 5.468, 5.351, 5.192, 5.303, 5.318, 5.236, 5.459, 5.424, 5.455, 5.575, 5.71, 5.68, 5.556, 5.433, 5.313, 5.433, 5.488, 5.451, 5.587, 5.594, 5.598, 5.752, 5.897, 5.849, 5.743, 5.613, 5.468, 5.627, 5.648, 5.624, 5.758, 5.746, 5.762, 5.924, 6.023, 6.003, 5.872, 5.723, 5.602, 5.723, 5.752, 5.707, 5.874, 5.852, 5.872, 6.045, 6.142, 6.146, 6.001, 5.849, 5.72, 5.817, 5.828, 5.762, 5.891, 5.852, 5.894, 6.075, 6.196, 6.224, 6.001, 5.883, 5.736, 5.82, 5.886, 5.834, 6.006, 5.981, 6.04, 6.156, 6.306, 6.326, 6.137, 6.008, 5.891, 6.003, 6.033, 5.968, 6.037, 6.133, 6.156, 6.282, 6.432, 6.406, 6.23, 6.133, 5.966, 6.068]
+    output3 = StateSpaceLearning.fit_model(y3)
+    Random.seed!(123)
+    simulate1 = trunc.(StateSpaceLearning.simulate(output3, 18, 1000); digits = 3)
+    Random.seed!(123)
+    simulate2 = trunc.(StateSpaceLearning.simulate(output3, 18, 1000; simulate_outliers = false); digits = 3)
+    @assert simulate1 != simulate2
+
 end
\ No newline at end of file
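The seeded tests above also imply reproducibility from the caller's side: `simulate` draws from the global RNG, so re-seeding reproduces the same scenario paths. A sketch, assuming a fitted `output`:

```julia
using Random

Random.seed!(123)
paths_a = StateSpaceLearning.simulate(output, 18, 1000)
Random.seed!(123)
paths_b = StateSpaceLearning.simulate(output, 18, 1000)
@assert paths_a == paths_b   # identical draws under the same seed
```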
diff --git a/test/estimation_procedure/default_estimation_procedure.jl b/test/estimation_procedure/default_estimation_procedure.jl
index 62eb2d4..f0edfd9 100644
--- a/test/estimation_procedure/default_estimation_procedure.jl
+++ b/test/estimation_procedure/default_estimation_procedure.jl
@@ -8,23 +8,23 @@
     penalty_factor = ones(3)
     intercept2 = false
     model1 = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept1, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001)
-    coefs1, ϵ1 = StateSpaceLearning.get_path_information_criteria(model1, Estimation_X, estimation_y, "aic"; intercept = intercept1)
+    coefs1, ε1 = StateSpaceLearning.get_path_information_criteria(model1, Estimation_X, estimation_y, "aic"; intercept = intercept1)
     @test length(coefs1) == 4
     @test coefs1[1] != 0
     @test all(coefs1[2:end] .== 0)
-    @test length(ϵ1) == 30
+    @test length(ε1) == 30
     model2 = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept2, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001)
-    coefs2, ϵ2 = StateSpaceLearning.get_path_information_criteria(model2, Estimation_X, estimation_y, "aic"; intercept = intercept2)
+    coefs2, ε2 = StateSpaceLearning.get_path_information_criteria(model2, Estimation_X, estimation_y, "aic"; intercept = intercept2)
     @test length(coefs2) == 3
     @test all(coefs2 .== 0)
-    @test length(ϵ2) == 30
+    @test length(ε2) == 30
 end
 
 @testset "Function: fit_glmnet" begin
-    coefs, ϵ = StateSpaceLearning.fit_glmnet(Estimation_X, estimation_y, α; information_criteria="aic", penalty_factor=penalty_factor, intercept = true)
+    coefs, ε = StateSpaceLearning.fit_glmnet(Estimation_X, estimation_y, α; information_criteria="aic", penalty_factor=penalty_factor, intercept = true)
     @test length(coefs) == 4
-    @test length(ϵ) == 30
+    @test length(ε) == 30
 end
 
 @testset "Function: fit_lasso" begin
@@ -40,29 +40,29 @@ end
     Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural_w_level, Exogenous_X)
     estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10)
 
-    coefs1, ϵ1 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
+    coefs1, ε1 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
     @test length(coefs1) == 43
-    @test length(ϵ1) == 10
+    @test length(ε1) == 10
 
-    coefs1, ϵ1 = StateSpaceLearning.fit_lasso(Estimation_X2, estimation_y, 0.1, "aic", true, components_indexes2, ones(size(Estimation_X2, 2)); rm_average = false)
+    coefs1, ε1 = StateSpaceLearning.fit_lasso(Estimation_X2, estimation_y, 0.1, "aic", true, components_indexes2, ones(size(Estimation_X2, 2)); rm_average = false)
     @test length(coefs1) == 42
-    @test length(ϵ1) == 10
+    @test length(ε1) == 10
 
-    coefs2, ϵ2 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
+    coefs2, ε2 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
     @test coefs2[1] == mean(estimation_y)
     @test length(coefs2) == 43
-    @test length(ϵ2) == 10
+    @test length(ε2) == 10
 
-    coefs3, ϵ3 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", false, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
+    coefs3, ε3 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", false, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true)
     @test coefs3[components_indexes["o"][4]] == 0
     @test all(coefs3[components_indexes["Exogenous_X"]] .!= 0)
     @test length(coefs3) == 43
-    @test length(ϵ3) == 10
+    @test length(ε3) == 10
 
-    coefs4, ϵ4 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, vcat(ones(1), ones(size(Estimation_X,2) - 2).*Inf); rm_average = true)
+    coefs4, ε4 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, vcat(ones(1), ones(size(Estimation_X,2) - 2).*Inf); rm_average = true)
     @test all(coefs4[3:end] .== 0)
     @test length(coefs4) == 43
-    @test length(ϵ4) == 10
+    @test length(ε4) == 10
 end
 
 @testset "Function: default_estimation_procedure" begin
@@ -80,19 +80,19 @@ end
     estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10).*5
 
     estimation_input1 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)
-    coefs1, ϵ1 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input1)
+    coefs1, ε1 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input1)
     @test length(coefs1) == 43
-    @test length(ϵ1) == 10
+    @test length(ε1) == 10
 
     estimation_input1 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)
-    coefs1, ϵ1 = StateSpaceLearning.default_estimation_procedure(Estimation_X2, estimation_y, components_indexes2, estimation_input1)
+    coefs1, ε1 = StateSpaceLearning.default_estimation_procedure(Estimation_X2, estimation_y, components_indexes2, estimation_input1)
     @test length(coefs1) == 42
-    @test length(ϵ1) == 10
+    @test length(ε1) == 10
 
     estimation_input2 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => false)
-    coefs2, ϵ2 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input2)
+    coefs2, ε2 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input2)
     @test length(coefs2) == 43
-    @test length(ϵ2) == 10
+    @test length(ε2) == 10
     @test all(coefs2[components_indexes["initial_states"][2:end] .- 1] .!= 0)
 end
diff --git a/test/information_criteria.jl b/test/information_criteria.jl
index 939ce00..bdd7f39 100644
--- a/test/information_criteria.jl
+++ b/test/information_criteria.jl
@@ -1,10 +1,10 @@
 @testset "Function: get_information" begin
-    ϵ = [1.1, 2.2, 3.3, 4.4, 5.5]
+    ε = [1.1, 2.2, 3.3, 4.4, 5.5]
     T = 5
     K = 3
-    bic = StateSpaceLearning.get_information(T, K, ϵ; information_criteria = "bic")
-    aic = StateSpaceLearning.get_information(T, K, ϵ; information_criteria = "aic")
-    aicc = StateSpaceLearning.get_information(T, K, ϵ; information_criteria = "aicc")
+    bic = StateSpaceLearning.get_information(T, K, ε; information_criteria = "bic")
+    aic = StateSpaceLearning.get_information(T, K, ε; information_criteria = "aic")
+    aicc = StateSpaceLearning.get_information(T, K, ε; information_criteria = "aicc")
     @test round(bic, digits = 5) == 10.36287
     @test round(aic, digits = 5) == 11.53456
     @test round(aicc, digits = 5) == 35.53456
diff --git a/test/utils.jl b/test/utils.jl
index f93046f..7a23532 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -38,16 +38,16 @@ end
     T = 30
 
     valid_indexes1 = setdiff(collect(1:30), [11, 12])
-    estimation_ϵ1 = rand(length(valid_indexes1))
-    ϵ1, fitted1 = StateSpaceLearning.get_fit_and_residuals(estimation_ϵ1, coefs, X, valid_indexes1, T)
-    @test all(isnan.(ϵ1[11:12]))
-    @test !all(isnan.(ϵ1[valid_indexes1]))
+    estimation_ε1 = rand(length(valid_indexes1))
+    ε1, fitted1 = StateSpaceLearning.get_fit_and_residuals(estimation_ε1, coefs, X, valid_indexes1, T)
+    @test all(isnan.(ε1[11:12]))
+    @test !all(isnan.(ε1[valid_indexes1]))
     @test !all(isnan.(fitted1))
 
     valid_indexes2 = collect(1:30)
-    estimation_ϵ2 = rand(length(valid_indexes2))
-    ϵ2, fitted2 = StateSpaceLearning.get_fit_and_residuals(estimation_ϵ2, coefs, X, valid_indexes2, T)
-    @test !all(isnan.(ϵ2[valid_indexes2]))
+    estimation_ε2 = rand(length(valid_indexes2))
+    ε2, fitted2 = StateSpaceLearning.get_fit_and_residuals(estimation_ε2, coefs, X, valid_indexes2, T)
+    @test !all(isnan.(ε2[valid_indexes2]))
     @test !all(isnan.(fitted2))
 end