diff --git a/notebooks/_static/images/40_mpc_block_diagram_2.png b/notebooks/_static/images/40_mpc_block_diagram_2.png new file mode 100644 index 0000000..be161e1 --- /dev/null +++ b/notebooks/_static/images/40_mpc_block_diagram_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8fbd73d3372710b839d6a0d947925b20cda00c9b3c7005bc26966f80b9e1975 +size 71056 diff --git a/notebooks/_static/images/70_comparison_identification_methods.svg b/notebooks/_static/images/70_comparison_identification_methods.svg new file mode 100644 index 0000000..4e1831d --- /dev/null +++ b/notebooks/_static/images/70_comparison_identification_methods.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eafa85f14b67f2e4d9e6b784d0660ee2c60de36bfbecf33dfa2140ca2a95092 +size 746866 diff --git a/notebooks/_static/images/70_dmdc_overview.svg b/notebooks/_static/images/70_dmdc_overview.svg new file mode 100644 index 0000000..5888a4e --- /dev/null +++ b/notebooks/_static/images/70_dmdc_overview.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c42e3b607a8a4704b7d2935b85a8aca6a7fa4aaac3b062ce30c94241ec814e +size 626611 diff --git a/notebooks/_static/images/70_sindy_diagram.svg b/notebooks/_static/images/70_sindy_diagram.svg new file mode 100644 index 0000000..89d2ec3 --- /dev/null +++ b/notebooks/_static/images/70_sindy_diagram.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674602167c6ef74e5233296df861c2e2c22d88de125f2b177ee6a52a131d7329 +size 1028729 diff --git a/notebooks/_static/images/70_sindy_with_control_diagram.svg b/notebooks/_static/images/70_sindy_with_control_diagram.svg new file mode 100644 index 0000000..d9ab5bc --- /dev/null +++ b/notebooks/_static/images/70_sindy_with_control_diagram.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd42e2abbb74a08d5be94bbfe4148457ea124f818a1012e31427b3e857a19de +size 856017 diff --git a/notebooks/_static/images/70_comparison_model_driven_data_driven.svg b/notebooks/_static/images/80_comparison_model_driven_data_driven.svg similarity index 100% rename from notebooks/_static/images/70_comparison_model_driven_data_driven.svg rename to notebooks/_static/images/80_comparison_model_driven_data_driven.svg diff --git a/notebooks/_static/images/70_safe_control_block_diagram.svg b/notebooks/_static/images/80_safe_control_block_diagram.svg similarity index 100% rename from notebooks/_static/images/70_safe_control_block_diagram.svg rename to notebooks/_static/images/80_safe_control_block_diagram.svg diff --git a/notebooks/_static/images/70_safe_learning_approaches.svg b/notebooks/_static/images/80_safe_learning_approaches.svg similarity index 100% rename from notebooks/_static/images/70_safe_learning_approaches.svg rename to notebooks/_static/images/80_safe_learning_approaches.svg diff --git a/notebooks/_static/images/70_safety_filter.svg b/notebooks/_static/images/80_safety_filter.svg similarity index 100% rename from notebooks/_static/images/70_safety_filter.svg rename to notebooks/_static/images/80_safety_filter.svg diff --git a/notebooks/_static/images/70_safety_levels.svg b/notebooks/_static/images/80_safety_levels.svg similarity index 100% rename from notebooks/_static/images/70_safety_levels.svg rename to notebooks/_static/images/80_safety_levels.svg diff --git a/notebooks/bibliography.bib b/notebooks/bibliography.bib index 9c10e88..4bd594a 100644 --- a/notebooks/bibliography.bib +++ b/notebooks/bibliography.bib @@ -58,6 +58,26 @@ 
@article{brunke_safe_2022 langid = {english} } +@article{brunton_discovering_2016, + title = {Discovering Governing Equations from Data by Sparse Identification of Nonlinear Dynamical Systems}, + author = {Brunton, Steven L. and Proctor, Joshua L. and Kutz, J. Nathan}, + date = {2016-04-12}, + journaltitle = {Proceedings of the National Academy of Sciences}, + shortjournal = {PNAS}, + volume = {113}, + number = {15}, + eprint = {27035946}, + eprinttype = {pmid}, + pages = {3932--3937}, + publisher = {National Academy of Sciences}, + issn = {0027-8424, 1091-6490}, + doi = {10.1073/pnas.1517384113}, + url = {https://www.pnas.org/content/113/15/3932}, + urldate = {2021-03-28}, + abstract = {Extracting governing equations from data is a central challenge in many diverse areas of science and engineering. Data are abundant whereas models often remain elusive, as in climate science, neuroscience, ecology, finance, and epidemiology, to name only a few examples. In this work, we combine sparsity-promoting techniques and machine learning with nonlinear dynamical systems to discover governing equations from noisy measurement data. The only assumption about the structure of the model is that there are only a few important terms that govern the dynamics, so that the equations are sparse in the space of possible functions; this assumption holds for many physical systems in an appropriate basis. In particular, we use sparse regression to determine the fewest terms in the dynamic governing equations required to accurately represent the data. This results in parsimonious models that balance accuracy with model complexity to avoid overfitting. We demonstrate the algorithm on a wide range of problems, from simple canonical systems, including linear and nonlinear oscillators and the chaotic Lorenz system, to the fluid vortex shedding behind an obstacle. The fluid example illustrates the ability of this method to discover the underlying dynamics of a system that took experts in the community nearly 30 years to resolve. We also show that this method generalizes to parameterized systems and systems that are time-varying or have external forcing.}, + langid = {english} +} + @article{brunton_modern_2022, title = {Modern {{Koopman Theory}} for {{Dynamical Systems}}}, author = {Brunton, Steven L. and Budišić, Marko and Kaiser, Eurika and Kutz, J. Nathan}, @@ -75,6 +95,17 @@ @article{brunton_modern_2022 abstract = {We establish the convergence of a class of numerical algorithms, known as dynamic mode decomposition (DMD), for computation of the eigenvalues and eigenfunctions of the infinite-dimensional Koopman operator. The algorithms act on data coming from observables on a state space, arranged in Hankel-type matrices. The proofs utilize the assumption that the underlying dynamical system is ergodic. This includes the classical measure-preserving systems, as well as systems whose attractors support a physical measure. Our approach relies on the observation that vector projections in DMD can be used to approximate the function projections by the virtue of Birkhoff's ergodic theorem. Using this fact, we show that applying DMD to Hankel data matrices in the limit of infinite-time observations yields the true Koopman eigenfunctions and eigenvalues. We also show that the singular value decomposition, which is the central part of most DMD algorithms, converges to the proper orthogonal decomposition of observables. 
We use this result to obtain a representation of the dynamics of systems with continuous spectrum based on the lifting of the coordinates to the space of observables. The numerical application of these methods is demonstrated using well-known dynamical systems and examples from computational fluid dynamics.} } +@online{brunton_sparse_2016, + title = {Sparse {{Identification}} of {{Nonlinear Dynamics}} with {{Control}} ({{SINDYc}})}, + author = {Brunton, Steven L. and Proctor, Joshua L. and Kutz, J. Nathan}, + date = {2016-05-21}, + url = {https://arxiv.org/abs/1605.06682v1}, + urldate = {2024-05-14}, + abstract = {Identifying governing equations from data is a critical step in the modeling and control of complex dynamical systems. Here, we investigate the data-driven identification of nonlinear dynamical systems with inputs and forcing using regression methods, including sparse regression. Specifically, we generalize the sparse identification of nonlinear dynamics (SINDY) algorithm to include external inputs and feedback control. This method is demonstrated on examples including the Lotka-Volterra predator--prey model and the Lorenz system with forcing and control. We also connect the present algorithm with the dynamic mode decomposition (DMD) and Koopman operator theory to provide a broader context.}, + langid = {english}, + organization = {arXiv.org} +} + @article{cerf_combining_2023, title = {Combining Neural Networks and Control: Potentialities, Patterns and Perspectives}, shorttitle = {Combining Neural Networks and Control}, @@ -119,6 +150,18 @@ @article{dogra_optimizing_2020 langid = {english} } +@online{fasel_sindy_2021, + title = {{{SINDy}} with {{Control}}: {{A Tutorial}}}, + shorttitle = {{{SINDy}} with {{Control}}}, + author = {Fasel, Urban and Kaiser, Eurika and Kutz, J. Nathan and Brunton, Bingni W. and Brunton, Steven L.}, + date = {2021-08-30}, + url = {https://arxiv.org/abs/2108.13404v1}, + urldate = {2024-05-14}, + abstract = {Many dynamical systems of interest are nonlinear, with examples in turbulence, epidemiology, neuroscience, and finance, making them difficult to control using linear approaches. Model predictive control (MPC) is a powerful model-based optimization technique that enables the control of such nonlinear systems with constraints. However, modern systems often lack computationally tractable models, motivating the use of system identification techniques to learn accurate and efficient models for real-time control. In this tutorial article, we review emerging data-driven methods for model discovery and how they are used for nonlinear MPC. In particular, we focus on the sparse identification of nonlinear dynamics (SINDy) algorithm and show how it may be used with MPC on an infectious disease control example. We compare the performance against MPC based on a linear dynamic mode decomposition (DMD) model. 
Code is provided to run the tutorial examples and may be modified to extend this data-driven control framework to arbitrary nonlinear systems.},
+ langid = {english},
+ organization = {arXiv.org}
+}
+
 @book{goodwin_control_2000,
 title = {Control {{System Design}}},
 author = {Goodwin, Graham C.},
diff --git a/notebooks/nb_10_introduction.ipynb b/notebooks/nb_10_introduction.ipynb
index 0113db2..07e94e6 100644
--- a/notebooks/nb_10_introduction.ipynb
+++ b/notebooks/nb_10_introduction.ipynb
@@ -2,7 +2,7 @@
 "cells": [ { "cell_type": "code",
- "execution_count": 8,
+ "execution_count": 5,
 "metadata": { "editable": true, "init_cell": true,
@@ -23,7 +23,7 @@
 }, { "cell_type": "code",
- "execution_count": 9,
+ "execution_count": 6,
 "metadata": { "init_cell": true, "scene__Default Scene": true,
@@ -201,7 +201,7 @@
 "" ] },
- "execution_count": 9,
+ "execution_count": 6,
 "metadata": {}, "output_type": "execute_result" }
@@ -575,12 +575,16 @@
 "- In **control theory**:\n", "\n",
 "  - we explicitly model the system using knowledge about the equations governing its behaviour, by estimating the parameters of such equations or by fitting a model on measurements from the system.\n",
- "  - we synthesize a controller by **minimizing** a cost function, in the case of optimal control. \n",
+ "  - we generally deal with dynamical systems governed by differential equations.\n",
+ "  - we synthesize a controller by **minimizing** a cost function, in the case of optimal control.\n",
+ "  - we may have to reconstruct the state from measurements using an observer.\n",
 " \n",
 "- Whereas in **reinforcement learning**:\n", "\n",
 "  - we do not have to model the system and can instead directly learn the agent that maximizes the expected reward while interacting with the environment.\n",
- "  - we train an agent by **maximing** a reward function."
+ "  - we generally deal with systems modelled by Markov Decision Processes (MDPs).\n",
+ "  - we train an agent by **maximizing** a reward function.\n",
+ "  - we directly use the measurements from the environment."
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
- "```{exercise} RC-Circuit Exercise\n",
+ "::::{exercise} RC-Circuit Exercise\n",
 ":label: rc-circuit-exercise\n", ":width: 80%\n", "\n",
@@ -936,15 +940,26 @@
 "\n", "$$R \frac{d y(t)}{dt} + \frac{1}{C} y(t) = u(t)$$\n", "\n",
- "with $q(0) = 0$ i.e. the capacitor is uncharged at $t=0$\n",
+ "with $y(0) = 0$, i.e. the capacitor is uncharged at $t=0$\n",
 "\n", "The current in the circuit is given by: $i(t) = \frac{d y(t)}{dt}$\n", "\n",
- "### Questions:\n",
+ "**Questions**:\n",
 "\n", "- Suppose our only goal is to charge the capacitor as quickly as possible without worrying about anything else. What would be your choice for a cost function?\n", "- Suppose that we additionally want to limit the current running through the circuit. 
What would then be your choice for a cost function?\n",
- "```"
+ "::::"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 ":::{solution} rc-circuit-exercise\n", ":class: dropdown\n", "- $J = - y(t)$\n", "- $J = - y(t) + \lambda i(t), \quad \lambda > 0$\n", ":::"
 ] } ],
diff --git a/notebooks/nb_20_dynamic_programming.ipynb b/notebooks/nb_20_dynamic_programming.ipynb
index a1dec01..ede502b 100644
--- a/notebooks/nb_20_dynamic_programming.ipynb
+++ b/notebooks/nb_20_dynamic_programming.ipynb
@@ -185,7 +185,8 @@
 "- Scheduling algorithms\n", "- Graph algorithms (e.g., shortest path algorithms)\n", "- Graphical models in ML (e.g., Viterbi algorithm)\n",
- "- Bioinformatics (e.g., Sequence alignment, Protein folding) "
+ "- Bioinformatics (e.g., Sequence alignment, Protein folding)\n",
+ "- Finance (e.g., derivative pricing)"
 ] }, { "cell_type": "markdown", "source": [ "## Discrete Time\n", "\n",
- "We define the **optimla cost-to-go function** (also known as **value function**) for any feasible $\mathbf{x} \in \mathbf{X}$ as[^*]:\n",
+ "We define the **optimal cost-to-go function** (also known as **value function**) for any feasible $\mathbf{x} \in \mathbf{X}$ as[^*]:\n",
 "\n", "$$\n", "V(\mathbf{x}) := \min_{u \in \mathbf{U}} J(\mathbf{x}, \mathbf{u})\n", "$$
@@ -236,7 +237,7 @@
 "If we now replace $\mathbf{x}_1 = f(\mathbf{x}_0, \mathbf{u}_0)$ and then get rid of time indices we finally get:\n", "\n", "$$\n",
- "V(\mathbf{x}_0) = \min_{\mathbf{u} \in \mathbf{U}} \left[ c(\mathbf{x}, \mathbf{u}) + V(f(\mathbf{x}, \mathbf{u})) \right]\n",
+ "V(\mathbf{x}) = \min_{\mathbf{u} \in \mathbf{U}} \left[ c(\mathbf{x}, \mathbf{u}) + V(f(\mathbf{x}, \mathbf{u})) \right]\n",
 "$$\n", "\n", "This equation is called the *Bellman equation*."
@@ -359,43 +360,70 @@
 "\n", "Each node in this new graph represents a state. We will start from the tail (the last states) and compute recursively the cost for each state transition.\n", "\n",
- "Let $l(n_1, n_2)$ the cost of going from node $n_1$ to $n_2$ and $V(n)$ be the cost-to-go from node $n$.\n",
+ "Let $c(n_1, n_2)$ be the cost of moving from node $n_1$ to node $n_2$ and $V(n)$ be the optimal cost-to-go from node $n$. 
We have $$V({\\text{G}}) = 0$$.\n", + "\n", + "We start with nodes **F** and **E**:\n", "\n", "$$\n", "\\begin{array}{lll}\n", - "V(\\text{ABDF}) &= g(\\text{ABDF}, \\text{ABDFG}) &= 1\\\\\n", - "V(\\text{ABE}) &= g(\\text{ABE}, \\text{ABEG}) &= 4\\\\\n", - "V(\\text{ACF}) &= g(\\text{ACF}, \\text{ACFG}) &= 1\\\\\n", - "V(\\text{ADF}) &= g(\\text{ADF}, \\text{ADFG}) &= 1\\\\\n", + "V(\\text{F}) &= c(\\text{F}, \\text{G}) + V({\\text{G}}) &= 1 + 0 &= 1\\\\\n", + "V(\\text{E}) &= c(\\text{E}, \\text{G}) + V({\\text{G}}) &= 1 + 0 &= 1\\\\\n", "\\end{array}\n", "$$\n", "\n", + "We then move to nodes **D** and **C**:\n", + "\n", "$$\n", "\\begin{array}{lll}\n", - "V(\\text{ABD}) &= \\min \\left[ g(\\text{ABD}, \\text{ABDG}), g(\\text{ABD}, \\text{ABDF}) + V(\\text{ABDF}) \\right]\n", - "&= \\min \\left[ 8, 5 + 1 \\right] &= 6\n", - "\\\\\n", - "V(\\text{AB}) &= \\min \\left[ g(\\text{AB}, \\text{ABD}) + V(\\text{ABD}), g(\\text{AB}, \\text{ABE}) + V(\\text{ABE}) \\right]\n", - "&= \\min \\left[ 9 + 6, 1 + 4 \\right] &= 5\n", - "\\\\\n", - "V(\\text{AC}) &= g(\\text{AC}, \\text{ACF}) + V(\\text{ACF}) &= 2 + 1 &= 3\n", - "\\\\\n", - "V(\\text{AD}) &= \\min \\left[ g(\\text{AD}, \\text{ADF}) + V(\\text{ADF}), g(\\text{AD}, \\text{ADG})) \\right]\n", - "&= \\min \\left[ 5 + 1, 8 \\right] &= 6\n", + "V(\\text{D}) &= \\min \\left[ c(\\text{D}, \\text{G}) + V({\\text{G}}), c(\\text{D}, \\text{F}) + V(\\text{F}) \\right]\n", + "&= \\min \\left[ 8 + 0, 5 + 1 \\right] &= 6\n", "\\\\\n", + "V(\\text{C}) &= c(\\text{C}, \\text{F}) + V({\\text{F}}) &= 2 + 1 &= 1\n", + "\\end{array}\n", + "$$\n", + "\n", + "After that we move to node **B**:\n", + "\n", + "$$\n", + "\\begin{array}{lll}\n", + "V(\\text{B}) &= \\min \\left[ c(\\text{B}, \\text{D}) + V({\\text{D}}), c(\\text{B}, \\text{E}) + V(\\text{E}) \\right]\n", + "&= \\min \\left[ 9 + 6, 1 + 1 \\right] &= 2\n", "\\end{array}\n", "$$\n", "\n", + "We finally go to node **A**:\n", + "\n", "$$\n", "\\begin{array}{lll}\n", "V(\\text{A}) &= \\min \\left[\n", - "g(\\text{A}, \\text{AB}) + V(\\text{AB}), g(\\text{A}, \\text{AC}) + V(\\text{AC}), g(\\text{A}, \\text{AD}) + V(\\text{AD})\n", + "c(\\text{A}, \\text{B}) + V(\\text{B}), c(\\text{A}, \\text{C}) + V(\\text{C}), c(\\text{A}, \\text{D}) + V(\\text{D})\n", "\\right]\n", - "&= \\min \\left[ 1 + 5, 5 + 3, 3 + 6 \\right] &= 6\n", + "&= \\min \\left[ 4 + 2, 5 + 3, 3 + 6 \\right] &= 6\n", "\\\\\n", "\\end{array}\n", "$$\n", "\n", + "Now that we have computed the optimal cost-to-go, we can proceed in a forward manner to determine the best path:\n", + "\n", + "$$\n", + "\\pi^* = \\underset{n}{\\argmin} [c(n_1, n_2) + V(n_2)]\n", + "$$\n", + "\n", + "For the first action (step) we have:\n", + "\n", + "$$\n", + "\\pi^*_0 &= \\underset{n_2 \\in \\{ B, C, D \\}}{\\argmin} \\left[ c(A, n_2) + V(n_2) \\right] \\\\ \n", + "&= \\underset{n_2}{\\argmin} \\left[ c(A, n_2 = B) + V(n_2 = B), c(A, n_2 = C) + V(n_2 = C), c(A, n_2 = D) + V(n_2 = D) \\right] \\\\\n", + "&= \\underset{n_2}{\\argmin} \\left[ 4 + 2, 5 + 3, 3 + 6 \\right] \\\\\n", + "&= B\n", + "$$\n", + "\n", + "Proceeding the same way we get:\n", + "\n", + "$$\n", + "\\pi^* &= \\{\\pi^*_0, \\pi^*_1, \\pi^*_2\\} &= \\{\\text{B, E, G} \\}\n", + "$$\n", + "\n", "The shortest-path is ABEG." 
 ] }, { "cell_type": "markdown", "metadata": {},
@@ -414,24 +442,44 @@
 "source": [ "### Value Iteration\n", "\n",
- "Another way to compute the optimal cost-to-go for all states that is also applicable in stochastic problems\n",
- "is the **Value Iteration** algorithm:\n",
+ "Another way to compute the optimal cost-to-go for all states is the **Value Iteration** algorithm:\n",
 "\n", "$$\n", "\begin{array}{l}\n",
- " \textbf{Input}:\ \text{MDP}\ M = \langle S, s_0, A, P_a(s' \mid s), r(s,a,s')\rangle\\\n",
+ " \textbf{Input}:\ \text{MDP}\ M = \langle S, s_0, U, f(s, u), c(s, u)\rangle\\\n",
 " \textbf{Output}:\ \text{Value function}\ V\\[2mm]\n",
 " \text{Set}\ V\ \text{to arbitrary value function; e.g., }\ V(s) = 0\ \text{for all}\ s\\[2mm]\n",
 " \text{repeat}\ \\\n",
 " \quad\quad \Delta \leftarrow 0 \\\n",
 " \quad\quad \text{foreach}\ s \in S \\\n",
- " \quad\quad\quad\quad \underbrace{V'(s) \leftarrow \max_{a \in A(s)} \sum_{s' \in S} P_a(s' \mid s)\ [r(s,a,s') + \n",
+ " \quad\quad\quad\quad \underbrace{V'(s) \leftarrow \min_{u \in U(s)} \left[ c(s, u) + V(f(s, u)) \right]}_{\text{Bellman equation}} \\\n",
+ " \quad\quad\quad\quad \Delta \leftarrow \max(\Delta, |V'(s) - V(s)|) \\\n",
+ " \quad\quad V \leftarrow V' \\\n",
+ " \text{until}\ \Delta = 0 \n",
+ "\end{array}\n",
+ "$$\n",
+ "\n",
+ ":::{note} Stochastic Value Iteration\n",
+ ":class: dropdown\n",
+ "\n",
+ "It can also be used in stochastic systems:\n",
+ "\n",
+ "$$\n",
+ "\begin{array}{l}\n",
+ " \textbf{Input}:\ \text{MDP}\ M = \langle S, s_0, U, P_u(s' \mid s), c(s, u, s')\rangle\\\n",
+ " \textbf{Output}:\ \text{Value function}\ V\\[2mm]\n",
+ " \text{Set}\ V\ \text{to arbitrary value function; e.g., }\ V(s) = 0\ \text{for all}\ s\\[2mm]\n",
+ " \text{repeat}\ \\\n",
+ " \quad\quad \Delta \leftarrow 0 \\\n",
+ " \quad\quad \text{foreach}\ s \in S \\\n",
+ " \quad\quad\quad\quad \underbrace{V'(s) \leftarrow \min_{u \in U(s)} \sum_{s' \in S} P_u(s' \mid s)\ [c(s,u,s') + \n",
 " \gamma\ V(s') ]}_{\text{Bellman equation}} \\\n",
 " \quad\quad\quad\quad \Delta \leftarrow \max(\Delta, |V'(s) - V(s)|) \\\n",
 " \quad\quad V \leftarrow V' \\\n",
 " \text{until}\ \Delta \leq \theta \n",
 "\end{array}\n",
- "$$"
+ "$$\n",
+ ":::"
 ] },
 { "cell_type": "markdown", "metadata": {},
@@ -540,14 +588,6 @@
 ":::" ] },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "````{exercise-end}\n", - "````" - ] - },
 { "cell_type": "markdown", "metadata": {}, "source": [
@@ -579,6 +619,7 @@
 " value_iteration,\n", " compute_best_path_and_actions_from_values,\n", ")\n",
+ "import networkx as nx\n",
 "```\n", "\n", "To determine the number of paths from start node to target node we use:\n",
@@ -628,7 +669,7 @@
 "cell_type": "markdown", "metadata": {}, "source": [
- "## Continous Time\n",
+ "## Continuous Time\n",
 "\n", "Let's consider a continuous-time optimal control problem with finite horizon over the time period $[t_0 ,t_f]$.\n",
@@ -652,7 +693,7 @@
 "\displaystyle V(\mathbf{x}(t), t_0, t_f) = \underset{\mathbf{u(t)}}{min} \left[ J(\mathbf{x}(t), \mathbf{u}(t), t_0, t_f) \right]\n", "$$\n", "\n",
 "It can be shown that for every $s, \tau \in [t_0, t_f]$, $s \leq \tau$, and $\mathbf{x} \in \mathbf{X}$, we have:\n",
 "\n", "$$\n",
 "V(s, \mathbf{x}) = \underset{\mathbf{u}}{\min} \left[ \int\limits_{s}^{\tau} c(\mathbf{x}(t), \mathbf{u}(t))\, dt + V(\tau, \mathbf{x}(\tau)) \right]\n",
 "$$\n",
@@ -709,35 +750,6 @@
 "\n", ":::" ] },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Numerical Methods for Continuous-time Optimal Control\n", - "\n" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Direct Single Shooting" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Direct Multiple Shooting" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Collocation" - ] }
 ], "metadata": {
diff --git a/notebooks/nb_50_MPC.ipynb b/notebooks/nb_50_MPC.ipynb
index a635bf5..f7b33ba 100644
--- a/notebooks/nb_50_MPC.ipynb
+++ b/notebooks/nb_50_MPC.ipynb
@@ -226,7 +226,7 @@
 } }, "source": [
- ":::{figure} _static/images/40_mpc_block_diagram.svg\n",
+ ":::{figure} _static/images/40_mpc_block_diagram_2.png\n",
 ":width: 60%\n", "MPC Block Diagram.\n", ":::"
@@ -505,7 +505,7 @@
 "outputs": [], "source": [ "%%capture\n",
- "max_steps = 100\n",
+ "max_steps = 200\n",
 "controller = MPCController(cart_mpc_controller)\n", "results = simulate_environment(cart_env, max_steps=max_steps, controller=controller)" ]
@@ -941,7 +941,9 @@
 "metadata": {}, "outputs": [], "source": [
- "inverted_pendulum_env = create_inverted_pendulum_environment(max_steps=200)\n",
+ "inverted_pendulum_env = create_inverted_pendulum_environment(\n",
+ "    max_steps=200, theta_initial=180, theta_threshold=np.inf\n",
+ ")\n",
 "\n", "inverted_pendulum_nonlin_model = build_inverted_pendulum_nonlinear_model(\n", "    inverted_pendulum_env, with_uncertainty=True\n", ")"
@@ -965,6 +967,7 @@
 "outputs": [], "source": [ "setpoint = np.zeros((4, 1))\n",
+ "setpoint[0] = 1.0\n",
 "distance_cost = casadi.bilin(\n", "    np.diag([1, 0, 100, 0]), inverted_pendulum_nonlin_model.x.cat - setpoint\n", ")\n",
@@ -1048,7 +1051,7 @@
 "outputs": [], "source": [ "%%capture\n",
- "max_steps = 100\n",
+ "max_steps = 200\n",
 "controller = MPCController(inverted_pendulum_mpc_controller)\n", "results = simulate_environment(\n", "    inverted_pendulum_env, max_steps=max_steps, controller=controller\n", ")"
diff --git a/notebooks/nb_70_machine_learning_control.ipynb b/notebooks/nb_70_machine_learning_control.ipynb
index 46c26da..cd61dea 100644
--- a/notebooks/nb_70_machine_learning_control.ipynb
+++ b/notebooks/nb_70_machine_learning_control.ipynb
@@ -261,14 +261,21 @@
 ], "source": [ "%autoreload\n",
- "import warnings\n",
 "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n",
- "\n",
 "import pykoopman as pk\n",
- "\n",
+ "import pysindy as ps\n",
+ "from scipy.signal import periodogram\n",
+ "from scipy.fft import rfft, rfftfreq\n",
+ "\n",
+ "from training_ml_control.control import (\n",
+ "    ConstantController,\n",
+ "    SineController,\n",
+ "    SumOfSineController,\n",
+ "    RandomController,\n",
+ ")\n",
 "from training_ml_control.environments import (\n", "    create_cart_environment,\n", "    create_inverted_pendulum_environment,\n", "    simulate_environment,\n", ")\n", "from training_ml_control.nb_utils import show_video, display_array\n", "\n",
- "warnings.simplefilter(\"ignore\", UserWarning)\n",
 "sns.set_theme()\n", "plt.rcParams[\"figure.figsize\"] = [12, 8]" ]
@@
 "source": [ "## System Identification\n", "\n",
- "- Control relies on accurate system models, so one approach is learning to adjust the model either during operation or between different 
operational instances.\n",
- "- Traditionally models are derived offline before control using first principles and identification.\n",
- "- Learning-based system identification constructs and updates models and uncertainties from data."
+ "System Identification is the process of constructing a model of a (dynamical) system from observations (measurements) of its inputs and outputs. System identification also includes the optimal design of experiments for efficiently generating informative data for fitting such models, as well as model reduction.\n",
+ "\n",
+ "In control engineering, the objective is to obtain a good performance of the closed-loop system, which comprises the physical system, the feedback loop and the controller. This performance is typically achieved by designing the control law relying on a model of the system, which needs to be identified from experimental data. If the model identification procedure is aimed at control purposes, what really matters is not to obtain the best possible model that fits the data, as in the classical system identification approach, but to obtain a model that is good enough for the desired closed-loop performance.\n",
+ "\n",
+ "Traditionally, we rely on mathematical modelling of the system from first principles and then optionally use parameter identification methods to determine the values of the mathematical model's parameters.\n",
+ "\n",
+ "In our case, we will not rely on that and instead prefer fitting a model based only on measured data."
 ] },
@@ -378,7 +388,11 @@
 "\n", "- We could evaluate the model's prediction on a held-out validation set.\n", "- Additionally we could evaluate the model's long-term predictions in an open-loop manner.\n",
- "- On top of that, we could evaluate the model's step and impulse responses from different initial states. This is especially useful for linear systems.\n",
+ "- On top of that, we could evaluate the model's step[^1] and impulse[^2] responses from different initial states. This is especially useful for linear systems.\n",
+ "- It is also valuable to look at the model's residuals and make sure there is no correlation with other available information, such as the input.\n",
+ "\n",
+ "[^1]: The step response of a dynamic system is its output when presented with an input signal, called a step, that changes from 0 to a constant value in a very short amount of time.\n",
+ "[^2]: The impulse response of a dynamic system is its output when presented with a brief input signal, called an impulse. Any LTI system can be uniquely characterized by its impulse response.\n",
 ":::"
@@ -389,9 +403,18 @@
 "## Data Collection\n",
 "\n",
- "Before learning a model of the system, we need to collect either during the system's operation i.e. online data collection, or inbetween episodes of the system's operation i.e. offline data collection. The collected has to be comprehensive and be representative of the all behaviours of the system that we desire to capture.\n",
+ "Before learning a model of the system, we need to collect data, either during the system's operation (online data collection) or in between episodes of the system's operation (offline data collection). The collected data has to be informative, i.e. comprehensive and representative of all the behaviours of the system that we desire to capture.\n",
+ "\n",
+ "Data informativity is a condition on the data under which it is possible to distinguish between different models in a (parametric) model class.\n",
 "\n",
- "To do that properly for a controlled system, we need either a human expert or a well-tuned program under supervision for data collection. However, in many safety-critical tasks such as space exploration, there is no expert collecting data. Naturally, here comes a question: Can we safely collect data without humans in the loop, and eventually achieve an aggressive control goal? For example, landing the drone faster and faster."
+ "Commonly used input signals for system identification:\n",
+ "- Step function\n",
+ "- Pseudorandom binary sequence (PRBS)\n",
+ "- Autoregressive moving-average (ARMA) process\n",
+ "- Periodic signals: sum of sinusoids\n",
+ "- Schroeder Sweep\n",
+ "\n",
+ "For certain systems in many safety-critical tasks, such as space exploration or robot-assisted surgery, we have to rely on human experts to control the system and avoid damaging it or its environment."
 ] }, { "cell_type": "markdown", "source": [ "### Cart" ] },
@@ -409,7 +432,7 @@
 "metadata": {}, "outputs": [], "source": [
- "cart_env = create_cart_environment(goal_position=9)"
+ "cart_env = create_cart_environment(max_steps=300, goal_position=20, max_position=30)"
 ] },
@@ -421,44 +444,45 @@
 "source": [ "cart_observations = []\n", "cart_actions = []\n",
- "for _ in range(20):\n",
- "    result = simulate_environment(cart_env)\n",
+ "cart_frames = []\n",
+ "\n",
+ "controllers = {\n",
+ "    f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n",
+ "        np.asarray([cart_env.max_action / 2])\n",
+ "    ),\n",
+ "    \"Sinusoid @ 10Hz\": SineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequency=10\n",
+ "    ),\n",
+ "    \"Sinusoid @ 0.5Hz\": SineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequency=0.5\n",
+ "    ),\n",
+ "    \"Sum of Sinusoids @ [2Hz, 10Hz]\": SumOfSineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequencies=[2, 10]\n",
+ "    ),\n",
+ "    \"Random\": RandomController(cart_env),\n",
+ "}\n",
+ "\n",
+ "for controller in controllers.values():\n",
+ "    result = simulate_environment(cart_env, controller=controller)\n",
 "    cart_observations.append(result.observations)\n",
- "    cart_actions.append(result.actions)"
+ "    cart_actions.append(result.actions)\n",
+ "    cart_frames.append(result.frames)"
 ] }, { "cell_type": "code", "execution_count": null, "id": "cc1ea39d-5a37-49f2-bcca-d8d875ebcd36", "metadata": {}, "outputs": [], "source": [
 "fig, axes = plt.subplots(len(controllers))\n",
 "axes = axes.ravel()\n",
 "for i, label in 
enumerate(controllers):\n", + " t = np.arange(len(cart_actions[i])) * cart_env.dt\n", + " axes[i].plot(t, cart_actions[i], label=label)\n", + " axes[i].set_xlabel(\"Time\")\n", + " axes[i].set_title(label)\n", "fig.tight_layout()\n", "plt.show();" ] @@ -466,103 +490,78 @@ { "cell_type": "code", "execution_count": null, - "id": "cc1ea39d-5a37-49f2-bcca-d8d875ebcd36", + "id": "f2b4a5b6-ab5e-4d90-9c9e-fe4c6884badc", "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots()\n", - "for j in range(len(cart_actions)):\n", - " t = np.arange(len(cart_actions[j]))\n", - " ax.plot(t, cart_actions[j])\n", - " ax.set_xlabel(\"Time\")\n", - " ax.set_title(\"$u$\")\n", + "fig, axes = plt.subplots(1, 2, sharex=True)\n", + "axes = axes.ravel()\n", + "for i, label in zip(range(2), [\"$x$\", r\"$\\dot{x}$\"]):\n", + " for j, controller_name in enumerate(controllers):\n", + " t = np.arange(len(cart_observations[j][:])) * cart_env.dt\n", + " axes[i].plot(t, cart_observations[j][:, i], label=controller_name)\n", + " axes[i].set_xlabel(\"Time\")\n", + " axes[i].set_title(label)\n", + "plt.legend()\n", "fig.tight_layout()\n", "plt.show();" ] }, { "cell_type": "markdown", - "id": "9cce48e1-ffc6-4c1a-b175-2ce58e7e29e6", - "metadata": {}, - "source": [ - "### Inverted Pendulum" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e929ae22-99b3-4a48-b9fc-e2e790d0e456", - "metadata": {}, - "outputs": [], - "source": [ - "inverted_pendulum_env = create_inverted_pendulum_environment(\n", - " max_steps=100, theta_threshold=np.deg2rad(45)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e40f135a-0e0a-4fe5-8827-2b66ab6408b5", + "id": "cce69707-84b6-4fac-a296-6b60cdd6c2f8", "metadata": {}, - "outputs": [], "source": [ - "inverted_pendulum_observations = []\n", - "inverted_pendulum_actions = []\n", - "for _ in range(20):\n", - " result = simulate_environment(inverted_pendulum_env)\n", - " inverted_pendulum_observations.append(result.observations)\n", - " inverted_pendulum_actions.append(result.actions)" + "Let's look at the phase portrait[^*] instead.\n", + "\n", + "[^*]: A phase portrait is a geometric representation of the trajectories of a dynamical system in the phase plane. Each set of initial conditions is represented by a different curve, or point." 
 ] }, { "cell_type": "code", "execution_count": null, "id": "728e13ac-0b8e-4934-b1ab-bb83a442dfde", "metadata": {}, "outputs": [], "source": [
 "fig, ax = plt.subplots(1, 1, sharex=True)\n",
 "\n",
 "for j, controller_name in enumerate(controllers):\n",
 "    ax.plot(\n",
 "        cart_observations[j][:, 0], cart_observations[j][:, 1], label=controller_name\n",
 "    )\n",
 "    ax.set_xlabel(\"$x$\")\n",
 "    ax.set_ylabel(\"$\dot{x}$\")\n",
 "plt.legend()\n",
 "fig.tight_layout()\n",
 "plt.show();"
 ] }, { "cell_type": "markdown", "id": "96163380-18c3-49e0-81f9-e397c60e3fbc", "metadata": {}, "source": [
 "Let's visualize the Power Spectral Density (PSD) of the recorded state signals."
 ] }, { "cell_type": "code", "execution_count": null, "id": "e4af8b67-b339-45d6-bd78-aa15822c23f4", "metadata": {}, "outputs": [], "source": [
 "fig, axes = plt.subplots(1, 2, sharex=True)\n",
 "axes = axes.ravel()\n",
 "for i, label in zip(range(2), [\"$x$\", r\"$\dot{x}$\"]):\n",
 "    for j, controller_name in enumerate(controllers):\n",
 "        f, Pxx_den = periodogram(cart_observations[j][:, i], 1 / cart_env.dt)\n",
 "        axes[i].semilogy(f, Pxx_den, label=controller_name)\n",
 "        axes[i].set_xlabel(\"frequency [Hz]\")\n",
 "        axes[i].set_ylabel(\"Power Spectral Density\")\n",
 "        axes[i].set_title(label)\n",
 "plt.legend()\n",
 "fig.tight_layout()\n",
 "plt.show();"
 ] },
@@ -662,6 +661,41 @@
 "- Still susceptible to domain shift issues."
 ] },
+ { + "cell_type": "markdown", + "id": "7994e6e7-f2c7-426b-8f78-8d4897de313c", + "metadata": {}, + "source": [
+ "## Sparse Identification of Nonlinear Dynamics (SINDy)\n",
+ "\n",
+ ":::{figure} _static/images/70_sindy_diagram.svg\n",
+ ":width: 90%\n",
+ "\n",
+ "Schematic of the SINDy algorithm, demonstrated on the Lorenz equations. Data are collected from the system, including a time history of the states $\mathbf{X}$ and derivatives $\dot{\mathbf{X}}$. Next, a library of nonlinear functions of the states, $\Theta(\mathbf{X})$, is constructed. This is then used to find the fewest terms needed to satisfy $\dot{\mathbf{X}} = \Theta(\mathbf{X})\Xi$. The few entries in the vectors of $\Xi$, solved for by sparse regression, denote the relevant\n",
+ "terms in the right-hand side of the dynamics. 
*Taken from {cite}`brunton_discovering_2016`*.\n",
+ ":::"
+ ] + },
+ { + "cell_type": "markdown", + "id": "6390573f-cc23-4a2d-a6ea-b3a4dc6c63cc", + "metadata": {}, + "source": [
+ "### SINDy with Control (SINDyc)"
+ ] + },
+ { + "cell_type": "markdown", + "id": "6eda563d-b736-4f78-8294-c2885016c374", + "metadata": {}, + "source": [
+ ":::{figure} _static/images/70_sindy_with_control_diagram.svg\n",
+ ":width: 80%\n",
+ "\n",
+ "Schematic of the SINDy with control algorithm. Active terms in a library of candidate nonlinearities are selected via sparse regression. *Taken from {cite}`fasel_sindy_2021`*.\n",
+ ":::"
+ ] + },
 { "cell_type": "markdown", "id": "8bad92c6-e587-41a9-9370-11f3f0cc7329", "metadata": {}, "source": [
 "## Koopman Operator\n",
 "\n",
+ ":::{figure} _static/images/60_koopman_operators_summary.svg\n",
+ ":width: 70%\n",
+ ":align: center\n",
+ "---\n",
+ "name: Koopman Operators Summary\n",
+ "---\n",
+ "Summary of the idea of Koopman operators. By lifting to a space of observables, we\n",
+ "trade a nonlinear finite-dimensional system for a linear infinite-dimensional system. *Taken from {cite}`colbrook_multiverse_2023`*.\n",
+ ":::\n",
+ "\n",
 "Given $\mathcal{F}$, a space of functions $g: \Omega \rightarrow \mathbb{C}$, and $\Omega$ the state space of our dynamical system, the Koopman operator is defined on a suitable domain $\mathcal{D}(\mathcal{K}) \subset \mathcal{F}$ via the composition formula:\n",
 "\n", "$$\n",
@@ -698,22 +742,6 @@
 "As a result, the KMD is invaluable for tasks such as dimensionality and model reduction. It generalizes the space-time separation of variables typically achieved through the Fourier transform or singular value decomposition (SVD)."
 ] },
- { - "cell_type": "markdown", - "id": "28d669d0-4b95-47fa-af56-5fe0317f492f", - "metadata": {}, - "source": [
- ":::{figure} _static/images/60_koopman_operators_summary.svg\n",
- ":width: 70%\n",
- ":align: center\n",
- "---\n",
- "name: Koopman Operators Summary\n",
- "---\n",
- "Summary of the idea of Koopman operators. By lifting to a space of observables, we\n",
- "trade a nonlinear finite-dimensional system for a linear infinite-dimensional system {cite}`colbrook_multiverse_2023`.\n",
- ":::"
- ] - },
 { "cell_type": "markdown", "id": "319c7c50-bb03-46ce-a268-2281cd9c1f0b", "metadata": {}, "source": [
 "## Dynamic Mode Decomposition (DMD)\n",
 "\n",
- "Dynamic Mode Decomposition (DMD) is a popular data-driven analysis technique used to decompose complex, nonlinear systems into a set of modes, revealing underlying patterns and dynamics through spectral analysis.\n",
+ "Dynamic Mode Decomposition (DMD) is a popular data-driven analysis technique used to decompose complex, nonlinear systems into a set of coherent structures (also called DMD modes) that grow, decay, and/or oscillate in time, revealing underlying patterns and dynamics through spectral analysis. In other words, DMD converts a dynamical system into a superposition of modes whose dynamics are governed by eigenvalues.\n",
 "\n",
 "The simplest and historically first interpretation of DMD is as a linear regression.\n",
@@ -825,6 +853,11 @@
 "### DMD with Control (DMDc)\n",
 "\n",
+ ":::{figure} _static/images/70_dmdc_overview.svg\n",
+ ":width: 90%\n",
+ "Overview of Dynamic Mode Decomposition with Control (DMDc). 
*Adapted from {cite}`proctor_dynamic_2016`*.\n",
+ ":::\n",
+ "\n",
 "One of the most successful applications of the Koopman operator framework lies in control, with demonstrated successes in various challenging\n",
 "applications. These include fluid dynamics, robotics, power grids, biology, and chemical processes.\n",
 "\n",
@@ -858,16 +891,176 @@
 "A solution is given as $\begin{pmatrix}A & B \end{pmatrix} = \mathbf{Y} \Omega^{+}$."
 ] },
+ { + "cell_type": "markdown", + "id": "5341126d-f79f-40c6-9461-33e8d953b4d2", + "metadata": {}, + "source": [
+ "## Comparison"
+ ] + },
+ { + "cell_type": "markdown", + "id": "d830cd3e-358d-4a89-9a29-3136dd5da37e", + "metadata": {}, + "source": [
+ ":::{figure} _static/images/70_comparison_identification_methods.svg\n",
+ ":width: 80%\n",
+ "\n",
+ "Overview of various methods that use regression to identify dynamics from data. *Taken from {cite}`brunton_sparse_2016`*.\n",
+ ":::"
+ ] + },
+ { + "cell_type": "markdown", + "id": "3259df92-4aaa-4542-9768-961d9630c5b1", + "metadata": {}, + "source": [
+ "# Application to Systems"
+ ] + },
 { "cell_type": "markdown", "id": "e2ec06ac-e9e8-4918-9ea8-8107a16f7104", "metadata": {}, "source": [
- "#### Cart\n",
+ "## Cart\n",
 "\n",
 "We will use the DMDc method to fit a model on the data collected from the Cart environment. For that we will make use of the [pykoopman](https://pykoopman.readthedocs.io/en/master/index.html) package."
 ] },
+ { + "cell_type": "markdown", + "id": "b0213428-60d2-467a-ae31-c22e6dd14094", + "metadata": {}, + "source": [
+ "### Data"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "b8e8b5eb-62db-494c-95d1-1f1eda218655", + "metadata": {}, + "outputs": [], + "source": [
+ "cart_env = create_cart_environment(max_steps=300, goal_position=29, max_position=30)\n",
+ "cart_observations = []\n",
+ "cart_actions = []\n",
+ "\n",
+ "controllers = {\n",
+ "    f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n",
+ "        np.asarray([cart_env.max_action / 2])\n",
+ "    ),\n",
+ "    \"Sinusoid @ 10Hz\": SineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequency=10\n",
+ "    ),\n",
+ "    \"Sinusoid @ 0.5Hz\": SineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequency=0.5\n",
+ "    ),\n",
+ "    \"Sum of Sinusoids @ [2Hz, 10Hz]\": SumOfSineController(\n",
+ "        cart_env, np.asarray([cart_env.max_action]), frequencies=[2, 10]\n",
+ "    ),\n",
+ "    \"Random\": RandomController(cart_env),\n",
+ "}\n",
+ "\n",
+ "for controller in controllers.values():\n",
+ "    result = simulate_environment(cart_env, controller=controller)\n",
+ "    cart_observations.append(result.observations)\n",
+ "    cart_actions.append(result.actions)"
+ ] + },
+ { + "cell_type": "markdown", + "id": "d3bb4af8-22b2-4857-98c4-81d9d978d2d8", + "metadata": {}, + "source": [
+ "### SINDYc"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "78e0f204-0e80-4a8e-854e-1926ad6c3843", + "metadata": {}, + "outputs": [], + "source": [
+ "# Train with random\n",
+ "X_train = cart_observations[-1][:-1].copy()\n",
+ "U_train = cart_actions[-1].copy()\n",
+ "t_train = np.arange(0, len(X_train)) * cart_env.dt\n",
+ "# Test with step\n",
+ "X_test = cart_observations[0][:-1].copy()\n",
+ "U_test = cart_actions[0].copy()\n",
+ "t_test = np.arange(0, len(X_test)) * cart_env.dt"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "af3d0377-fadd-4844-8217-a4da41d72d51", + "metadata": {}, + "outputs": [], + "source": [
+ "model = ps.SINDy()\n",
+ "model.fit(X_train, 
u=U_train, t=t_train)"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "3f4878e9-df19-4541-93cc-e205d6fdc173", + "metadata": {}, + "outputs": [], + "source": [
+ "model.print()"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "616e27df-8e26-4a16-b96b-211f8ed4646e", + "metadata": {}, + "outputs": [], + "source": [
+ "X_test_sim = model.simulate(X_test[0], t_test, u=U_test)"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "05a43345-f83d-4a4c-9f96-1b202abab5a3", + "metadata": {}, + "outputs": [], + "source": [
+ "fig, axs = plt.subplots(X_test.shape[1], 1, sharex=True, figsize=(7, 9))\n",
+ "for i in range(X_test.shape[1]):\n",
+ "    axs[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n",
+ "    axs[i].plot(t_test[1:], X_test_sim[:, i], \"r--\", label=\"Model\")\n",
+ "    axs[i].legend()\n",
+ "    axs[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n",
+ "\n",
+ "fig = plt.figure(figsize=(10, 4.5))\n",
+ "ax1 = fig.add_subplot(1, 2, 1)\n",
+ "ax1.plot(X_test[:, 0], X_test[:, 1], \"k\")\n",
+ "ax1.set(xlabel=\"$x_1$\", ylabel=\"$x_2$\", title=\"true simulation\")\n",
+ "\n",
+ "ax2 = fig.add_subplot(1, 2, 2)\n",
+ "ax2.plot(X_test_sim[:, 0], X_test_sim[:, 1], \"r--\")\n",
+ "ax2.set(xlabel=\"$x_1$\", ylabel=\"$x_2$\", title=\"model simulation\")\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show();"
+ ] + },
+ { + "cell_type": "markdown", + "id": "82c5089b-49e7-49e6-8824-2fc1c71bd08b", + "metadata": {}, + "source": [
+ "### DMDc"
+ ] + },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
- "X = np.concatenate([obs[:-1] for obs in cart_observations])\n",
- "U = np.concatenate(cart_actions)"
+ "# Train with random\n",
+ "X_train = cart_observations[-1][:-1].copy()\n",
+ "U_train = cart_actions[-1].copy()\n",
+ "t_train = np.arange(0, len(X_train)) * cart_env.dt\n",
+ "# Test with step\n",
+ "X_test = cart_observations[0][:-1].copy()\n",
+ "U_test = cart_actions[0].copy()\n",
+ "t_test = np.arange(0, len(X_test)) * cart_env.dt"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
- "DMDc = pk.regression.DMDc()\n",
+ "DMDc = pk.regression.DMDc(svd_output_rank=4, svd_rank=6)\n",
 "model = pk.Koopman(regressor=DMDc)\n",
- "model.fit(X, u=U, dt=cart_env.dt)"
+ "model.fit(X_train, u=U_train, dt=cart_env.dt)"
 ] },
@@ -927,8 +1126,8 @@
 "metadata": {}, "outputs": [], "source": [
- "Xkoop = model.simulate(X[0], U, n_steps=X.shape[0] - 1)\n",
- "Xkoop = np.vstack([X[0][np.newaxis, :], Xkoop])"
+ "Xkoop = model.simulate(X_test[0], U_test, n_steps=X_test.shape[0] - 1)\n",
+ "Xkoop = np.vstack([X_test[0][np.newaxis, :], Xkoop])"
 ] },
@@ -940,17 +1139,15 @@
 "source": [
- "t = np.arange(0, len(U))\n",
 "fig, axs = plt.subplots(3, 1, sharex=True, figsize=(16, 8))\n",
- "axs[0].plot(t, U, \"-k\")\n",
+ "axs[0].plot(t_test, U_test, \"-k\")\n",
 "axs[0].set(ylabel=r\"$u$\")\n",
- "axs[1].plot(t, X[:, 0], \"-\", color=\"b\", label=\"True\")\n",
- "axs[1].plot(t, Xkoop[:, 0], \"--r\", label=\"DMDc\")\n",
+ "axs[1].plot(t_test, X_test[:, 0], \"-\", color=\"b\", label=\"True\")\n",
+ "axs[1].plot(t_test, Xkoop[:, 0], \"--r\", label=\"DMDc\")\n",
 "axs[1].set(ylabel=r\"$x_1$\")\n",
- "axs[2].plot(t, X[:, 1], \"-\", color=\"b\", label=\"True\")\n",
- "axs[2].plot(t, Xkoop[:, 1], \"--r\", label=\"DMDc\")\n",
+ "axs[2].plot(t_test, X_test[:, 1], \"-\", color=\"b\", label=\"True\")\n",
+ "axs[2].plot(t_test, Xkoop[:, 1], \"--r\", label=\"DMDc\")\n",
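+ "# x_1 is the cart position and x_2 its velocity; the dashed red curves are the open-loop DMDc rollout on held-out test data\n",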
"axs[2].set(ylabel=r\"$x_2$\", xlabel=r\"$t$\")\n", "axs[1].legend(loc=\"best\")\n", - "axs[1].set_ylim([-8, 8])\n", - "axs[2].set_ylim([-8, 8])\n", "fig.tight_layout();" ] }, @@ -982,10 +1179,10 @@ "id": "5d0f5a15-4544-4001-8a59-d280bc21f929", "metadata": {}, "source": [ - ":::{exercise} Inverted Pendulum DMDc\n", - ":label: inverted-pendulum-dmdc\n", + ":::{exercise} Inverted Pendulum\n", + ":label: inverted-pendulum-data-model\n", "\n", - "Use the DMDc method to fit a model on the data collected from the inverted pendulum environment.\n", + "Use the SINDYc or DMDc method to fit a model on the data collected from the inverted pendulum environment.\n", ":::" ] }, @@ -994,7 +1191,7 @@ "id": "391a6e1f-86c5-4511-b026-011f2385279e", "metadata": {}, "source": [ - ":::{solution} inverted-pendulum-dmdc\n", + ":::{solution} inverted-pendulum-data-model\n", ":::" ] }, @@ -1013,7 +1210,7 @@ "id": "3830d079-29d8-45ea-ad1b-c40d288ec7a7", "metadata": {}, "source": [ - ":::{solution} inverted-pendulum-dmdc\n", + ":::{solution} inverted-pendulum-data-model\n", ":class: dropdown\n", "**Work in Progress**\n", "\n", diff --git a/notebooks/nb_80_safe_learning_control.ipynb b/notebooks/nb_80_safe_learning_control.ipynb index 14a8cf0..0812f66 100644 --- a/notebooks/nb_80_safe_learning_control.ipynb +++ b/notebooks/nb_80_safe_learning_control.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "init_cell": true, "scene__Default Scene": true, @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "init_cell": true, "scene__Default Scene": true, @@ -32,7 +32,177 @@ "ActiveScene" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%presentation_style" ] @@ -107,9 +277,9 @@ } }, "source": [ - ":::{figure} _static/images/70_comparison_model_driven_data_driven.svg\n", - ":width: 90%\n", - "A comparison of model-driven, data-driven, and combined approaches {cite}`brunke_safe_2022`.\n", + ":::{figure} _static/images/80_comparison_model_driven_data_driven.svg\n", + ":width: 80%\n", + "A comparison of model-driven, data-driven, and combined approaches. *Taken from {cite}`brunke_safe_2022`*.\n", ":::" ] }, @@ -139,9 +309,9 @@ } }, "source": [ - ":::{figure} _static/images/40_safe_control_block_diagram.svg\n", + ":::{figure} _static/images/80_safe_control_block_diagram.svg\n", ":width: 80%\n", - "Block diagram representing safe learning control approaches {cite}`brunke_safe_2022`.\n", + "Block diagram representing safe learning control approaches. *Taken from {cite}`brunke_safe_2022`*.\n", ":::" ] }, @@ -164,9 +334,9 @@ } }, "source": [ - ":::{figure} _static/images/70_safety_levels.svg\n", + ":::{figure} _static/images/80_safety_levels.svg\n", ":width: 100%\n", - "Illustration of Safety Levels {cite}`brunke_safe_2022`.\n", + "Illustration of Safety Levels. 
*Taken from {cite}`brunke_safe_2022`*.\n",
 ":::"
 ] },
@@ -332,10 +502,10 @@
 } }, "source": [
- ":::{figure} _static/images/70_safety_filter.svg\n",
+ ":::{figure} _static/images/80_safety_filter.svg\n",
 ":width: 60%\n",
 "Based on the current state $x$, a learning-based controller provides an input\n",
- "$u_L = \pi_L(x) \in \mathbb{R}^m$, which is processed by the safety filter $u = \pi_S(x, u_S)$ and applied to the real system {cite}`hewing_learningbased_2020`.\n",
+ "$u_L = \pi_L(x) \in \mathbb{R}^m$, which is processed by the safety filter $u = \pi_S(x, u_L)$ and applied to the real system. *Taken from {cite}`hewing_learningbased_2020`*.\n",
 ":::"
 ] },
@@ -388,9 +558,9 @@
 } }, "source": [
- ":::{figure} _static/images/40_safe_learning_approaches.svg\n",
+ ":::{figure} _static/images/80_safe_learning_approaches.svg\n",
 ":width: 70%\n",
- "Summary of safe learning control approaches {cite}`brunke_safe_2022`.\n",
+ "Summary of safe learning control approaches. *Taken from {cite}`brunke_safe_2022`*.\n",
 ":::"
 ] } ]
diff --git a/notebooks/nb_90_practice.ipynb b/notebooks/nb_90_practice.ipynb
new file mode 100644
index 0000000..4c2eb72
--- /dev/null
+++ b/notebooks/nb_90_practice.ipynb
@@ -0,0 +1,240 @@
+{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8497a4b2-a020-4fba-8539-bc1361a34023", + "metadata": { + "editable": true, + "init_cell": true, + "scene__Default Scene": true, + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "remove-cell", + "ActiveScene" + ] + }, + "outputs": [], + "source": [
+ "%%capture\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "%matplotlib inline\n",
+ "%load_ext training_ml_control\n",
+ "%set_random_seed 12"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "f7339d3d-eae4-4e05-a7fb-050df96782e7", + "metadata": { + "init_cell": true, + "scene__Default Scene": true, + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "remove-cell", + "ActiveScene" + ] + }, + "outputs": [], + "source": [
+ "%presentation_style"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "6e892149-098a-42c7-b554-bbb83918888d", + "metadata": { + "init_cell": true, + "scene__Default Scene": true, + "tags": [ + "ActiveScene" + ] + }, + "outputs": [], + "source": [
+ "import warnings\n",
+ "\n",
+ "warnings.simplefilter(\"ignore\", UserWarning)"
+ ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "cd78d373-34af-4b57-8d16-dbf6f662331d", + "metadata": { + "init_cell": true, + "scene__Default Scene": true, + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "remove-cell", + "ActiveScene" + ] + }, + "outputs": [], + "source": [
+ "%autoreload\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import pykoopman as pk\n",
+ "import pysindy as ps\n",
+ "\n",
+ "from training_ml_control.control import (\n",
+ "    ConstantController,\n",
+ "    SineController,\n",
+ "    SumOfSineController,\n",
+ "    RandomController,\n",
+ ")\n",
+ "from training_ml_control.environments import (\n",
+ "    create_acrobot_environment,\n",
+ "    simulate_environment,\n",
+ ")\n",
+ "from training_ml_control.nb_utils import show_video, display_array\n",
+ "\n",
+ "sns.set_theme()\n",
+ "plt.rcParams[\"figure.figsize\"] = [12, 8]"
+ ] + },
+ { + "cell_type": "markdown", + "id": "67ad3b3e-67cf-4b3d-875d-96496d23d193", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "slide" + }, + "tags": [ + "remove-cell" + ] + }, + "source": [
":::{figure} ./_static/images/aai-institute-cover.png\n", + ":width: 90%\n", + ":align: center\n", + "---\n", + "name: aai-institute\n", + "---\n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "77372629-782f-44b6-ad24-baedbca2fbc2", + "metadata": {}, + "source": [ + "# Practice\n", + "\n", + "In this section, you will be tasked with solving a control problem from start to finish.\n", + "\n", + "Feel free to proceed as you wish. You could use a mathematical model or learn a model from data and then attempt to control it." + ] + }, + { + "cell_type": "markdown", + "id": "c63180f2-e769-4c43-999e-5e46924c1fbb", + "metadata": {}, + "source": [ + ":::{exercise-start} Acrobot\n", + ":label: acrobot-exercise\n", + "\n", + "We will be using a modified version of the [Acrobot](https://gymnasium.farama.org/environments/classic_control/acrobot/) environment from [gymnasium](https://gymnasium.farama.org/).\n", + "\n", + "The system consists of two links connected linearly to form a chain, with one end of the chain fixed. The joint between the two links is actuated. \n", + "\n", + "As seen below, the links are represented in blue and are connected by two joints represented in green (or yellow?).\n", + "\n", + "If you would like to use a mathematical model of the system, then please refer to [the following page](http://incompleteideas.net/book/11/node4.html) for the equations.\n", + "\n", + "**First Goal**\n", + "\n", + "The first goal is to apply torques on the actuated joint to swing the free end of the linear chain above a (configurable) target height (black horizontal line above system) while starting from the initial state of hanging downwards.\n", + "\n", + "**Second Goal**\n", + "\n", + "The second goal is to apply torques on the actuated joint to swing the free end of the linear chain as fast as possible while remaining below the target height.\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20556f5c-61e5-46b4-9cf1-9b8097ec430e", + "metadata": {}, + "outputs": [], + "source": [ + "env = create_acrobot_environment()\n", + "result = simulate_environment(env)\n", + "show_video(result.frames, fps=env.metadata[\"render_fps\"])" + ] + }, + { + "cell_type": "markdown", + "id": "1141f1a7-64d9-436b-bd08-45bc823232a6", + "metadata": {}, + "source": [ + ":::{exercise-end}\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c72b43f0-abea-4a7e-ace1-b09c9a848278", + "metadata": {}, + "outputs": [], + "source": [ + "# Your solution here" + ] + }, + { + "cell_type": "markdown", + "id": "d503889d-7d14-4d7b-9f55-78099eb5e442", + "metadata": {}, + "source": [ + ":::{solution} acrobot-exercise\n", + ":class: dropdown\n", + "\n", + "**Work in Progress**\n", + ":::" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "scenes_data": { + "active_scene": "Default Scene", + "init_scene": "Default Scene", + "scenes": [ + "Default Scene" + ] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/training_ml_control/control.py b/src/training_ml_control/control.py index 34f6e87..1b6991c 100644 --- a/src/training_ml_control/control.py +++ b/src/training_ml_control/control.py @@ -10,6 +10,8 @@ 
"FeedbackController", "Observer", "ConstantController", + "SineController", + "SumOfSineController", "RandomController", "build_lqr_controller", "build_mpc_controller", @@ -17,12 +19,12 @@ class FeedbackController(Protocol): - def control(self, observation: NDArray) -> NDArray: + def control(self, measurement: NDArray) -> NDArray: ... class Observer(Protocol): - def observe(self, measrument: NDArray) -> NDArray: + def observe(self, measurement: NDArray) -> NDArray: ... @@ -30,15 +32,83 @@ class ConstantController: def __init__(self, u: NDArray = np.zeros(1)) -> None: self.u = u - def act(self, observation: NDArray) -> NDArray: + def act(self, measurement: NDArray) -> NDArray: return self.u +class SineController: + def __init__( + self, env: Env, u_max: NDArray = np.asarray([10]), frequency: float = 1 + ) -> None: + self.dt = env.unwrapped.dt + self.u_max = u_max + self.frequency = frequency + self.i = 0 + + def act(self, measurement: NDArray) -> NDArray: + t = self.dt * self.i + self.i += 1 + u = self.u_max * np.sin(2 * np.pi * self.frequency * t) + return u + + +class SumOfSineController: + def __init__( + self, + env: Env, + u_max: NDArray = np.asarray([10]), + frequencies: list[float] = [1.0], + ) -> None: + self.dt = env.unwrapped.dt + self.u_max = u_max + self.frequencies = frequencies + self.i = 0 + + def act(self, measurement: NDArray) -> NDArray: + t = self.dt * self.i + self.i += 1 + u = np.asarray([0.0]) + for frequency in self.frequencies: + u += np.sin(2 * np.pi * frequency * t) + u *= self.u_max + return u + + +class SchroederSweepController: + def __init__( + self, + env: Env, + u_max: NDArray = np.asarray([10]), + n_time_steps: int = 200, + input_power: float = 10, + n_harmonics: int = 3, + ) -> None: + self.dt = env.unwrapped.dt + self.u_max = u_max + self.input_power = input_power + self.n_time_steps = n_time_steps + self.n_harmonics = n_harmonics + self.amplitude = np.sqrt(self.input_power / self.n_harmonics) + self.phis = np.zeros(self.n_harmonics) + for k in range(1, self.n_harmonics): + self.phis[k] = self.phis[k - 1] - np.pi * k**2 / self.n_time_steps + self.i = 0 + + def act(self, measurement: NDArray) -> NDArray: + t = self.dt * self.i + self.i += 1 + u = np.asarray([0.0]) + for k, phi in enumerate(self.phis): + u += np.cos(2 * np.pi * (k + 1) * t + phi) + u *= self.amplitude + return u + + class RandomController: def __init__(self, env: Env) -> None: self.action_space = env.action_space - def act(self, observation: NDArray) -> NDArray: + def act(self, measurment: NDArray) -> NDArray: return self.action_space.sample() diff --git a/src/training_ml_control/environments/acrobot.py b/src/training_ml_control/environments/acrobot.py new file mode 100644 index 0000000..fb153d7 --- /dev/null +++ b/src/training_ml_control/environments/acrobot.py @@ -0,0 +1,94 @@ +import numpy as np +from gymnasium import spaces +from gymnasium.envs.classic_control.acrobot import AcrobotEnv, bound, rk4, wrap +from numpy import cos, pi +from numpy.typing import NDArray + +__all__ = ["ContinuousAcrobotEnv"] + + +class ContinuousAcrobotEnv(AcrobotEnv): + r"""The continuous acrobot environment problem is based on the classic problem. + + This class is a modified version of the `AcrobotEnv` + environment from gymnasium that modifies that environment to taken continuous + value for the action. + + ## Action Space + + The action is continuous, deterministic, and represents the torque applied + on the actuated joint between the two links. 
+
+    """
+
+    def __init__(
+        self,
+        render_mode: str | None = None,
+        max_torque: float = 1.0,
+        *,
+        torque_noise_max: float = 0.0,
+        target_height: float = 1.0,
+    ):
+        self.render_mode = render_mode
+        self.screen = None
+        self.clock = None
+        self.isopen = True
+
+        self.torque_noise_max = torque_noise_max
+        self.target_height = target_height
+        self.max_torque = max_torque
+
+        if self.target_height > self.LINK_LENGTH_1 + self.LINK_LENGTH_2:
+            raise ValueError(
+                f"Target height {self.target_height} is too high. "
+                f"It should be at most {self.LINK_LENGTH_1 + self.LINK_LENGTH_2}."
+            )
+
+        high = np.array(
+            [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
+        )
+        low = -high
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
+        self.action_space = spaces.Box(
+            low=-self.max_torque, high=self.max_torque, dtype=np.float32
+        )
+        self.state = None
+
+    def step(self, a: NDArray) -> tuple[NDArray, float, bool, bool, dict]:
+        s = self.state
+        assert s is not None, "Call reset before using AcrobotEnv object."
+        torque = np.copy(a)  # copy so that adding noise does not mutate the caller's action
+        # Add noise to the torque action
+        if self.torque_noise_max > 0:
+            torque += self.np_random.uniform(
+                -self.torque_noise_max, self.torque_noise_max
+            )
+
+        # Now, augment the state with our torque action so it can be passed to
+        # _dsdt
+        s_augmented = np.append(s, torque)
+
+        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
+
+        ns[0] = wrap(ns[0], -pi, pi)
+        ns[1] = wrap(ns[1], -pi, pi)
+        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
+        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
+        self.state = ns
+        terminated = self._terminal()
+        reward = -1.0 if not terminated else 0.0
+
+        if self.render_mode == "human":
+            self.render()
+        return (self._get_ob(), reward, terminated, False, {})
+
+    def _terminal(self):
+        s = self.state
+        assert s is not None, "Call reset before using AcrobotEnv object."
+        return bool(-cos(s[0]) - cos(s[1] + s[0]) > self.target_height)
diff --git a/src/training_ml_control/environments/utils.py b/src/training_ml_control/environments/utils.py
index 98b678f..1ff64fc 100644
--- a/src/training_ml_control/environments/utils.py
+++ b/src/training_ml_control/environments/utils.py
@@ -9,6 +9,7 @@ from numpy.typing import NDArray
 
 from training_ml_control.control import FeedbackController, Observer, RandomController
+from training_ml_control.environments.acrobot import ContinuousAcrobotEnv
 from training_ml_control.environments.cart import CartEnv
 from training_ml_control.environments.grid_world import GridWorldEnv
 from training_ml_control.environments.inverted_pendulum import InvertedPendulumEnv
@@ -17,6 +18,7 @@
     "create_inverted_pendulum_environment",
     "create_grid_world_environment",
     "create_cart_environment",
+    "create_acrobot_environment",
     "simulate_environment",
     "value_iteration",
     "compute_best_path_and_actions_from_values",
@@ -52,6 +54,36 @@ def create_cart_environment(
     return env
 
 
+def create_acrobot_environment(
+    render_mode: str | None = "rgb_array",
+    *,
+    max_steps: int = 200,
+    target_height: float = 1.0,
+    max_torque: float = 1.0,
+) -> Env:
+    """Creates an instance of ContinuousAcrobotEnv with wrappers that enforce
+    correct usage, limit the number of steps, and store rendered frames.
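+
+    A usage sketch mirroring the practice notebook (``show_video`` lives in
+    ``training_ml_control.nb_utils``):
+
+        env = create_acrobot_environment(target_height=1.5)
+        result = simulate_environment(env)
+        show_video(result.frames, fps=env.metadata["render_fps"])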
+ """ + env = ContinuousAcrobotEnv( + render_mode=render_mode, + target_height=target_height, + max_torque=max_torque, + ) + env = TimeLimit(env, max_steps) + # env = PassiveEnvChecker(env) + env = OrderEnforcing(env) + if render_mode is not None: + env = RenderCollection(env) + return env + + def create_grid_world_environment( render_mode: str | None = "rgb_array", *, @@ -76,6 +101,7 @@ def create_inverted_pendulum_environment( masscart: float | None = None, length: float | None = None, x_threshold: float = 3, + theta_initial: float = 0.0, theta_threshold: float = 24, force_max: float = 10.0, ) -> Env: @@ -101,6 +127,7 @@ def create_inverted_pendulum_environment( length=length, x_threshold=x_threshold, theta_threshold=theta_threshold, + theta_initial=theta_initial, force_max=force_max, render_mode=render_mode, ) diff --git a/src/training_ml_control/shortest_path_problem.py b/src/training_ml_control/shortest_path_problem.py index 50e569b..e62aec5 100644 --- a/src/training_ml_control/shortest_path_problem.py +++ b/src/training_ml_control/shortest_path_problem.py @@ -81,7 +81,7 @@ def create_shortest_path_graph() -> nx.DiGraph: """Create shortest-path problem graph.""" G = nx.DiGraph() edge_list = [ - ("A", "B", 1), + ("A", "B", 4), ("A", "C", 5), ("A", "D", 3), ("B", "D", 9), @@ -89,7 +89,7 @@ def create_shortest_path_graph() -> nx.DiGraph: ("C", "F", 2), ("D", "F", 5), ("D", "G", 8), - ("E", "G", 4), + ("E", "G", 1), ("F", "G", 1), ] G.add_weighted_edges_from(edge_list)