diff --git a/notebooks/nb_20_dynamic_programming.ipynb b/notebooks/nb_20_dynamic_programming.ipynb index 85a3dbc..54c9e4f 100644 --- a/notebooks/nb_20_dynamic_programming.ipynb +++ b/notebooks/nb_20_dynamic_programming.ipynb @@ -15,11 +15,8 @@ "slide_type": "skip" }, "tags": [ - "remove-input", - "remove-output", - "remove-input-nbconv", - "remove-output-nbconv", - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,7 +43,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -58,13 +56,18 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true }, "scene__Initialization": true, + "slideshow": { + "slide_type": "" + }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -88,7 +91,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -260,6 +264,8 @@ { "cell_type": "markdown", "metadata": { + "editable": true, + "raw_mimetype": "", "slideshow": { "slide_type": "subslide" } @@ -284,7 +290,7 @@ "\n", "$$\n", "\\begin{equation}\n", - "u^*_k = \\displaystyle \\argmin_{u \\in \\mathbf{U}}\n", + "u^*_k = \\displaystyle \\underset{u \\in \\mathbf{U}}{\\text{argmin}} \n", "\\left[ c(\\mathbf{x}_k, \\mathbf{u}_k) + V(f(\\mathbf{x}_k, \\mathbf{u}_k)) \\right].\n", "\\end{equation}\n", "$$" @@ -293,6 +299,7 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -325,7 +332,7 @@ "slide_type": "" }, "tags": [ - "hide-input" + "remove-input" ] }, "outputs": [], @@ -336,7 +343,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "We wish to travel from node A to node G at minimum cost. 
If the cost represents time then we want to find the shortest path from A to G.\n", "\n", @@ -348,7 +360,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "We start by determining all possible paths first ." ] @@ -356,7 +373,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "plot_all_paths_graph(G)" @@ -368,17 +393,16 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [ - "remove-cell" - ] + } }, "source": [ "We then compute the cost-to-go at each node to determine the shortest path.\n", "\n", "Each node in this new graph represents a state. We will start from the tail (the last states) and compute recursively the cost for each state transition.\n", "\n", - "Let $c(n_1, n_2)$ the cost of moving from node $n_1$ to node $n_2$ and $V(n)$ be the optimal cost-to-go from node $n$. 
We have $$V({\\text{G}}) = 0$$.\n", + "Let $c(n_1, n_2)$ be the cost of moving from node $n_1$ to node $n_2$ and $V(n)$ be the optimal cost-to-go from node $n$.\n", + "\n", + "We have $V({\\text{G}}) = 0$ because the cost of going from node $G$ to itself is 0.\n", "\n", "We start with nodes **F** and **E**:\n", "\n", @@ -424,22 +448,22 @@ "Now that we have computed the optimal cost-to-go, we can proceed in a forward manner to determine the best path:\n", "\n", "$$\n", - "\\pi^* = \\underset{n}{\\argmin} [c(n_1, n_2) + V(n_2)]\n", + "\\pi^* = \\underset{n}{\\text{argmin}} [c(n_1, n_2) + V(n_2)]\n", "$$\n", "\n", "For the first action (step) we have:\n", "\n", "$$\n", - "\\pi^*_0 &= \\underset{n_2 \\in \\{ B, C, D \\}}{\\argmin} \\left[ c(A, n_2) + V(n_2) \\right] \\\\ \n", - "&= \\underset{n_2}{\\argmin} \\left[ c(A, n_2 = B) + V(n_2 = B), c(A, n_2 = C) + V(n_2 = C), c(A, n_2 = D) + V(n_2 = D) \\right] \\\\\n", - "&= \\underset{n_2}{\\argmin} \\left[ 4 + 2, 5 + 3, 3 + 6 \\right] \\\\\n", + "\\pi^*_0 &= \\underset{n_2 \\in \\{ B, C, D \\}}{\\text{argmin}} \\left[ c(A, n_2) + V(n_2) \\right] \\\\ \n", + "&= \\underset{n_2}{\\text{argmin}} \\left[ c(A, n_2 = B) + V(n_2 = B), c(A, n_2 = C) + V(n_2 = C), c(A, n_2 = D) + V(n_2 = D) \\right] \\\\\n", + "&= \\underset{n_2}{\\text{argmin}} \\left[ 4 + 2, 5 + 3, 3 + 6 \\right] \\\\\n", "&= B\n", "$$\n", "\n", "Proceeding the same way we get:\n", "\n", "$$\n", - "\\pi^* &= \\{\\pi^*_0, \\pi^*_1, \\pi^*_2\\} &= \\{\\text{B, E, G} \\}\n", + "\\pi^* = \\{ \\pi^*_0, \\pi^*_1, \\pi^*_2 \\} = \\{ \\text{B, E, G} \\}\n", "$$\n", "\n", "The shortest-path is ABEG."
@@ -448,7 +472,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "plot_all_paths_graph(G, show_solution=True)" @@ -456,7 +488,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "### Value Iteration\n", "\n", @@ -528,7 +565,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "%%html\n", @@ -538,7 +583,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "env = create_grid_world_environment(render_mode=\"rgb_array\", max_steps=50)\n", @@ -556,7 +609,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "env.reset()\n", @@ -574,7 +635,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "G = convert_graph_to_directed(G)\n", @@ -583,7 +652,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "We wish for the car to travel from its starting cell in red to the target cell in green. 
If the cost represents time and each step has the same cost then we want to find the shortest path to the goal.\n", "\n", @@ -611,13 +685,18 @@ "Compute the optimal cost-to-go at each node.\n", "\n", "You can use `dict(G.nodes(data=True))` to get a dictionary that maps the nodes to their attributes\n", - "and you can use `G.start_node` and `G.target_node` to access the start and end (i.e. goal) nodes, respectively.\n", + "and you can use `G.start_node` and `G.target_node` to access the start and target nodes, respectively.\n", ":::" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ ":::{exercise-end}\n", ":::" @@ -625,7 +704,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ "````{solution} grid-world\n", "````" @@ -634,7 +721,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" diff --git a/notebooks/nb_30_systems.ipynb b/notebooks/nb_30_systems.ipynb index bdee839..7cd7532 100644 --- a/notebooks/nb_30_systems.ipynb +++ b/notebooks/nb_30_systems.ipynb @@ -15,11 +15,8 @@ "slide_type": "skip" }, "tags": [ - "remove-input", - "remove-output", - "remove-input-nbconv", - "remove-output-nbconv", - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,7 +43,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -58,12 +56,17 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true }, "scene__Initialization": true, + "slideshow": { + "slide_type": "" + }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ 
-87,7 +90,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -110,7 +114,7 @@ " animate_inverted_pendulum_simulation,\n", " animate_full_inverted_pendulum_simulation,\n", ")\n", - "from training_ml_control.model import (\n", + "from training_ml_control.models import (\n", " build_cart_model,\n", " build_inverted_pendulum_linear_model,\n", " build_inverted_pendulum_nonlinear_model,\n", @@ -172,7 +176,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "cart_env = create_cart_environment(goal_position=9)\n", @@ -183,7 +195,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "T = np.arange(len(results.observations)) * cart_env.dt\n", @@ -197,7 +217,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "For the simulation we will use a modified version of the [Mountain Car Continuous](https://gymnasium.farama.org/environments/classic_control/mountain_car_continuous/) environment from [gymnasium](https://gymnasium.farama.org/).\n", "\n", @@ -246,7 +271,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "### Model\n", "\n", @@ -291,7 +321,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "outputs": [], "source": [ "cart_model = build_cart_model(cart_env)" @@ -312,6 +347,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -333,7 
+369,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "cart_simulator.reset_history()\n", @@ -341,15 +385,8 @@ "cart_simulator.x0 = x0\n", "for i in range(len(results.observations) - 1):\n", " u = results.actions[[i]]\n", - " x0 = cart_simulator.make_step(u)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + " x0 = cart_simulator.make_step(u)\n", + "\n", "animate_cart_simulation(cart_simulator.data)" ] }, @@ -480,7 +517,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "T = np.arange(len(results.observations)) * inverted_pendulum_env.dt\n", @@ -693,7 +738,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "x0 = results.observations[0, 2:]\n", @@ -703,21 +756,31 @@ "\n", "for i in range(len(results.observations) - 1):\n", " u = results.actions[[i]]\n", - " x0 = inverted_pendulum_linear_simulator.make_step(u)" + " x0 = inverted_pendulum_linear_simulator.make_step(u)\n", + "\n", + "animate_inverted_pendulum_simulation(inverted_pendulum_linear_simulator.data)" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ - "animate_inverted_pendulum_simulation(inverted_pendulum_linear_simulator.data)" + "We notice that the simulation quickly diverges as the state moves away from the origin.
The model should still be good for the purpose of controlling the system to stay near the origin." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "### Full Non-Linear Model" ] @@ -772,7 +835,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "x0 = results.observations[0]\n", @@ -782,15 +853,8 @@ "\n", "for i in range(len(results.observations) - 1):\n", " u = results.actions[[i]]\n", - " x0 = inverted_pendulum_nonlinear_simulator.make_step(u)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + " x0 = inverted_pendulum_nonlinear_simulator.make_step(u)\n", + "\n", "animate_full_inverted_pendulum_simulation(inverted_pendulum_nonlinear_simulator.data)" ] } diff --git a/notebooks/nb_40_LQR.ipynb b/notebooks/nb_40_LQR.ipynb index 9565f0a..867c9de 100644 --- a/notebooks/nb_40_LQR.ipynb +++ b/notebooks/nb_40_LQR.ipynb @@ -15,11 +15,8 @@ "slide_type": "skip" }, "tags": [ - "remove-input", - "remove-output", - "remove-input-nbconv", - "remove-output-nbconv", - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,7 +43,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -58,12 +56,17 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true }, "scene__Initialization": true, + "slideshow": { + "slide_type": "" + }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -77,11 +80,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "editable": true, "init_cell": true, "jupyter": { - "outputs_hidden": true, "source_hidden": true }, "scene__Initialization": true, @@ 
-89,7 +90,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -325,7 +327,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] + }, "outputs": [], "source": [ "Q = np.diag([100, 1])\n", @@ -346,7 +356,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "cart_lqr_controller = build_lqr_controller(\n", @@ -376,9 +392,13 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "fragment" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -388,28 +408,19 @@ "cart_simulator.x0 = x0\n", "for _ in range(200):\n", " u = cart_lqr_controller.make_step(x0)\n", - " x0 = cart_simulator.make_step(u)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [], - "source": [ + " x0 = cart_simulator.make_step(u)\n", + "\n", "animate_cart_simulation(cart_lqr_controller.data, reference=cart_env.goal_position)" ] }, { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "## Evaluation\n", @@ -421,9 +432,11 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -441,24 +454,33 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ - "max_steps = 200\n", "cart_controller = LQRController(K=cart_lqr_controller.K, setpoint=setpoint)\n", - "results = 
simulate_environment(\n", - " cart_env, max_steps=max_steps, controller=cart_controller\n", - ")\n", + "results = simulate_environment(cart_env, max_steps=200, controller=cart_controller)\n", "show_video(results.frames, fps=1 / cart_env.dt)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ "plt.close()\n", @@ -507,7 +529,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ ":::{solution} lqr-controller\n", ":::" @@ -516,7 +546,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" @@ -525,7 +563,14 @@ { "cell_type": "markdown", "metadata": { - "jp-MarkdownHeadingCollapsed": true + "editable": true, + "jp-MarkdownHeadingCollapsed": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] }, "source": [ "## Solution" @@ -581,9 +626,13 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - } + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -633,9 +682,13 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - } + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -647,25 +700,20 @@ "\n", "for k in range(50):\n", " u0 = inverted_pendulum_lqr_controller.make_step(x0)\n", - " x0 = inverted_pendulum_simulator.make_step(u0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "source_hidden": true - } - 
}, - "outputs": [], - "source": [ + " x0 = inverted_pendulum_simulator.make_step(u0)\n", + "\n", "animate_inverted_pendulum_simulation(inverted_pendulum_lqr_controller.data)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "**Evaluation**" ] @@ -692,9 +740,13 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - } + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -709,9 +761,13 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - } + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ diff --git a/notebooks/nb_50_MPC.ipynb b/notebooks/nb_50_MPC.ipynb index 2fa19fa..73cb279 100644 --- a/notebooks/nb_50_MPC.ipynb +++ b/notebooks/nb_50_MPC.ipynb @@ -15,11 +15,8 @@ "slide_type": "skip" }, "tags": [ - "remove-input", - "remove-output", - "remove-input-nbconv", - "remove-output-nbconv", - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,7 +43,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -58,13 +56,18 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true }, "scene__Initialization": true, + "slideshow": { + "slide_type": "" + }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -78,11 +81,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "editable": true, "init_cell": true, "jupyter": { - "outputs_hidden": true, "source_hidden": true }, "scene__Initialization": true, @@ -90,7 +91,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], 
@@ -214,6 +216,7 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -317,6 +320,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "fragment" } @@ -332,7 +336,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "### Controller" ] @@ -340,25 +349,29 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } }, "source": [ - "The control objective is to move the cart to a desired position (`9`)." + "The control objective is to move the cart to a desired position ($x_1 = 9$)." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "outputs": [], "source": [ - "setpoint = np.array([cart_env.goal_position, 10.0])\n", - "distance_cost = 100 * casadi.norm_2(cart_model.x.cat - setpoint)\n", + "distance_cost = casadi.norm_2(cart_model.x[\"position\"] - cart_env.goal_position)\n", "terminal_cost = distance_cost\n", "stage_cost = distance_cost\n", - "display_array(\"Setpoint\", setpoint)\n", "print(f\"Stage Cost = {stage_cost}\")\n", "print(f\"Terminal Cost = {terminal_cost}\")" ] @@ -366,6 +379,7 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -378,18 +392,24 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ - "force_penalty = 0.0" + "u_penalty = {\"force\": 1e-3}" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "We define as well upper and lower limits for the state and force" ] @@ -397,16 +417,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, 
+ "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "outputs": [], "source": [ - "x_limits = np.array([-10, 10])\n", - "u_limits = np.array([-30, 30])" + "x_limits = {\"position\": np.array([-10, 10])}\n", + "u_limits = {\"force\": np.array([-30, 30])}" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "We then create an instance of `MPC` from the `do_mpc` package using the already defined `build_mpc_controller` function." ] @@ -414,16 +445,22 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "cart_mpc_controller = build_mpc_controller(\n", " model=cart_model,\n", " t_step=cart_env.dt,\n", - " n_horizon=20,\n", + " n_horizon=10,\n", " stage_cost=stage_cost,\n", " terminal_cost=terminal_cost,\n", - " force_penalty=force_penalty,\n", + " u_penalty=u_penalty,\n", " x_limits=x_limits,\n", " u_limits=u_limits,\n", ")" @@ -432,6 +469,7 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -446,16 +484,21 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "%%capture\n", "cart_mpc_controller.reset_history()\n", "cart_simulator.reset_history()\n", - "x0 = np.zeros((2, 1))\n", + "\n", + "x0 = np.random.normal(loc=np.zeros((2, 1)))\n", "cart_simulator.x0 = x0\n", "cart_mpc_controller.x0 = x0\n", "cart_mpc_controller.set_initial_guess()\n", @@ -469,9 +512,13 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { - "slide_type": "subslide" - } + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -481,6 +528,7 @@ { "cell_type": 
"markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -495,6 +543,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -505,7 +554,7 @@ " def __init__(self, mpc: MPC) -> None:\n", " self.mpc = mpc\n", " self.mpc.reset_history()\n", - " self.mpc.x0 = np.zeros(2)\n", + " self.mpc.x0 = np.random.normal(loc=np.zeros((2, 1)))\n", " self.mpc.set_initial_guess()\n", "\n", " def act(self, observation: NDArray) -> NDArray:\n", @@ -516,25 +565,32 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "%%capture\n", - "max_steps = 200\n", "controller = MPCController(cart_mpc_controller)\n", - "results = simulate_environment(cart_env, max_steps=max_steps, controller=controller)" + "results = simulate_environment(cart_env, max_steps=200, controller=controller)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -544,10 +600,17 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] + }, "outputs": [], "source": [ - "plt.close()\n", "T = np.arange(len(results.observations)) * cart_env.dt\n", "plot_cart_results(\n", " T=T,\n", @@ -559,7 +622,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "## Inverted Pendulum" ] @@ -603,7 +671,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ ":::{solution} inverted-pendulum-linear-mpc\n", ":::" @@ -612,7 
+688,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" @@ -651,7 +735,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "We first need the environment, linear model and simulator:" ] @@ -660,8 +750,12 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true + }, + "slideshow": { + "slide_type": "" } }, "outputs": [], @@ -677,7 +771,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "The goal is to keep the inverted pendulum upright. For that we define the following costs, setpoint and force penalty:" ] @@ -686,9 +785,14 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -706,7 +810,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "We define as well upper and lower limits for the state and force" ] @@ -715,13 +824,17 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true + }, + "slideshow": { + "slide_type": "" } }, "outputs": [], "source": [ - "x_limits = {\"position\": np.array([-10, 10])}\n", + "x_limits = {\"theta\": np.array([-10, 10])}\n", "u_limits = {\n", " \"force\": np.array(\n", " [-inverted_pendulum_env.force_max, inverted_pendulum_env.force_max]\n", @@ -731,7 +844,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + 
} + }, "source": [ "After that, we create the controller:" ] @@ -740,16 +858,21 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ "inverted_pendulum_mpc_controller = build_mpc_controller(\n", " model=inverted_pendulum_lin_model,\n", " t_step=inverted_pendulum_env.dt,\n", - " n_horizon=100,\n", + " n_horizon=30,\n", " stage_cost=stage_cost,\n", " terminal_cost=terminal_cost,\n", " u_penalty=u_penalty,\n", @@ -760,7 +883,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "**Simulation**" ] @@ -769,12 +897,21 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ + "%%capture\n", "inverted_pendulum_mpc_controller.reset_history()\n", "inverted_pendulum_lin_simulator.reset_history()\n", "x0 = np.zeros((2, 1))\n", @@ -790,9 +927,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -801,7 +945,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "**Evaluation**" ] @@ -810,8 +959,12 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true + }, + "slideshow": { + "slide_type": "" } }, "outputs": [], @@ -831,9 +984,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + 
"hide-input" + ] }, "outputs": [], "source": [ @@ -848,9 +1008,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -861,13 +1028,19 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "plt.close()\n", "T = np.arange(len(results.observations)) * inverted_pendulum_env.dt\n", "plot_inverted_pendulum_results(\n", " T=T, observations=results.observations, actions=results.actions, reference=np.inf\n", @@ -876,7 +1049,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ ":::{solution-end}\n", ":::" @@ -923,7 +1101,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ ":::{solution} non-linear-inverted-pendulum-mpc\n", ":::" @@ -932,7 +1118,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" @@ -941,7 +1135,14 @@ { "cell_type": "markdown", "metadata": { - "jp-MarkdownHeadingCollapsed": true + "editable": true, + "jp-MarkdownHeadingCollapsed": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] }, "source": [ "#### Solution" @@ -958,7 +1159,12 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ "**Fast Rotation**" ] @@ -974,9 +1180,14 @@ "cell_type": "code", "execution_count": null, "metadata": { + 
"editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -993,46 +1204,88 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "The goal is to keep the inverted pendulum upright. For that we define the following costs and force penalty:" + "The goal is to make the inverted pendulum rotate as fast as possible. For that we define the following costs:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ "rotation_cost = -1000 * inverted_pendulum_nonlin_model.x[\"dtheta\"]\n", "terminal_cost = rotation_cost\n", "stage_cost = rotation_cost\n", - "force_penalty = 0.0\n", "print(f\"{stage_cost=}\")\n", "print(f\"{terminal_cost=}\")" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "We define as well upper and lower limits for the force:" + "We define as well upper and lower limits for the force and position:" ] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": null, + "metadata": { + "editable": true, + "jupyter": { + "source_hidden": true + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], "source": [ - "u_limits = {\"force\": np.array([-inverted_pendulum_env.force_max, inverted_pendulum_env.force_max])}" + "x_limits = {\n", + " \"position\": np.array(\n", + " [-inverted_pendulum_env.x_threshold, inverted_pendulum_env.x_threshold]\n", + " )\n", + "}\n", + "u_limits = {\n", + " \"force\": np.array(\n", + " [-inverted_pendulum_env.force_max, inverted_pendulum_env.force_max]\n", + " )\n", + "}" ] }, { "cell_type": "markdown", - "metadata": 
{}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "After that, we create the controller:" ] @@ -1041,26 +1294,37 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ "inverted_pendulum_mpc_controller = build_mpc_controller(\n", " model=inverted_pendulum_nonlin_model,\n", " t_step=inverted_pendulum_env.dt,\n", - " n_horizon=100,\n", + " n_horizon=50,\n", " stage_cost=stage_cost,\n", " terminal_cost=terminal_cost,\n", - " force_penalty=force_penalty,\n", + " x_limits=x_limits,\n", " u_limits=u_limits,\n", ")" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "**Simulation**" ] @@ -1069,20 +1333,29 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "%%capture\n", - "x0 = np.zeros((4, 1))\n", - "x0[2] = 0.01\n", "inverted_pendulum_mpc_controller.reset_history()\n", "inverted_pendulum_nonlin_simulator.reset_history()\n", + "\n", + "x0 = np.random.normal(loc=np.zeros((4, 1)))\n", "inverted_pendulum_nonlin_simulator.x0 = x0\n", + "inverted_pendulum_mpc_controller.x0 = x0\n", + "inverted_pendulum_mpc_controller.set_initial_guess()\n", "\n", - "for k in range(50):\n", + "for k in range(100):\n", " u0 = inverted_pendulum_mpc_controller.make_step(x0)\n", " x0 = inverted_pendulum_nonlin_simulator.make_step(u0)" ] @@ -1091,9 +1364,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -1102,7 +1382,13 @@ 
}, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "**Evaluation**" ] @@ -1111,9 +1397,14 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -1121,7 +1412,7 @@ " def __init__(self, mpc: do_mpc.controller.MPC) -> None:\n", " self.mpc = mpc\n", " self.mpc.reset_history()\n", - " self.mpc.x0 = np.zeros(4)\n", + " self.mpc.x0 = np.random.normal(loc=np.zeros((4, 1)))\n", " self.mpc.set_initial_guess()\n", "\n", " def act(self, observation: NDArray) -> NDArray:\n", @@ -1132,9 +1423,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -1149,9 +1447,16 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -1162,13 +1467,19 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "plt.close()\n", "T = np.arange(len(results.observations)) * inverted_pendulum_env.dt\n", "plot_inverted_pendulum_results(\n", " T=T, observations=results.observations, actions=results.actions, reference=np.inf\n", @@ -1177,7 +1488,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ ":::{solution-end}\n", ":::" @@ -1186,9 +1503,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, 
"slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "## MPC Controller Design Challenges\n", @@ -1204,9 +1523,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "slide" - } + }, + "tags": [] }, "source": [ "# Robust MPC\n", @@ -1222,9 +1543,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "## Multi-Stage MPC\n", @@ -1235,9 +1558,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ ":::{figure} ./_static/images/40_multi_state_mpc.png\n", @@ -1251,9 +1576,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "- Each node in the tree denotes the possible state of the system at every prediction step.\n", @@ -1268,9 +1595,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "### Inverted Pendulum\n", @@ -1283,9 +1612,11 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "#### Model" @@ -1294,7 +1625,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "inverted_pendulum_env = create_inverted_pendulum_environment(\n", @@ -1309,65 +1646,115 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ - "m_p_values = inverted_pendulum_env.masspole * np.array([1.0, 1.30, 0.70])\n", + "m_p_values = inverted_pendulum_env.masspole * np.array([1.0, 1.20, 0.80])\n", + "display_array(\"m_p\", 
m_p_values)\n", "uncertainty_values = {\"m_p\": m_p_values}" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "setpoint = np.zeros((4, 1))\n", - "setpoint[0] = 1.0\n", "distance_cost = casadi.bilin(\n", - " np.diag([1, 0, 100, 0]), inverted_pendulum_nonlin_model.x.cat - setpoint\n", + " np.diag([1, 0, 100, 1]), inverted_pendulum_nonlin_model.x.cat - setpoint\n", ")\n", "terminal_cost = distance_cost\n", "stage_cost = distance_cost\n", - "force_penalty = 1e-2\n", + "u_penalty = {\"force\": 1e-3}\n", "display_array(\"Setpoint\", setpoint)\n", - "print(f\"{stage_cost=}\")\n", - "print(f\"{terminal_cost=}\")" + "print(f\"Stage cost: {stage_cost}\")\n", + "print(f\"Terminal cost: {terminal_cost}\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ - "u_limits = np.array([-inverted_pendulum_env.force_max, inverted_pendulum_env.force_max])" + "x_limits = {\n", + " \"position\": np.array(\n", + " [-inverted_pendulum_env.x_threshold, inverted_pendulum_env.x_threshold]\n", + " )\n", + "}\n", + "u_limits = {\n", + " \"force\": np.array(\n", + " [-inverted_pendulum_env.force_max, inverted_pendulum_env.force_max]\n", + " )\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We again create an instance of `MPC` from the `do_mpc` package using the already defined `build_mpc_controller` function. This time however we pass the `n_robust` keyword argument to control the number of scenarios." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "inverted_pendulum_mpc_controller = build_mpc_controller(\n", " model=inverted_pendulum_nonlin_model,\n", " t_step=inverted_pendulum_env.dt,\n", - " n_horizon=100,\n", + " n_horizon=50,\n", " stage_cost=stage_cost,\n", " terminal_cost=terminal_cost,\n", - " force_penalty=force_penalty,\n", + " x_limits=x_limits,\n", " u_limits=u_limits,\n", + " u_penalty=u_penalty,\n", " uncertainty_values=uncertainty_values,\n", + " n_robust=1,\n", ")" ] }, { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "#### Evaluation" @@ -1377,9 +1764,11 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -1387,7 +1776,7 @@ " def __init__(self, mpc: do_mpc.controller.MPC) -> None:\n", " self.mpc = mpc\n", " self.mpc.reset_history()\n", - " self.mpc.x0 = np.zeros(4)\n", + " self.mpc.x0 = np.random.normal(loc=np.zeros((4, 1)))\n", " self.mpc.set_initial_guess()\n", "\n", " def act(self, observation: NDArray) -> NDArray:\n", @@ -1398,17 +1787,20 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "%%capture\n", - "max_steps = 200\n", "controller = MPCController(inverted_pendulum_mpc_controller)\n", "results = simulate_environment(\n", - " inverted_pendulum_env, max_steps=max_steps, controller=controller\n", + " inverted_pendulum_env, max_steps=200, controller=controller\n", ")" ] }, @@ -1416,9 +1808,13 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + 
"remove-input" + ] }, "outputs": [], "source": [ @@ -1429,9 +1825,13 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ diff --git a/notebooks/nb_60_MCTS.ipynb b/notebooks/nb_60_MCTS.ipynb index 50e2e71..0e1bb94 100644 --- a/notebooks/nb_60_MCTS.ipynb +++ b/notebooks/nb_60_MCTS.ipynb @@ -15,11 +15,8 @@ "slide_type": "skip" }, "tags": [ - "remove-input", - "remove-output", - "remove-input-nbconv", - "remove-output-nbconv", - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,7 +43,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -54,52 +52,6 @@ "%presentation_style" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "source_hidden": true - }, - "scene__Initialization": true, - "tags": [ - "ActiveScene" - ] - }, - "outputs": [], - "source": [ - "import warnings\n", - "\n", - "warnings.simplefilter(\"ignore\", UserWarning)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "source_hidden": true - }, - "scene__Initialization": true, - "tags": [ - "ActiveScene" - ] - }, - "outputs": [], - "source": [ - "%autoreload\n", - "import numpy as np\n", - "from training_ml_control.environments import (\n", - " create_grid_world_environment,\n", - " plot_grid_graph,\n", - " plot_grid_all_paths_graph,\n", - " simulate_environment,\n", - ")\n", - "from training_ml_control.nb_utils import (\n", - " show_video,\n", - ")" - ] - }, { "cell_type": "markdown", "metadata": { @@ -244,131 +196,17 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "source": [ "```{figure} _static/images/50_monte_carlo_tree_search.svg\n", ":width: 80%\n", "```" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "## Exercise" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ":::{exercise-start} Grid World Again\n", - ":label: grid-world-again\n", - ":::" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env = create_grid_world_environment(render_mode=\"rgb_array\", max_steps=50)\n", - "result = simulate_environment(env)\n", - "show_video(result.frames, fps=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The task can be represented as the following undirected graph:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.reset()\n", - "G = env.unwrapped.get_graph()\n", - "plot_grid_graph(G)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert the graph to a directed graph with all possible paths from start to target that do not contain cycles" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G = convert_graph_to_directed(G)\n", - "plot_grid_graph(G)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We wish for the car to travel from its starting cell in red to the target cell in green. 
If the cost represents time and each step has the same cost then we want to find the shortest path to the goal.\n", - "\n", - "- Arrows (edges) indicate the possible movements.\n", - "- Numbers on edges indicate the cost of moving along an edge.\n", - "\n", - "Use MCTS to solve this problem and then implement the plan in the environment.\n", - "\n", - ":::{tip} Hint 1\n", - ":class: dropdown\n", - "\n", - "Determine all possible paths first.\n", - "\n", - "You can use `plot_grid_all_paths_graph(G)` for that.\n", - ":::\n", - "\n", - ":::{tip} Hint 2\n", - ":class: dropdown\n", - "\n", - "Compute the optimal cost-to-go at each node.\n", - "\n", - "You can use `dict(G.nodes(data=True))` to get a dictionary that maps the nodes to their attributes\n", - "and you can use `G.start_node` and `G.target_node` to access the start and end (i.e. goal) nodes, respectively.\n", - ":::" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ":::{exercise-end}\n", - ":::" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "````{solution} grid-world-again\n", - "````" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your solution here" - ] } ], "metadata": { diff --git a/notebooks/nb_70_machine_learning_control.ipynb b/notebooks/nb_70_machine_learning_control.ipynb index 449fe6b..40cbfd5 100644 --- a/notebooks/nb_70_machine_learning_control.ipynb +++ b/notebooks/nb_70_machine_learning_control.ipynb @@ -34,6 +34,7 @@ "execution_count": null, "id": "f7339d3d-eae4-4e05-a7fb-050df96782e7", "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true @@ -57,13 +58,18 @@ "execution_count": null, "id": "6e892149-098a-42c7-b554-bbb83918888d", "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true }, "scene__Default Scene": true, + "slideshow": { + "slide_type": "" + }, "tags": [ - "ActiveScene" + "ActiveScene", + 
"remove-cell" ] }, "outputs": [], @@ -217,7 +223,12 @@ { "cell_type": "markdown", "id": "72d55a08-2862-495b-91a9-ef008bfbf035", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "source": [ ":::{solution} model-evaluation\n", ":class: dropdown\n", @@ -306,7 +317,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-cell" + ] }, "outputs": [], "source": [ @@ -315,9 +329,7 @@ "cart_frames = {}\n", "\n", "controllers = {\n", - " f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n", - " np.asarray([cart_env.max_action / 2])\n", - " ),\n", + " \"Step\": ConstantController(np.asarray([cart_env.max_action / 2])),\n", " \"Sinusoid @ 10Hz\": SineController(\n", " cart_env, np.asarray([cart_env.max_action]), frequency=10\n", " ),\n", @@ -326,7 +338,6 @@ " ),\n", " \"Schroeder Sweep\": SchroederSweepController(\n", " cart_env,\n", - " np.asarray([cart_env.max_action]),\n", " n_time_steps=500,\n", " n_harmonics=5,\n", " frequency=2,\n", @@ -350,7 +361,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -373,7 +387,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -413,7 +430,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -449,7 +469,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -956,7 +979,8 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "source": [ "### Data" @@ -970,7 +994,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-cell" + ] }, "outputs": [], "source": [ @@ -980,24 +1007,14 @@ "cart_actions = {}\n", "\n", "controllers = {\n", - " f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n", - 
" np.asarray([cart_env.max_action / 2])\n", - " ),\n", - " \"Sinusoid @ 10Hz\": SineController(\n", - " cart_env, np.asarray([cart_env.max_action]), frequency=100\n", - " ),\n", - " \"Sinusoid @ 0.5Hz\": SineController(\n", - " cart_env, np.asarray([cart_env.max_action]), frequency=0.5\n", - " ),\n", + " \"Step\": ConstantController(np.asarray([cart_env.max_action / 2])),\n", " \"Schroeder Sweep\": SchroederSweepController(\n", " cart_env,\n", - " np.asarray([cart_env.max_action]),\n", " n_time_steps=500,\n", " n_harmonics=5,\n", " frequency=2,\n", " ),\n", " \"PRBS\": PRBSController(np.asarray([cart_env.max_action])),\n", - " \"Random\": RandomController(cart_env),\n", "}\n", "\n", "for controller_name, controller in controllers.items():\n", @@ -1006,6 +1023,19 @@ " cart_actions[controller_name] = result.actions" ] }, + { + "cell_type": "markdown", + "id": "f98c7797-960d-4fb3-b565-882fba50bf29", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, + "source": [ + "We use the data from the PRBS controller as training data and use the one from the Step controller as validation data." 
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -1018,15 +1048,16 @@ }, "outputs": [], "source": [ - "# We use the data from PRBS\n", - "X = cart_observations[\"PRBS\"][:-1].copy()\n", - "U = cart_actions[\"PRBS\"].copy()\n", - "t = np.arange(0, len(X)) * cart_env.dt\n", - "# Train with 80% of the data and test with 20%\n", - "test_size = 0.2\n", - "X_train, X_test, U_train, U_test, t_train, t_test = train_test_split(\n", - " X, U, t, test_size=test_size, shuffle=False\n", - ")" + "training_controller_name = \"PRBS\"\n", + "testing_controller_name = \"Step\"\n", + "\n", + "X_train = cart_observations[training_controller_name][:-1].copy()\n", + "U_train = cart_actions[training_controller_name].copy()\n", + "t_train = np.arange(0, len(X_train)) * cart_env.dt\n", + "\n", + "X_val = cart_observations[testing_controller_name][:-1].copy()\n", + "U_val = cart_actions[testing_controller_name].copy()\n", + "t_val = np.arange(0, len(X_val)) * cart_env.dt" ] }, { @@ -1036,7 +1067,8 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "source": [ "### SINDYc\n", @@ -1074,12 +1106,12 @@ "optimizer = ps.STLSQ(threshold=0.1, max_iter=100)\n", "feature_library = ps.IdentityLibrary()\n", "differentiation_method = ps.FiniteDifference(order=1)\n", - "model = ps.SINDy(\n", + "sindy_model = ps.SINDy(\n", " optimizer=optimizer,\n", " feature_library=feature_library,\n", " differentiation_method=differentiation_method,\n", ")\n", - "model.fit(X_train, u=U_train, t=t_train)" + "sindy_model.fit(X_train, u=U_train, t=t_train)" ] }, { @@ -1103,11 +1135,14 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "model.print()" + "sindy_model.print()" ] }, { @@ -1135,13 +1170,16 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ "print(\n", " \"Model score: %f\"\n", - " % model.score(X_test, u=U_test, 
t=cart_env.dt, metric=mean_squared_error)\n", + " % sindy_model.score(X_val, u=U_val, t=cart_env.dt, metric=mean_squared_error)\n", ")" ] }, @@ -1166,12 +1204,15 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-cell" + ] }, "outputs": [], "source": [ - "X_sindy = model.simulate(X_test[0], t_test, u=U_test)\n", - "X_sindy = np.vstack([X_test[0][np.newaxis, :], X_sindy])" + "X_sindy = sindy_model.simulate(X_val[0], t_val, u=U_val)\n", + "X_sindy = np.vstack([X_val[0][np.newaxis, :], X_sindy])" ] }, { @@ -1182,21 +1223,20 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "fig, axs = plt.subplots(1, X_test.shape[1], sharex=True)\n", - "for i in range(X_test.shape[1]):\n", - " axs[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axs[i].plot(t_test, X_sindy[:, i], \"r--\", label=\"Model\")\n", - " axs[i].legend()\n", - " axs[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", - "\n", - "fig, ax = plt.subplots()\n", - "ax.plot(X_test[:, 0], X_test[:, 1], \"k\", label=\"Measured\")\n", - "ax.plot(X_sindy[:, 0], X_sindy[:, 1], \"r--\", label=\"Model\")\n", - "ax.legend()\n", + "fig, axes = plt.subplots(1, X_val.shape[1], sharex=True)\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " axes[i].plot(t_val, X_sindy[:, i], \"r--\", label=\"Model\")\n", + " axes[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", + " axes[i].legend()\n", + "\n", "fig.tight_layout()\n", "plt.show();" ] @@ -1246,8 +1286,8 @@ "outputs": [], "source": [ "DMDc = pk.regression.DMDc(svd_output_rank=4, svd_rank=6)\n", - "model = pk.Koopman(regressor=DMDc)\n", - "model.fit(X_train, u=U_train, dt=cart_env.dt)" + "dmd_model = pk.Koopman(regressor=DMDc)\n", + "dmd_model.fit(X_train, u=U_train, dt=cart_env.dt)" ] }, { @@ -1271,14 +1311,17 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ 
+ "remove-input" + ] }, "outputs": [], "source": [ - "display_array(\"A\", model.A)\n", - "display_array(\"B\", model.B)\n", - "display_array(\"C\", model.C)\n", - "display_array(\"W\", model.W)" + "display_array(\"A\", dmd_model.A)\n", + "display_array(\"B\", dmd_model.B)\n", + "display_array(\"C\", dmd_model.C)\n", + "display_array(\"W\", dmd_model.W)" ] }, { @@ -1319,8 +1362,8 @@ }, "outputs": [], "source": [ - "X_dmd = model.simulate(X_test[0], U_test, n_steps=X_test.shape[0] - 1)\n", - "X_dmd = np.vstack([X_test[0][np.newaxis, :], X_dmd])" + "X_dmd = dmd_model.simulate(X_val[0], U_val, n_steps=X_val.shape[0] - 1)\n", + "X_dmd = np.vstack([X_val[0][np.newaxis, :], X_dmd])" ] }, { @@ -1344,11 +1387,14 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "print(\"Model score: %f\" % mean_squared_error(X_test, X_dmd))" + "print(\"Model score: %f\" % mean_squared_error(X_val, X_dmd))" ] }, { @@ -1359,22 +1405,20 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "fig, axs = plt.subplots(1, X_test.shape[1], sharex=True)\n", - "for i in range(X_test.shape[1]):\n", - " axs[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axs[i].plot(t_test, X_dmd[:, i], \"r--\", label=\"Model\")\n", - " axs[i].legend()\n", - " axs[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", - "\n", - "fig, ax = plt.subplots()\n", - "ax.plot(X_test[:, 0], X_test[:, 1], \"k\", label=\"Measured\")\n", - "ax.plot(X_dmd[:, 0], X_dmd[:, 1], \"r--\", label=\"Model\")\n", - "ax.set(xlabel=r\"$x_1$\", ylabel=r\"$x_2$\")\n", - "ax.legend()\n", + "fig, axes = plt.subplots(1, X_val.shape[1], sharex=True)\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " axes[i].plot(t_val, X_dmd[:, i], \"r--\", label=\"Model\")\n", + " axes[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 
1))\n", + " axes[i].legend()\n", + "\n", "fig.tight_layout()\n", "plt.show();" ] @@ -1386,7 +1430,8 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "source": [ "## Inverted Pendulum" @@ -1399,7 +1444,8 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "source": [ "### Exercise" @@ -1429,7 +1475,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-cell" + ] }, "source": [ ":::{solution} inverted-pendulum-data-model\n", @@ -1444,7 +1493,10 @@ "editable": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-cell" + ] }, "outputs": [], "source": [ @@ -1494,7 +1546,10 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-cell" + ] }, "outputs": [], "source": [ @@ -1528,7 +1583,10 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -1550,7 +1608,7 @@ { "cell_type": "code", "execution_count": null, - "id": "83211e32-c935-4c9f-b217-14cbed5fbcac", + "id": "f19a8805-8ec6-4b6f-bad8-e098084c2efc", "metadata": { "editable": true, "jupyter": { @@ -1562,16 +1620,16 @@ }, "outputs": [], "source": [ - "controller_name = \"PRBS 0\"\n", + "training_controller_name = \"PRBS 0\"\n", + "testing_controller_name = \"PRBS 1\"\n", "\n", - "X = inverted_pendulum_observations[controller_name][:-1].copy()\n", - "U = inverted_pendulum_actions[controller_name].copy()\n", - "t = np.arange(len(X)) * cart_env.dt\n", - "# Train with 80% of the data and test with 20%\n", - "test_size = 0.2\n", - "X_train, X_test, U_train, U_test, t_train, t_test = train_test_split(\n", - " X, U, t, test_size=test_size, shuffle=False\n", - ")" + "X_train = inverted_pendulum_observations[training_controller_name][:-1].copy()\n", + "U_train = inverted_pendulum_actions[training_controller_name].copy()\n", + "t_train = np.arange(0, len(X_train)) * inverted_pendulum_env.dt\n", + "\n", + "X_val = 
inverted_pendulum_observations[testing_controller_name][:-1].copy()\n", + "U_val = inverted_pendulum_actions[testing_controller_name].copy()\n", + "t_val = np.arange(0, len(X_val)) * inverted_pendulum_env.dt" ] }, { @@ -1598,21 +1656,24 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ - "optimizer = ps.STLSQ(threshold=0.3, max_iter=200)\n", + "optimizer = ps.STLSQ(threshold=0.3, max_iter=500)\n", "\n", "feature_library = ps.IdentityLibrary()\n", "\n", "differentiation_method = ps.FiniteDifference(order=1)\n", - "model = ps.SINDy(\n", + "sindy_model = ps.SINDy(\n", " optimizer=optimizer,\n", " feature_library=feature_library,\n", " differentiation_method=differentiation_method,\n", ")\n", - "model.fit(X_train, u=U_train, t=inverted_pendulum_env.dt)" + "sindy_model.fit(X_train, u=U_train, t=inverted_pendulum_env.dt)" ] }, { @@ -1626,11 +1687,14 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "model.print()" + "sindy_model.print()" ] }, { @@ -1644,16 +1708,19 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ "print(\n", " \"Model score: %f\"\n", - " % model.score(\n", - " X_test_list[test_index],\n", - " u=U_test_list[test_index],\n", - " t=cart_env.dt,\n", + " % sindy_model.score(\n", + " X_val,\n", + " u=U_val,\n", + " t=inverted_pendulum_env.dt,\n", " metric=mean_squared_error,\n", " )\n", ")" @@ -1670,12 +1737,13 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "outputs": [], "source": [ - "X_sindy = model.simulate(X_test[0], t_test, u=U_test)\n", - "X_sindy = np.vstack([X_test[0][np.newaxis, :], X_sindy])" + "X_sindy = sindy_model.simulate(X_val[0], t_val, u=U_val)\n", + "X_sindy = np.vstack([X_val[0][np.newaxis, :], X_sindy])" ] }, { @@ -1689,25 +1757,21 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - 
"fig, axes = plt.subplots(2, X_test_list[test_index].shape[1] // 2, sharex=True)\n", + "fig, axes = plt.subplots(2, X_val.shape[1] // 2, sharex=True)\n", "axes = axes.ravel()\n", - "for i in range(X_test.shape[1]):\n", - " axes[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axes[i].plot(t_test, X_sindy[:, i], \"r--\", label=\"Model\")\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " axes[i].plot(t_val, X_sindy[:, i], \"r--\", label=\"Model\")\n", " axes[i].set(xlabel=\"t\", title=\"$x_{}$\".format(i + 1))\n", " axes[i].legend()\n", "fig.tight_layout()\n", - "\n", - "fig, ax = plt.subplots()\n", - "ax.plot(X_test[:, 0], X_test[:, 2], \"k\", label=\"Measured\")\n", - "ax.plot(X_sindy[:, 0], X_sindy[:, 2], \"r--\", label=\"Model\")\n", - "ax.set(xlabel=\"$x_1$\", ylabel=\"$x_3$\")\n", - "ax.legend()\n", - "fig.tight_layout()\n", "plt.show();" ] }, @@ -1727,7 +1791,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c96f242-72de-4d71-a502-ebaeebd53bf3", + "id": "cee60131-f5e7-48a2-a21a-1eefffd198cc", "metadata": { "editable": true, "jupyter": { @@ -1735,29 +1799,17 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "regressor = pk.regression.EDMDc()\n", - "\n", - "# Compute centers from training data\n", - "rng = np.random.default_rng(seed=16)\n", - "centers = rng.random((X_train.shape[1], X_train.shape[0]))\n", - "# Scale to input features' range\n", - "x_max = np.max(X_train, axis=0)\n", - "x_min = np.min(X_train, axis=0)\n", - "centers = centers * (x_max[:, np.newaxis] - x_min[:, np.newaxis]) + x_max[:, np.newaxis]\n", - "\n", - "obsv = pk.observables.RadialBasisFunction(\n", - " rbf_type=\"polyharmonic\",\n", - " centers=centers,\n", - " n_centers=centers.shape[0],\n", - " polyharmonic_coeff=1.1,\n", - " include_state=True,\n", - ")\n", - "model = pk.Koopman(observables=obsv, regressor=regressor)\n", - 
"model.fit(X_train, u=U_train, dt=cart_env.dt)" + "observables = pk.observables.Polynomial(degree=1)\n", + "dmd_model = pk.Koopman(observables=observables, regressor=regressor)\n", + "dmd_model.fit(X_train, u=U_train, dt=inverted_pendulum_env.dt)" ] }, { @@ -1769,16 +1821,20 @@ "jupyter": { "source_hidden": true }, + "scrolled": true, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "display_array(\"A\", model.A)\n", - "display_array(\"B\", model.B)\n", - "display_array(\"C\", model.C)\n", - "display_array(\"W\", model.W)" + "display_array(\"A\", dmd_model.A)\n", + "display_array(\"B\", dmd_model.B)\n", + "display_array(\"C\", dmd_model.C)\n", + "display_array(\"W\", dmd_model.W)" ] }, { @@ -1792,12 +1848,13 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "outputs": [], "source": [ - "X_dmd = model.simulate(X_test[0], U_test, n_steps=X_test.shape[0] - 1)\n", - "X_dmd = np.vstack([X_test[0][np.newaxis, :], X_dmd])" + "X_dmd = dmd_model.simulate(X_val[0], U_val, n_steps=X_val.shape[0] - 1)\n", + "X_dmd = np.vstack([X_val[0][np.newaxis, :], X_dmd])" ] }, { @@ -1811,11 +1868,14 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "print(\"Model score: %f\" % mean_squared_error(X_test, X_dmd))" + "print(\"Model score: %f\" % mean_squared_error(X_val, X_dmd))" ] }, { @@ -1829,15 +1889,18 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "fig, axes = plt.subplots(2, X_test.shape[1] // 2, sharex=True)\n", + "fig, axes = plt.subplots(2, X_val.shape[1] // 2, sharex=True)\n", "axes = axes.ravel()\n", - "for i in range(X_test.shape[1]):\n", - " axes[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axes[i].plot(t_test, X_dmd[:, i], \"r--\", label=\"Model\")\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " 
axes[i].plot(t_val, X_dmd[:, i], \"r--\", label=\"Model\")\n", " axes[i].set(xlabel=\"t\", title=\"$x_{}$\".format(i + 1))\n", " axes[i].legend()\n", "fig.tight_layout()\n", diff --git a/notebooks/nb_80_safe_learning_control.ipynb b/notebooks/nb_80_safe_learning_control.ipynb index f2f6735..d66de65 100644 --- a/notebooks/nb_80_safe_learning_control.ipynb +++ b/notebooks/nb_80_safe_learning_control.ipynb @@ -4,6 +4,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true @@ -13,7 +14,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -26,6 +28,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true @@ -35,7 +38,8 @@ "slide_type": "skip" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -46,9 +50,13 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "slide" - } + }, + "tags": [ + "remove-cell" + ] }, "source": [ "```{figure} ./_static/images/aai-institute-cover.png\n", @@ -391,6 +399,7 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } diff --git a/notebooks/nb_90_practice.ipynb b/notebooks/nb_90_practice.ipynb index a8d783d..c9a8c12 100644 --- a/notebooks/nb_90_practice.ipynb +++ b/notebooks/nb_90_practice.ipynb @@ -34,6 +34,7 @@ "execution_count": null, "id": "f7339d3d-eae4-4e05-a7fb-050df96782e7", "metadata": { + "editable": true, "init_cell": true, "jupyter": { "source_hidden": true @@ -67,7 +68,8 @@ "slide_type": "" }, "tags": [ - "ActiveScene" + "ActiveScene", + "remove-cell" ] }, "outputs": [], @@ -107,7 +109,9 @@ "import seaborn as sns\n", "import pykoopman as pk\n", "import pysindy as ps\n", + "from do_mpc.controller import MPC\n", "from do_mpc.simulator import Simulator\n", + "from numpy.typing import 
NDArray\n", "from sklearn.metrics import mean_squared_error\n", "from scipy.signal import periodogram\n", "from scipy.fft import rfft, rfftfreq\n", @@ -132,8 +136,7 @@ "from training_ml_control.models import build_sindy_model\n", "\n", "sns.set_theme()\n", - "plt.rcParams[\"figure.figsize\"] = [12, 8]\n", - "warnings.simplefilter(\"ignore\", ExperimentalWarning)" + "plt.rcParams[\"figure.figsize\"] = [12, 8]" ] }, { @@ -264,7 +267,15 @@ { "cell_type": "markdown", "id": "febdaf0c-b8dc-412c-af17-0f685cd2d81e", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ ":::{solution} pendulum-model-exercise\n", ":::" @@ -274,7 +285,15 @@ "cell_type": "code", "execution_count": null, "id": "c72b43f0-abea-4a7e-ace1-b09c9a848278", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" @@ -328,7 +347,10 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -361,9 +383,16 @@ "execution_count": null, "id": "f110a03b-c0a1-46bc-a34f-288d07e91dc3", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -385,9 +414,16 @@ "execution_count": null, "id": "d1b7513e-5fca-4ff6-8415-717eca4ee8d6", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -410,9 +446,14 @@ "execution_count": null, "id": "1449f263-edaf-4ad2-bc56-0e09af7e50ba", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -423,9 +464,9 @@ "U_train = 
actions[training_controller_name].copy()\n", "t_train = np.arange(0, len(X_train)) * env.dt\n", "\n", - "X_test = observations[testing_controller_name][:-1].copy()\n", - "U_test = actions[testing_controller_name].copy()\n", - "t_test = np.arange(0, len(X_test)) * env.dt" + "X_val = observations[testing_controller_name][:-1].copy()\n", + "U_val = actions[testing_controller_name].copy()\n", + "t_val = np.arange(0, len(X_val)) * env.dt" ] }, { @@ -441,9 +482,16 @@ "execution_count": null, "id": "6241990a-4daf-40c5-a7ca-00e7e6ec5dbc", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -458,9 +506,16 @@ "execution_count": null, "id": "9abd56c2-8d09-4477-8675-0830aa570553", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -478,13 +533,16 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ "print(\n", " \"Model score: %f\"\n", - " % sindy_model.score(X_test, u=U_test, t=env.dt, metric=mean_squared_error)\n", + " % sindy_model.score(X_val, u=U_val, t=env.dt, metric=mean_squared_error)\n", ")" ] }, @@ -500,12 +558,13 @@ "scrolled": true, "slideshow": { "slide_type": "" - } + }, + "tags": [] }, "outputs": [], "source": [ - "X_sindy = sindy_model.simulate(X_test[0], t_test, u=U_test)\n", - "X_sindy = np.vstack([X_test[0][np.newaxis, :], X_sindy])" + "X_sindy = sindy_model.simulate(X_val[0], t_val, u=U_val)\n", + "X_sindy = np.vstack([X_val[0][np.newaxis, :], X_sindy])" ] }, { @@ -513,19 +572,26 @@ "execution_count": null, "id": "55a5a11d-7737-4084-af3a-c44c912133af", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "fig, 
axes = plt.subplots(2, X_test.shape[1] // 2 + X_test.shape[1] % 2, sharex=True)\n", + "fig, axes = plt.subplots(2, X_val.shape[1] // 2 + X_val.shape[1] % 2, sharex=True)\n", "axes = axes.ravel()\n", - "for i in range(X_test.shape[1]):\n", - " axes[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axes[i].plot(t_test, X_sindy[:, i], \"r--\", label=\"Model\")\n", - " axes[i].legend()\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " axes[i].plot(t_val, X_sindy[:, i], \"r--\", label=\"Model\")\n", " axes[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", + " axes[i].legend()\n", "\n", "fig.tight_layout()\n", "fig.show();" @@ -560,7 +626,10 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -575,14 +644,19 @@ "execution_count": null, "id": "6d8438ce-2e41-40ee-830e-f55bbd2b3a58", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ - "X_dmd = dmd_model.simulate(X_test[0], U_test, n_steps=X_test.shape[0] - 1)\n", - "X_dmd = np.vstack([X_test[0][np.newaxis, :], X_dmd])" + "X_dmd = dmd_model.simulate(X_val[0], U_val, n_steps=X_val.shape[0] - 1)\n", + "X_dmd = np.vstack([X_val[0][np.newaxis, :], X_dmd])" ] }, { @@ -590,13 +664,20 @@ "execution_count": null, "id": "f0f9f8b5-e79d-4537-9667-297f3ab7e3a9", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "print(\"Model score: %f\" % mean_squared_error(X_test, X_dmd))" + "print(\"Model score: %f\" % mean_squared_error(X_val, X_dmd))" ] }, { @@ -610,15 +691,18 @@ }, "slideshow": { "slide_type": "" - } + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ - "fig, axes = plt.subplots(2, X_test.shape[1] // 2 + X_test.shape[1] % 2, 
sharex=True)\n", + "fig, axes = plt.subplots(2, X_val.shape[1] // 2 + X_val.shape[1] % 2, sharex=True)\n", "axes = axes.ravel()\n", - "for i in range(X_test.shape[1]):\n", - " axes[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", - " axes[i].plot(t_test, X_dmd[:, i], \"r--\", label=\"Model\")\n", + "for i in range(X_val.shape[1]):\n", + " axes[i].plot(t_val, X_val[:, i], \"k\", label=\"Measured\")\n", + " axes[i].plot(t_val, X_dmd[:, i], \"r--\", label=\"Model\")\n", " axes[i].legend()\n", " axes[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", "\n", @@ -629,7 +713,13 @@ { "cell_type": "markdown", "id": "249e7fb9-7d2e-4dee-bbf9-3fd329ec9a9f", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "The results don't seem good enough. We could also use hyper-parameter optimization to find the best model. However, we have to be careful with overfitting." ] @@ -675,7 +765,15 @@ { "cell_type": "markdown", "id": "b819b2c4-0633-4a85-88dc-31336ac876f1", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "source": [ ":::{solution} pendulum-control-exercise\n", ":::" @@ -685,7 +783,15 @@ "cell_type": "code", "execution_count": null, "id": "94bc22d1-5018-4dff-b1b1-935d546f23d5", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# Your solution here" @@ -731,7 +837,13 @@ { "cell_type": "markdown", "id": "07c8e2d4-f0d6-46d1-9a60-93bf3d8efc8f", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "**Model**" ] @@ -741,9 +853,14 @@ "execution_count": null, "id": "6b2243a9-1391-48e3-ba9d-855a1997795c", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] 
}, "outputs": [], "source": [ @@ -763,9 +880,14 @@ "execution_count": null, "id": "b7f3ddc3-0735-4aab-b0bb-ebea13bb9ca4", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -785,19 +907,26 @@ "execution_count": null, "id": "6fbc369c-4d04-4a99-ab52-f48b840d30c4", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ "%%capture\n", - "x0 = X_test[0]\n", + "x0 = X_val[0]\n", "\n", "simulator.reset_history()\n", "simulator.x0 = x0\n", "\n", - "for u in U_test:\n", + "for u in U_val:\n", " simulator.make_step(u.reshape((-1, 1)))" ] }, @@ -806,9 +935,16 @@ "execution_count": null, "id": "dba337e7-fe82-4d66-9c5a-58274073e956", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -828,9 +964,14 @@ "execution_count": null, "id": "b256a42a-cbfc-4a91-93f3-323caaf992c1", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -848,9 +989,14 @@ "execution_count": null, "id": "f3d05114-2831-4fcb-acaa-2ba3f2be3d59", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -864,9 +1010,14 @@ "execution_count": null, "id": "a881458d-1bc9-4cf8-9143-ee37a4f13fee", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -895,9 +1046,16 @@ "execution_count": null, "id": "6c4d867c-1eb0-4859-9694-314103585c0d", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": 
{ + "slide_type": "" + }, + "tags": [ + "hide-input" + ] }, "outputs": [], "source": [ @@ -929,9 +1087,16 @@ "execution_count": null, "id": "e46bd3ed-8a22-48f2-beab-b63e8c69b3fe", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -951,9 +1116,14 @@ "execution_count": null, "id": "a3d8f3d4-0a0d-425e-9d08-326e15de4c69", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -981,9 +1151,14 @@ "execution_count": null, "id": "adaab80b-75dd-48b4-8c02-e24316b9e1c4", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -997,9 +1172,16 @@ "execution_count": null, "id": "fc0a22f0-c146-4b85-bfe4-665c3147ea9c", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ @@ -1011,9 +1193,16 @@ "execution_count": null, "id": "391ff9a2-6b31-4597-b81a-26202ed62cde", "metadata": { + "editable": true, "jupyter": { "source_hidden": true - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-input" + ] }, "outputs": [], "source": [ diff --git a/src/training_ml_control/control.py b/src/training_ml_control/control.py index 2852598..7df7ae9 100644 --- a/src/training_ml_control/control.py +++ b/src/training_ml_control/control.py @@ -151,11 +151,13 @@ def build_mpc_controller( u_penalty: dict[str, float] | None = None, *, uncertainty_values: dict[str, NDArray] | None = None, + n_robust: int = 1, ) -> MPC: mpc = MPC(model) mpc_params = { "n_horizon": n_horizon, "t_step": t_step, + "n_robust": n_robust, "state_discretization": "collocation", "collocation_type": "radau", "collocation_deg": 3, diff --git 
a/src/training_ml_control/models/models.py b/src/training_ml_control/models/models.py index 8fac790..0f522b2 100644 --- a/src/training_ml_control/models/models.py +++ b/src/training_ml_control/models/models.py @@ -27,12 +27,9 @@ def build_cart_model(env: CartEnv) -> LinearModel: D = np.zeros(1) model = LinearModel("continuous") - pos = model.set_variable(var_type="_x", var_name="position") - dpos = model.set_variable(var_type="_x", var_name="velocity") + model.set_variable(var_type="_x", var_name="position") + model.set_variable(var_type="_x", var_name="velocity") model.set_variable(var_type="_u", var_name="force") - # Energy - E_kin = 0.5 * dpos**2 - model.set_expression("E_kinetic", E_kin) model.setup(A, B, C, D) model = model.discretize(env.dt) diff --git a/src/training_ml_control/plots.py b/src/training_ml_control/plots.py index 4115f86..d9d9d7c 100644 --- a/src/training_ml_control/plots.py +++ b/src/training_ml_control/plots.py @@ -26,6 +26,7 @@ def plot_cart_results( """As its name suggests, this function plots the results of a run of the cart environment. """ + plt.close() fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharex=True) ax1.plot(T, observations[:, 0]) ax1.hlines(reference, T[0], T[-1], "r") @@ -50,6 +51,7 @@ def plot_inverted_pendulum_results( """As its name suggests, this function plots the results of a run of the inverted pendulum environment. """ + plt.close() fig, axes = plt.subplots(3, 2, sharex=True) axes = axes.ravel()