All the notebooks are working now.

DeepReinforcementLearning · Apr 21, 2024 · b035945 · b035945
1 parent 26d7e0b
commit b035945
Show file tree

Hide file tree

Showing 13 changed files with 2,010 additions and 644 deletions.
diff --git a/Chapter 1/Ch1_Introduction.ipynb b/Chapter 1/Ch1_Introduction.ipynb
@@ -3,7 +3,10 @@
   {
    "cell_type": "markdown",
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
    },
    "source": [
     "# Deep Reinforcement Learning <em> in Action </em>\n",
@@ -87,14 +90,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "5.54 s ± 432 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+      "4.4 s ± 844 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     }
    ],
@@ -105,29 +108,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The slowest run took 4.44 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
-      "412 ns ± 319 ns per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+      "319 ns ± 274 ns per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     }
    ],
    "source": [
     "%timeit fib_mem(35)\n",
     "# We get a few hundred nanoseconds to run with n=35 (the exact figure varies between runs)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:deeprl]",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "conda-env-deeprl-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -139,9 +148,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/Chapter 10/Ch10_book.ipynb b/Chapter 10/Ch10_book.ipynb
diff --git a/Chapter 2/Ch2_book.ipynb b/Chapter 2/Ch2_book.ipynb
diff --git a/Chapter 3/Ch3_book.ipynb b/Chapter 3/Ch3_book.ipynb
diff --git a/Chapter 4/Ch4_book.ipynb b/Chapter 4/Ch4_book.ipynb
diff --git a/Chapter 5/Ch5_book.ipynb b/Chapter 5/Ch5_book.ipynb
@@ -17,9 +17,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n",
+      " 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47\n",
+      " 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63]\n",
+      "8\n",
+      "[array([ 0,  1,  4,  9, 16, 25, 36, 49]), array([ 64,  81, 100, 121, 144, 169, 196, 225]), array([256, 289, 324, 361, 400, 441, 484, 529]), array([576, 625, 676, 729, 784, 841, 900, 961]), array([1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521]), array([1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209]), array([2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025]), array([3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969])]\n"
+     ]
+    }
+   ],
    "source": [
     "import multiprocessing as mp\n",
     "import numpy as np\n",
@@ -42,9 +54,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "In process 0\n",
+      "In process 1\n",
+      "In process 2\n",
+      "In process 3\n",
+      "In process 4\n",
+      "In process 5\n",
+      "In process 6\n",
+      "In process 7\n",
+      "[array([ 0,  1,  4,  9, 16, 25, 36, 49]), array([ 64,  81, 100, 121, 144, 169, 196, 225]), array([256, 289, 324, 361, 400, 441, 484, 529]), array([576, 625, 676, 729, 784, 841, 900, 961]), array([1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521]), array([1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209]), array([2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025]), array([3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969])]\n"
+     ]
+    }
+   ],
    "source": [
     "def square(i, x, queue):\n",
     "    print(\"In process {}\".format(i,))\n",
@@ -79,9 +107,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/envs/registration.py:307: DeprecationWarning: The package name gym_minigrid has been deprecated in favor of minigrid. Please uninstall gym_minigrid and install minigrid with `pip install minigrid`. Future releases will be maintained under the new package name minigrid.\n",
+      "  fn()\n"
+     ]
+    }
+   ],
    "source": [
     "import torch\n",
     "from torch import nn\n",
@@ -118,7 +155,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -143,7 +180,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -161,7 +198,7 @@
     "        action = action_dist.sample() #E\n",
     "        logprob_ = policy.view(-1)[action]\n",
     "        logprobs.append(logprob_)\n",
-    "        state_, _, done, info = worker_env.step(action.detach().numpy())\n",
+    "        state_, _, done, _, info = worker_env.step(action.detach().numpy())\n",
     "        state = torch.from_numpy(state_).float()\n",
     "        if done: #F\n",
     "            reward = -10\n",
@@ -181,7 +218,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -215,7 +252,28 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n",
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
+      "  if not isinstance(terminated, (bool, np.bool8)):\n"
+     ]
+    }
+   ],
    "source": [
     "MasterNode = ActorCritic() #A\n",
     "MasterNode.share_memory() #B\n",
@@ -246,9 +304,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/envs/classic_control/cartpole.py:211: UserWarning: \u001b[33mWARN: You are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym(\"CartPole-v1\", render_mode=\"rgb_array\")\u001b[0m\n",
+      "  gym.logger.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Lost\n",
+      "Lost\n",
+      "Lost\n",
+      "Lost\n"
+     ]
+    }
+   ],
    "source": [
     "env = gym.make(\"CartPole-v1\")\n",
     "env.reset()\n",
@@ -259,7 +336,7 @@
     "    logits,value = MasterNode(state)\n",
     "    action_dist = torch.distributions.Categorical(logits=logits)\n",
     "    action = action_dist.sample()\n",
-    "    state2, reward, done, info = env.step(action.detach().numpy())\n",
+    "    state2, reward, done, info, _ = env.step(action.detach().numpy())\n",
     "    if done:\n",
     "        print(\"Lost\")\n",
     "        env.reset()\n",
@@ -277,7 +354,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -318,9 +395,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No bootstrapping\n",
+      "0.010000000000000009 1.99\n",
+      "With bootstrapping\n",
+      "0.9901 2.9701\n"
+     ]
+    }
+   ],
    "source": [
     "#Simulated rewards for 3 steps\n",
     "r1 = [1,1,-1]\n",
@@ -342,13 +430,20 @@
     "print(\"With bootstrapping\")\n",
     "print(R1,R2)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:deeprl]",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "conda-env-deeprl-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -360,9 +455,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/Chapter 6/Ch6_book.ipynb b/Chapter 6/Ch6_book.ipynb