diff --git a/tutorials/3-basic-pz.ipynb b/tutorials/3-basic-pz.ipynb new file mode 100644 index 0000000..aa2b576 --- /dev/null +++ b/tutorials/3-basic-pz.ipynb @@ -0,0 +1,383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Tutorial 3: Multiagent scenarios with PettingZoo " + ], + "metadata": { + "collapsed": false + }, + "id": "2e94ff6886341f4b" + }, + { + "cell_type": "markdown", + "source": [ + "In this tutorial, we will use Cogment Lab to interact with a multiagent environment from the [PettingZoo](https://www.pettingzoo.farama.org/) library. We will combine simple algorithmic agents with a human player to play a game of cooperative Pong.\n", + "\n", + "Most of the code is the same as with Gymnasium -- check out the previous tutorial to understand it." + ], + "metadata": { + "collapsed": false + }, + "id": "9cd2edc71083bc7" + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from cogment_lab import Cogment\n", + "from cogment_lab.envs import ParallelEnvironment \n", + "from cogment_lab.actors import RandomActor, ConstantActor" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:56.492359Z", + "start_time": "2024-01-18T22:24:56.097685Z" + } + }, + "id": "de5fa0a6017c756d", + "execution_count": 1 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "cog = Cogment(log_dir=\"logs/tutorial3\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:56.495634Z", + "start_time": "2024-01-18T22:24:56.493720Z" + } + }, + "id": "99af24a388e8efa9", + "execution_count": 2 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cenv = ParallelEnvironment(\n", + " env_path=\"pettingzoo.butterfly.cooperative_pong_v5.parallel_env\",\n", + " render=True\n", + ")\n", + "\n", + "await cog.run_env(env=cenv, \n", + " env_name=\"pong\",\n", + " port=9011, \n", + " log_file=\"env.log\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:57.375763Z", + "start_time": "2024-01-18T22:24:56.495844Z" + } + }, + "id": "141bf2be7787d24a", + "execution_count": 3 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "act_space = cenv.env.action_space(\"paddle_0\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:57.378585Z", + "start_time": "2024-01-18T22:24:57.376109Z" + } + }, + "id": "4862e2e0d4899a40", + "execution_count": 4 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actor = RandomActor(action_space=act_space)\n", + "\n", + "await cog.run_actor(actor, \n", + " actor_name=\"random\",\n", + " port=9021,\n", + " log_file=\"actor-random.log\"\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:57.899263Z", + "start_time": "2024-01-18T22:24:57.379466Z" + } + }, + "id": "194acaf28f039ecd", + "execution_count": 5 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "constant_actor = ConstantActor(action=0)\n", + "\n", + "await cog.run_actor(constant_actor, \n", + " actor_name=\"constant\",\n", + " port=9022,\n", + " log_file=\"actor-constant.log\"\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:58.321857Z", + "start_time": "2024-01-18T22:24:57.899701Z" + } + }, + "id": "25a3f9601d66d5c5", + "execution_count": 6 + }, + { + "cell_type": "markdown", + "source": [ + "Just like with Gymnasium, we can get a trial from a PettingZoo environment by specifying the environment and the actors -- we just need to specify an actor name for each agent in the environment." + ], + "metadata": { + "collapsed": false + }, + "id": "e71c8cd6b15e394d" + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "['paddle_0', 'paddle_1']" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cenv.env.possible_agents" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:58.325979Z", + "start_time": "2024-01-18T22:24:58.322306Z" + } + }, + "id": "5f750d8c578f228a", + "execution_count": 7 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "trial_id = await cog.start_trial(\n", + " env_name=\"pong\", # Name of the environment to use\n", + " actor_impls={\n", + " \"paddle_0\": \"random\",\n", + " \"paddle_1\": \"constant\"\n", + " },\n", + ")\n", + "data = await cog.get_trial_data(trial_id)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:59.202037Z", + "start_time": "2024-01-18T22:24:58.530299Z" + } + }, + "id": "3341931185f7585b", + "execution_count": 8 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hopefully random actions: [1 1 0 1 0 1 1 2 1 0 2 0 1 1 2 1 0 0 2 1 1 2 1 1 2 1 0 2 1 2 1 1 1 0 1 0 2\n", + " 0 0 0 0 1 0 2 2 0 0 2]\n", + "Hopefully constant actions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0]\n" + ] + } + ], + "source": [ + "print(f\"Hopefully random actions: {data['paddle_0'].actions}\")\n", + "print(f\"Hopefully constant actions: {data['paddle_1'].actions}\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:59.204715Z", + "start_time": "2024-01-18T22:24:59.202481Z" + } + }, + "id": "3bbe320151c185d2", + "execution_count": 9 + }, + { + "cell_type": "markdown", + "source": [ + "Just like with Gymnasium, we can use a human in the trial. Unlike Gymnasium, we can have both a human and an \"AI\" acting together in the same environment: " + ], + "metadata": { + "collapsed": false + }, + "id": "59eecb3aff9cf2d8" + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await cog.run_web_ui(\n", + " actions=[\"no-op\", \"ArrowUp\", \"ArrowDown\"],\n", + " log_file=\"human.log\",\n", + " fps=30\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:24:59.728665Z", + "start_time": "2024-01-18T22:24:59.205379Z" + } + }, + "id": "42660085df9c9271", + "execution_count": 10 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "trial_id = await cog.start_trial(\n", + " env_name=\"pong\", # Name of the environment to use\n", + " session_config={\"render\": True},\n", + " actor_impls={\n", + " \"paddle_0\": \"random\",\n", + " \"paddle_1\": \"web_ui\"\n", + " },\n", + ")\n", + "data = await cog.get_trial_data(trial_id)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:25:07.565831Z", + "start_time": "2024-01-18T22:24:59.728081Z" + } + }, + "id": "557a7b078934468e", + "execution_count": 11 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random actions: [0 0 0 2 0 2 1 0 2 0 2 1 2 0 0 2 2 1 0 0 2 2 1 2 1 0 0 2 1 1 2 1 1 0 0 1 2\n", + " 1 0 0 0 2 1 2 2 2 1 2]\n", + "Human actions: [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 2 2 2 2 2 0 0 0 0 0 1 1 1 1 0 0 2 2\n", + " 2 0 0 0 0 0 1 1 1 1 0]\n", + "Your joint reward: -4.777777671813965\n" + ] + } + ], + "source": [ + "print(f\"Random actions: {data['paddle_0'].actions}\")\n", + "print(f\"Human actions: {data['paddle_1'].actions}\")\n", + "print(f\"Your joint reward: {data['paddle_1'].rewards.sum()}\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-18T22:29:11.368500Z", + "start_time": "2024-01-18T22:29:11.364298Z" + } + }, + "id": "66a50c19b2e01a28", + "execution_count": 16 + }, + { + "cell_type": "markdown", + "source": [ + "Feel free to try other environments, play against other agents.\n", + "\n", + "Note: it is currently not possible to have two human \"agents\" playing at the same time -- it's only one human and an arbitrary number of \"AI\" agents. This is likely to change in the future." + ], + "metadata": { + "collapsed": false + }, + "id": "6d912f1225ac01fc" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}