Merge pull request #114 from MetOffice/update/aws-input
Update/aws input
zmaalick authored Mar 16, 2022
2 parents 2ca552e + 95f8155 commit c3c66be
Showing 7 changed files with 132 additions and 58 deletions.
4 changes: 3 additions & 1 deletion notebooks/utils.py
@@ -81,6 +81,9 @@ def copy_s3_files(in_fileglob, out_folder):
in_fileglob: s3 uri of files (wild cards can be used)
out_folder: local path where data will be stored
'''
if not os.path.isdir(out_folder):
    mode = 0o777
    os.makedirs(out_folder, mode, exist_ok=False)
matching_keys = find_matching_s3_keys(in_fileglob)
in_bucket_name = _split_s3_uri(in_fileglob)[0]
out_scheme = urlparse(out_folder).scheme
@@ -120,7 +123,6 @@ def load_data(inpath):
for key in keys:
    file = key.split('/')[-1]
    if not os.path.exists(os.path.join(temp_path, file)):
        print(os.path.join(s3dir, file))
        copy_s3_files(os.path.join(s3dir, file), temp_path)
    else:
        print(key, ' already exists')
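For reference, a minimal usage sketch of the updated helper as it is called from the worksheets below (the S3 URI and destination are taken from this PR):

    # Copy a single object from S3 into a local folder. The guard added
    # above now creates the destination folder on first use instead of
    # letting the download fail on a missing path.
    from utils import copy_s3_files

    copy_s3_files('s3://ias-pyprecis/data/sample_data.nc', 'data/')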
67 changes: 52 additions & 15 deletions notebooks/worksheet1.ipynb
@@ -317,9 +317,19 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# download data from S3 bucket in data directory\n",
"from utils import copy_s3_files, flush_data\n",
"\n",
"copy_s3_files('s3://ias-pyprecis/data/sample_data.nc', 'data/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import the necessary modules\n",
@@ -331,7 +341,7 @@
"%matplotlib inline \n",
"\n",
"# provide the path of your sample data\n",
"sample_data = '/project/ciid/projects/PRECIS/worksheets/data/sample_data.nc'\n",
"sample_data = 'data/sample_data.nc'\n",
"\n",
"# Constraint the reading to a single variable and load it into an Iris cube\n",
"cube = iris.load_cube(sample_data)\n",
@@ -454,18 +464,29 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# download data from S3 buket to data directory\n",
"from utils import copy_s3_files\n",
"\n",
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpa/*', 'data/pp/cahpa/')\n",
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpb/*', 'data/pp/cahpb/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datadir = '/project/ciid/projects/PRECIS/worksheets/data'\n",
"datadir = 'data/'\n",
"\n",
"rim_width = 8 # width of rim (in number of grid boxes)\n",
"\n",
"for runid in ['cahpa', 'cahpb']:\n",
" ppdir = os.path.join(datadir, 'pp', runid)\n",
" \n",
"\n",
" # find all the files from which to remove the rim\n",
" file_list = glob.glob(ppdir + '/*pm[ghij]*.pp')\n",
" \n",
@@ -483,13 +504,28 @@
" # add meta data stating that rim has been removed\n",
" rrcube.attributes['rim_removed'] = '{} point rim removed'.format(rim_width)\n",
" trimmed_cubes.append(rrcube)\n",
" \n",
" rrcubes = iris.cube.CubeList(trimmed_cubes)\n",
" # Write out the trimmed data file\n",
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" #outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
"\n",
" iris.save(rrcubes, outfile)\n",
" print('Saved {}'.format(outfile))"
]
},
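The rim-removal step itself falls in the lines elided above; a sketch of one common approach, assuming the loaded pp cube is named cube and its last two dimensions are grid_latitude and grid_longitude (names not shown in this hunk):

    # Trim `rim_width` boundary-affected grid boxes from each edge.
    rrcube = cube[..., rim_width:-rim_width, rim_width:-rim_width]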
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Delete pp data from the disk\n",
"from utils import flush_data\n",
"flush_data('data/pp/cahpa/*')\n",
"flush_data('data/pp/cahpb/*')"
]
},
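flush_data is imported from notebooks/utils.py but its body is outside this diff; a plausible sketch, on the assumption that it simply deletes local files matching a glob pattern:

    import glob
    import os

    def flush_data(fileglob):
        '''Delete local files matching fileglob to free disk space.'''
        for path in glob.glob(fileglob):
            os.remove(path)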
{
"cell_type": "markdown",
"metadata": {},
@@ -564,14 +600,14 @@
"\n",
"for runid in ['cahpa', 'cahpb']:\n",
" # Get data directory\n",
" infile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" infile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
" # This will load all the variables in the file into a CubeList\n",
" datacubes = iris.load(infile)\n",
" for cube in datacubes:\n",
" # get the STASH code\n",
" cubeSTASH = cube.attributes['STASH']\n",
" # Make the output file name\n",
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
" # Save the file\n",
" iris.save(cube, outfile)\n",
" print('Saved {}'.format(outfile)) "
@@ -653,10 +689,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -676,5 +713,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
48 changes: 35 additions & 13 deletions notebooks/worksheet2.ipynb
@@ -75,7 +75,8 @@
"import iris.quickplot as qplt\n",
"import cartopy.crs as ccrs\n",
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
"from cartopy.mpl.geoaxes import GeoAxes"
"from cartopy.mpl.geoaxes import GeoAxes\n",
"from utils import copy_s3_files, flush_data"
]
},
{
@@ -104,21 +105,31 @@
"Before running the code, take a look at it line-by-line to understand what steps are being taken. Then click in the box and press <kbd>ctrl</kbd> + <kbd>enter</kbd> to run the code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download APHRODITE data\n",
"copy_s3_files('s3://ias-pyprecis/data/APHRODITE/*.nc', 'data/APHRODITE/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Provide the names of the directories where the netCDF model files are stored\n",
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
"DATADIR = 'data/'\n",
"\n",
"# Load and print the HadCM3Q0 (cahpa) model cube data\n",
"infile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"cahpaData = iris.load_cube(infile)\n",
"\n",
"# Load and print the ECHAM5 (cahpb) model cube data\n",
"infile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.nc')\n",
"cahpbData = iris.load_cube(infile)\n",
"\n",
"# Load and print the APHRODITE observation cube data\n",
@@ -309,7 +320,7 @@
"cahpaData.remove_coord('forecast_period')\n",
"cahpaData.remove_coord('forecast_reference_time')\n",
"# Save the new cube as a new netCDF file\n",
"outfile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"iris.save(cahpaData, outfile)"
]
},
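The unit conversion itself sits in the lines elided above; a sketch of the usual step, assuming the precipitation flux arrives in kg m-2 s-1 (typical for PRECIS pp output):

    # 1 kg m-2 of water is 1 mm depth, so kg m-2 day-1 equals mm day-1.
    cahpaData.convert_units('kg m-2 day-1')
    cahpaData.units = 'mm day-1'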
@@ -338,7 +349,7 @@
"# Remove extraneous cube metadata. This helps make cube comparisons easier later.\n",
"\n",
"# Save the new cube as a new netCDF file using the `outfile` filename we've provided below!\n",
"outfile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"\n"
]
},
@@ -373,7 +384,7 @@
"\n",
"# Loop through two model runs\n",
"for jobid in ['cahpa', 'cahpb']:\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"\n",
" # Load the data\n",
" data = iris.load_cube(infile)\n",
@@ -437,14 +448,24 @@
"Follow step d) and complete the code yourself. The file name to load is: `aphro.mon.1961_1990.nc`. We've given you the infile and outfile names to make sure you load and save it in the right place for later!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download APHRODITE data\n",
"copy_s3_files('s3://ias-pyprecis/data/climatology/*.nc', 'data/climatology/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Directory names where data is read from and stored to\n",
"infile = os.path.join(DATADIR, 'APHRODITE', 'aphro.mon.1961_1990.nc')\n",
"infile = os.path.join(DATADIR, 'climatology', 'aphro.mon.1961_1990.nc')\n",
"\n",
"\n",
"# Load the aphrodite data\n",
Expand All @@ -460,7 +481,7 @@
"\n",
"\n",
"# save the seasonal mean cube as a NetCDF file\n",
"outfile = os.path.join(DATADIR, 'climatology', 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
"\n",
"\n",
"# print the APHRODITE seasonal mean cube\n",
@@ -550,7 +571,7 @@
"outputs": [],
"source": [
"# Directory name where data is read from\n",
"indir = os.path.join(DATADIR, 'climatology')\n",
"indir = DATADIR\n",
"\n",
"# load cahpa model data\n",
"infile = os.path.join(indir, 'cahpa.OND.mean.1961_1990.pr.mmday-1.nc')\n",
@@ -663,10 +684,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -686,5 +708,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
34 changes: 24 additions & 10 deletions notebooks/worksheet3.ipynb
@@ -63,9 +63,11 @@
"import cartopy.crs as ccrs\n",
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
"from cartopy.mpl.geoaxes import GeoAxes\n",
"from utils import copy_s3_files, flush_data\n",
"\n",
"\n",
"# Provide the names of the directories where the netCDF model files are stored\n",
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
"DATADIR = 'data/'\n",
"\n",
"# Directory name where data is read from\n",
"HISTDIR = os.path.join(DATADIR, 'historical')\n",
@@ -179,7 +181,7 @@
"outputs": [],
"source": [
"# Load the HadCM3Q0 (cahpa) model cube data as need grid information from it\n",
"infile = os.path.join(HISTDIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"cahpa_cube = iris.load_cube(infile)\n",
"\n",
"pole_lat = cahpa_cube.coord_system().grid_north_pole_latitude\n",
@@ -226,8 +228,8 @@
"\n",
"for jobid in ['cahpa', 'cahpb']:\n",
" # Directory name where data are read from and stored to\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" \n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" print(infile)\n",
" # Load the baseline precipitation data using the KL_constraint - the command below\n",
" # loads the data into a cube constrained by the area chosen\n",
" data = iris.load_cube(infile)\n",
Expand All @@ -236,7 +238,7 @@
" grid_latitude=rotated_lats)\n",
"\n",
" # save the constrained cube\n",
" outfile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" outfile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" iris.save(data_KL, outfile)\n",
" print('Saved: {}'.format(outfile))"
]
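rotated_lats and rotated_lons come from lines elided above; a sketch of the rotation step, assuming Kuala Lumpur's true coordinates are mapped onto the rotated grid using the pole read from the cube (the coordinate values below are illustrative):

    import numpy as np
    from iris.analysis.cartography import rotate_pole

    lons = np.array([101.5, 101.9])   # illustrative bounds around Kuala Lumpur
    lats = np.array([2.9, 3.3])
    rotated_lons, rotated_lats = rotate_pole(lons, lats, pole_lon, pole_lat)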
@@ -302,7 +304,7 @@
"source": [
"for jobid in ['cahpa', 'cahpb']:\n",
" # Set up the path to the data\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" \n",
" # Load the data extracted around Kuala Lumpur created in previous step\n",
" data = iris.load_cube(infile)\n",
@@ -745,6 +747,17 @@
"**j) Plot a series of figures** that shows 1) the monthly cycles of temperature and rainfall comparing the 6 models and the observations; and 2) the monthly differences between the models and observations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download CRU and netcdf data\n",
"copy_s3_files('s3://ias-pyprecis/data/CRU/*.nc', 'data/CRU/')\n",
"copy_s3_files('s3://ias-pyprecis/data/netcdf/*.nc', 'data/netcdf/')"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -755,7 +768,7 @@
"Here are some useful varibles you might like to use in your scripts\n",
"'''\n",
"# Some helpful data locations\n",
"DATADIR = '/project/precis/worksheets/data'\n",
"DATADIR = 'data'\n",
"APHRODIR = os.path.join(DATADIR, 'APHRODITE')\n",
"CRUDIR = os.path.join(DATADIR, 'CRU')\n",
"CLIMDIR = os.path.join(DATADIR, 'climatology')\n",
@@ -991,10 +1004,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -1014,5 +1028,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
