Merge pull request #114 from MetOffice/update/aws-input
Update/aws input
zmaalick authored Mar 16, 2022
2 parents 2ca552e + 95f8155 commit c3c66be
Showing 7 changed files with 132 additions and 58 deletions.
4 changes: 3 additions & 1 deletion notebooks/utils.py
@@ -81,6 +81,9 @@ def copy_s3_files(in_fileglob, out_folder):
in_fileglob: s3 uri of files (wild cards can be used)
out_folder: local path where data will be stored
'''
if not os.path.isdir(out_folder):
    mode = 0o777
    os.makedirs(out_folder, mode, exist_ok=False)
matching_keys = find_matching_s3_keys(in_fileglob)
in_bucket_name = _split_s3_uri(in_fileglob)[0]
out_scheme = urlparse(out_folder).scheme
@@ -120,7 +123,6 @@ def load_data(inpath):
for key in keys:
    file = key.split('/')[-1]
    if not os.path.exists(os.path.join(temp_path, file)):
        print(os.path.join(s3dir, file))
        copy_s3_files(os.path.join(s3dir, file), temp_path)
    else:
        print(key, ' already exists')
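For reference, a minimal usage sketch of the updated helper as it is called from the worksheets below (the S3 URI and destination are taken from this PR):

    # Copy a single object from S3 into a local folder. The guard added
    # above now creates the destination folder on first use instead of
    # letting the download fail on a missing path.
    from utils import copy_s3_files

    copy_s3_files('s3://ias-pyprecis/data/sample_data.nc', 'data/')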
67 changes: 52 additions & 15 deletions notebooks/worksheet1.ipynb
@@ -317,9 +317,19 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# download data from S3 bucket in data directory\n",
"from utils import copy_s3_files, flush_data\n",
"\n",
"copy_s3_files('s3://ias-pyprecis/data/sample_data.nc', 'data/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import the necessary modules\n",
@@ -331,7 +341,7 @@
"%matplotlib inline \n",
"\n",
"# provide the path of your sample data\n",
"sample_data = '/project/ciid/projects/PRECIS/worksheets/data/sample_data.nc'\n",
"sample_data = 'data/sample_data.nc'\n",
"\n",
"# Constraint the reading to a single variable and load it into an Iris cube\n",
"cube = iris.load_cube(sample_data)\n",
@@ -454,18 +464,29 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# download data from S3 buket to data directory\n",
"from utils import copy_s3_files\n",
"\n",
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpa/*', 'data/pp/cahpa/')\n",
"copy_s3_files('s3://ias-pyprecis/data/pp/cahpb/*', 'data/pp/cahpb/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datadir = '/project/ciid/projects/PRECIS/worksheets/data'\n",
"datadir = 'data/'\n",
"\n",
"rim_width = 8 # width of rim (in number of grid boxes)\n",
"\n",
"for runid in ['cahpa', 'cahpb']:\n",
" ppdir = os.path.join(datadir, 'pp', runid)\n",
" \n",
"\n",
" # find all the files from which to remove the rim\n",
" file_list = glob.glob(ppdir + '/*pm[ghij]*.pp')\n",
" \n",
@@ -483,13 +504,28 @@
" # add meta data stating that rim has been removed\n",
" rrcube.attributes['rim_removed'] = '{} point rim removed'.format(rim_width)\n",
" trimmed_cubes.append(rrcube)\n",
" \n",
" rrcubes = iris.cube.CubeList(trimmed_cubes)\n",
" # Write out the trimmed data file\n",
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" #outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
"\n",
" iris.save(rrcubes, outfile)\n",
" print('Saved {}'.format(outfile))"
]
},
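The rim-removal step itself falls in the lines elided above; a sketch of one common approach, assuming the loaded pp cube is named cube and its last two dimensions are grid_latitude and grid_longitude (names not shown in this hunk):

    # Trim `rim_width` boundary-affected grid boxes from each edge.
    rrcube = cube[..., rim_width:-rim_width, rim_width:-rim_width]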
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Delete pp data from the disk\n",
"from utils import flush_data\n",
"flush_data('data/pp/cahpa/*')\n",
"flush_data('data/pp/cahpb/*')"
]
},
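flush_data is imported from notebooks/utils.py but its body is outside this diff; a plausible sketch, on the assumption that it simply deletes local files matching a glob pattern:

    import glob
    import os

    def flush_data(fileglob):
        '''Delete local files matching fileglob to free disk space.'''
        for path in glob.glob(fileglob):
            os.remove(path)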
{
"cell_type": "markdown",
"metadata": {},
@@ -564,14 +600,14 @@
"\n",
"for runid in ['cahpa', 'cahpb']:\n",
" # Get data directory\n",
" infile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.rr.nc')\n",
" infile = os.path.join(datadir, runid + '.mon.1961_1990.rr.nc')\n",
" # This will load all the variables in the file into a CubeList\n",
" datacubes = iris.load(infile)\n",
" for cube in datacubes:\n",
" # get the STASH code\n",
" cubeSTASH = cube.attributes['STASH']\n",
" # Make the output file name\n",
" outfile = os.path.join(datadir, 'historical', runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
" outfile = os.path.join(datadir, runid + '.mon.1961_1990.' + stash_codes[str(cubeSTASH)] + '.rr.nc')\n",
" # Save the file\n",
" iris.save(cube, outfile)\n",
" print('Saved {}'.format(outfile)) "
@@ -653,10 +689,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -676,5 +713,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
48 changes: 35 additions & 13 deletions notebooks/worksheet2.ipynb
@@ -75,7 +75,8 @@
"import iris.quickplot as qplt\n",
"import cartopy.crs as ccrs\n",
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
"from cartopy.mpl.geoaxes import GeoAxes"
"from cartopy.mpl.geoaxes import GeoAxes\n",
"from utils import copy_s3_files, flush_data"
]
},
{
@@ -104,21 +105,31 @@
"Before running the code, take a look at it line-by-line to understand what steps are being taken. Then click in the box and press <kbd>ctrl</kbd> + <kbd>enter</kbd> to run the code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download APHRODITE data\n",
"copy_s3_files('s3://ias-pyprecis/data/APHRODITE/*.nc', 'data/APHRODITE/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Provide the names of the directories where the netCDF model files are stored\n",
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
"DATADIR = 'data/'\n",
"\n",
"# Load and print the HadCM3Q0 (cahpa) model cube data\n",
"infile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"cahpaData = iris.load_cube(infile)\n",
"\n",
"# Load and print the ECHAM5 (cahpb) model cube data\n",
"infile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.nc')\n",
"cahpbData = iris.load_cube(infile)\n",
"\n",
"# Load and print the APHRODITE observation cube data\n",
@@ -309,7 +320,7 @@
"cahpaData.remove_coord('forecast_period')\n",
"cahpaData.remove_coord('forecast_reference_time')\n",
"# Save the new cube as a new netCDF file\n",
"outfile = os.path.join(DATADIR, 'historical', 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"iris.save(cahpaData, outfile)"
]
},
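The unit conversion itself sits in the lines elided above; a sketch of the usual step, assuming the precipitation flux arrives in kg m-2 s-1 (typical for PRECIS pp output):

    # 1 kg m-2 of water is 1 mm depth, so kg m-2 day-1 equals mm day-1.
    cahpaData.convert_units('kg m-2 day-1')
    cahpaData.units = 'mm day-1'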
@@ -338,7 +349,7 @@
"# Remove extraneous cube metadata. This helps make cube comparisons easier later.\n",
"\n",
"# Save the new cube as a new netCDF file using the `outfile` filename we've provided below!\n",
"outfile = os.path.join(DATADIR, 'historical', 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'cahpb.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"\n"
]
},
@@ -373,7 +384,7 @@
"\n",
"# Loop through two model runs\n",
"for jobid in ['cahpa', 'cahpb']:\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
"\n",
" # Load the data\n",
" data = iris.load_cube(infile)\n",
@@ -437,14 +448,24 @@
"Follow step d) and complete the code yourself. The file name to load is: `aphro.mon.1961_1990.nc`. We've given you the infile and outfile names to make sure you load and save it in the right place for later!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download APHRODITE data\n",
"copy_s3_files('s3://ias-pyprecis/data/climatology/*.nc', 'data/climatology/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Directory names where data is read from and stored to\n",
"infile = os.path.join(DATADIR, 'APHRODITE', 'aphro.mon.1961_1990.nc')\n",
"infile = os.path.join(DATADIR, 'climatology', 'aphro.mon.1961_1990.nc')\n",
"\n",
"\n",
"# Load the aphrodite data\n",
Expand All @@ -460,7 +481,7 @@
"\n",
"\n",
"# save the seasonal mean cube as a NetCDF file\n",
"outfile = os.path.join(DATADIR, 'climatology', 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
"outfile = os.path.join(DATADIR, 'aphro.OND.mean.1961_1990.pr.mmday-1.nc')\n",
"\n",
"\n",
"# print the APHRODITE seasonal mean cube\n",
@@ -550,7 +571,7 @@
"outputs": [],
"source": [
"# Directory name where data is read from\n",
"indir = os.path.join(DATADIR, 'climatology')\n",
"indir = DATADIR\n",
"\n",
"# load cahpa model data\n",
"infile = os.path.join(indir, 'cahpa.OND.mean.1961_1990.pr.mmday-1.nc')\n",
@@ -663,10 +684,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -686,5 +708,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
34 changes: 24 additions & 10 deletions notebooks/worksheet3.ipynb
@@ -63,9 +63,11 @@
"import cartopy.crs as ccrs\n",
"from mpl_toolkits.axes_grid1 import AxesGrid\n",
"from cartopy.mpl.geoaxes import GeoAxes\n",
"from utils import copy_s3_files, flush_data\n",
"\n",
"\n",
"# Provide the names of the directories where the netCDF model files are stored\n",
"DATADIR = '/project/ciid/projects/PRECIS/worksheets/data/'\n",
"DATADIR = 'data/'\n",
"\n",
"# Directory name where data is read from\n",
"HISTDIR = os.path.join(DATADIR, 'historical')\n",
@@ -179,7 +181,7 @@
"outputs": [],
"source": [
"# Load the HadCM3Q0 (cahpa) model cube data as need grid information from it\n",
"infile = os.path.join(HISTDIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"infile = os.path.join(DATADIR, 'cahpa.mon.1961_1990.pr.rr.nc')\n",
"cahpa_cube = iris.load_cube(infile)\n",
"\n",
"pole_lat = cahpa_cube.coord_system().grid_north_pole_latitude\n",
@@ -226,8 +228,8 @@
"\n",
"for jobid in ['cahpa', 'cahpb']:\n",
" # Directory name where data are read from and stored to\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" \n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.nc')\n",
" print(infile)\n",
" # Load the baseline precipitation data using the KL_constraint - the command below\n",
" # loads the data into a cube constrained by the area chosen\n",
" data = iris.load_cube(infile)\n",
Expand All @@ -236,7 +238,7 @@
" grid_latitude=rotated_lats)\n",
"\n",
" # save the constrained cube\n",
" outfile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" outfile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" iris.save(data_KL, outfile)\n",
" print('Saved: {}'.format(outfile))"
]
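rotated_lats and rotated_lons come from lines elided above; a sketch of the rotation step, assuming Kuala Lumpur's true coordinates are mapped onto the rotated grid using the pole read from the cube (the coordinate values below are illustrative):

    import numpy as np
    from iris.analysis.cartography import rotate_pole

    lons = np.array([101.5, 101.9])   # illustrative bounds around Kuala Lumpur
    lats = np.array([2.9, 3.3])
    rotated_lons, rotated_lats = rotate_pole(lons, lats, pole_lon, pole_lat)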
@@ -302,7 +304,7 @@
"source": [
"for jobid in ['cahpa', 'cahpb']:\n",
" # Set up the path to the data\n",
" infile = os.path.join(DATADIR, 'historical', jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" infile = os.path.join(DATADIR, jobid + '.mon.1961_1990.pr.rr.mmday-1.KL.nc')\n",
" \n",
" # Load the data extracted around Kuala Lumpur created in previous step\n",
" data = iris.load_cube(infile)\n",
@@ -745,6 +747,17 @@
"**j) Plot a series of figures** that shows 1) the monthly cycles of temperature and rainfall comparing the 6 models and the observations; and 2) the monthly differences between the models and observations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we first need to download CRU and netcdf data\n",
"copy_s3_files('s3://ias-pyprecis/data/CRU/*.nc', 'data/CRU/')\n",
"copy_s3_files('s3://ias-pyprecis/data/netcdf/*.nc', 'data/netcdf/')"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -755,7 +768,7 @@
"Here are some useful varibles you might like to use in your scripts\n",
"'''\n",
"# Some helpful data locations\n",
"DATADIR = '/project/precis/worksheets/data'\n",
"DATADIR = 'data'\n",
"APHRODIR = os.path.join(DATADIR, 'APHRODITE')\n",
"CRUDIR = os.path.join(DATADIR, 'CRU')\n",
"CLIMDIR = os.path.join(DATADIR, 'climatology')\n",
@@ -991,10 +1004,11 @@
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "pyprecis-environment",
"display_name": "Python [conda env:pyprecis-environment] (arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1)",
"language": "python",
"name": "pyprecis-environment"
"name": "conda-env-pyprecis-environment-py__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-2:198477955030:image-version/abtraining/1"
},
"language_info": {
"codemirror_mode": {
@@ -1014,5 +1028,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
