diff --git a/openquake/cat/hmg/check.py b/openquake/cat/hmg/check.py
index 23a85f4e3..67ba47796 100644
--- a/openquake/cat/hmg/check.py
+++ b/openquake/cat/hmg/check.py
@@ -125,8 +125,8 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
     # datetime can only cover 548 years starting in 1677
     # general advice will be to exclude historic events and
     # add those later
-    subcat = cat[cat['year'] > 1800]
-
+    subcat = cat[(cat['year'] > 1800) & (cat['value'] > 1.0)]
+
     for index, row in tqdm(subcat.iterrows()):
         # Take the index from delta_ll - this is needed
         # when delta_ll varies with time.
@@ -137,7 +137,7 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
         ll_thrs = ll_d[idx_t][idx_mag]
         sel_thrs = time_d[idx_t][idx_mag]
         sel_thrs = sel_thrs.total_seconds()
-
+
         # Find events close in time
         tmp_dff = abs(subcat.loc[:, 'datetime'] - pd.to_datetime(row.datetime))
         threshold = datetime.timedelta(seconds=sel_thrs)
@@ -150,7 +150,8 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
             minla = row.latitude - ll_thrs
             maxlo = row.longitude + ll_thrs
             maxla = row.latitude + ll_thrs
-            idx_dist = list(sidx.intersection((minlo, minla, maxlo, maxla)))
+            idx_dist_ind = list(sidx.intersection((minlo, minla, maxlo, maxla)))
+            idx_dist = cat.index[idx_dist_ind]
         else:
             tmp_dist = abs(geodetic_distance(
diff --git a/openquake/cat/hmg/hmg.py b/openquake/cat/hmg/hmg.py
index 6c9b63b1b..b357ca830 100644
--- a/openquake/cat/hmg/hmg.py
+++ b/openquake/cat/hmg/hmg.py
@@ -33,6 +33,115 @@ import pandas as pd
 
 
+def apply_mag_conversion_rule_keep_all(low_mags, conv_eqs, rows, save):
+    """
+    Sequentially applies a set of magnitude-conversion rules to the rows
+    still to be processed and appends the converted rows to the `save`
+    :class:`pandas.DataFrame` instance.
+
+    :param low_mags:
+        A list of lower magnitude thresholds, one per conversion equation
+    :param conv_eqs:
+        A list of strings, each one with the conversion equation to be
+        applied to the magnitudes above the corresponding threshold
+    :param rows:
+        The :class:`pandas.DataFrame` instance containing the information
+        still to be processed
+    :param save:
+        The :class:`pandas.DataFrame` instance accumulating the processed
+        rows
+    :return:
+        A :class:`pandas.DataFrame` instance with the homogenised
+        magnitudes.
+    """
+
+    # Formatting
+    fmt2 = "m[m >= {:.2f}]"
+
+    # Temporary assigning magnitude
+    m = np.round(rows['value'].values, 3)
+    tmp = np.zeros_like(m)
+
+    for mlow, conversion in zip(low_mags, conv_eqs):
+        m_inds = m >= mlow
+        if conversion == 'm':
+            tmp[m_inds] = m[m_inds]
+        else:
+            tmpstr = re.sub('m', fmt2.format(mlow), conversion)
+            cmd = "tmp[m >= {:.2f}] = {:s}".format(mlow, tmpstr)
+
+            try:
+                exec(cmd)
+            except ValueError:
+                fmt = 'Cannot execute the following conversion rule:\n{:s}'
+                print(fmt.format(conversion))
+
+    rows = rows.copy()
+    rows.loc[:, 'magMw'] = tmp
+    save = save.copy()
+    save = pd.concat([save, rows], ignore_index=True, sort=False)
+
+    return save
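Note on the machinery above: each conversion rule is a string in which `m` stands for the magnitude array; `re.sub` rewrites every `m` into a masked slice and the resulting assignment is run with `exec`. A minimal standalone sketch of what gets executed, using the variable names from the patch and made-up magnitudes:

    import re
    import numpy as np

    # Build the command the same way apply_mag_conversion_rule* does
    fmt2 = "m[m >= {:.2f}]"
    conversion = "0.8 * m + 0.2"      # sample rule, as in the new test
    mlow = 5.0
    tmpstr = re.sub('m', fmt2.format(mlow), conversion)
    cmd = "tmp[m >= {:.2f}] = {:s}".format(mlow, tmpstr)
    print(cmd)  # tmp[m >= 5.00] = 0.8 * m[m >= 5.00] + 0.2

    # Apply it to made-up magnitudes
    m = np.round(np.array([4.2, 5.2, 6.8]), 3)
    tmp = np.zeros_like(m)
    exec(cmd)
    # A second rule, "m" above 6.0, then overwrites the top range
    tmp[m >= 6.0] = m[m >= 6.0]
    print(tmp)  # [0.   4.36 6.8 ] - values below every threshold stay 0.0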
+
+
+def process_magnitude_keep_all(work, mag_rules):
+    """
+    :param work:
+        A :class:`pandas.DataFrame` instance obtained by joining the origin
+        and magnitude dataframes
+    :param mag_rules:
+        A dictionary with the rules to be used for processing the catalogue
+    :return:
+        A :class:`pandas.DataFrame` instance with the homogenised
+        magnitudes of all the rows matching a rule; unlike
+        `process_magnitude`, every matching agency/magnitude-type
+        combination is kept.
+    """
+
+    # Add a column for destination
+    if "magMw" not in list(work.columns):
+        work["magMw"] = np.nan
+
+    # This is a new dataframe used to store the processed events
+    save = pd.DataFrame(columns=work.columns)
+
+    # Looping over agencies
+    for agency in mag_rules.keys():
+        print(' Agency: {:s} ('.format(agency), end="")
+
+        # Looping over magnitude-types
+        for mag_type in mag_rules[agency].keys():
+            print('{:s} '.format(mag_type), end='')
+
+            # Create the first selection condition and select rows
+            cond = get_mag_selection_condition(agency, mag_type)
+            try:
+                rows = work.loc[eval(cond), :]
+            except ValueError:
+                fmt = 'Cannot evaluate the following condition:\n {:s}'
+                print(fmt.format(cond))
+
+            # TODO
+            # This is an initial solution that is not ideal since it does
+            # not take the best information available.
+            # Remove duplicates. This can happen when we process a magnitude
+            # type without specifying the agency
+            flag = rows["eventID"].duplicated(keep='first')
+
+            if any(flag):
+                # Keep the row with the largest magnitude for each event -
+                # experimental, introduced to address the MEX issue
+                rows = (rows.sort_values("value", ascending=False)
+                        .drop_duplicates('eventID').sort_index())
+
+            # Magnitude conversion
+            if len(rows) > 0:
+                low_mags = mag_rules[agency][mag_type]['low_mags']
+                conv_eqs = mag_rules[agency][mag_type]['conv_eqs']
+                save = apply_mag_conversion_rule_keep_all(low_mags, conv_eqs,
+                                                          rows, save)
+        print(")")
+
+    return save
+
+
 def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):
     """
     This function applies sequentially a set of rules to the information in
@@ -59,12 +168,13 @@ def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):
     fmt2 = "m[m >= {:.2f}]"
 
     # Temporary assigning magnitude
-    m = rows['value'].values
+    m = np.round(rows['value'].values, 3)
     tmp = np.zeros_like(m)
 
     for mlow, conversion in zip(low_mags, conv_eqs):
+        m_inds = m >= mlow
         if conversion == 'm':
-            tmp = m
+            tmp[m_inds] = m[m_inds]
         else:
             tmpstr = re.sub('m', fmt2.format(mlow), conversion)
             cmd = "tmp[m >= {:.2f}] = {:s}".format(mlow, tmpstr)
@@ -77,6 +187,7 @@ def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):
 
     rows = rows.copy()
     rows.loc[:, 'magMw'] = tmp
+    rows = rows.drop(rows[rows['magMw'] == 0.0].index)
     save = save.copy()
     save = pd.concat([save, rows], ignore_index=True, sort=False)
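Note on the duplicate handling introduced in both `process_magnitude*` functions: the old code kept the first row per `eventID`; the new chain keeps the row with the largest magnitude and then restores the original row order. A small sketch with toy data:

    import pandas as pd

    rows = pd.DataFrame({"eventID": ["a", "a", "b"],
                         "value": [5.1, 5.4, 6.0]})
    rows = (rows.sort_values("value", ascending=False)
            .drop_duplicates("eventID").sort_index())
    print(rows)  # event "a" keeps value 5.4, event "b" keeps 6.0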
@@ -205,10 +316,14 @@ def process_magnitude(work, mag_rules):
             # not take the best information available.
             # Remove duplicates. This can happen when we process a magnitude
             # type without specifying the agency
+
             flag = rows["eventID"].duplicated(keep='first')
+
             if any(flag):
-                tmp = rows[~flag].copy()
-                rows = tmp
+                # Keep the row with the largest magnitude for each event -
+                # experimental, introduced to address the MEX issue
+                rows = (rows.sort_values("value", ascending=False)
+                        .drop_duplicates('eventID').sort_index())
 
             # Magnitude conversion
             if len(rows) > 0:
@@ -262,6 +377,9 @@ def process_dfs(odf_fname, mdf_fname, settings_fname=None):
     work = pd.merge(odf, mdf, on=["eventID"])
     save, work = process_magnitude(work, rules['magnitude'])
 
+    work_all_m = pd.merge(odf, mdf, on=["eventID"])
+    save_all_m = process_magnitude_keep_all(work_all_m, rules['magnitude'])
+
     print("Number of origins with final mag type {:d}\n".format(len(save)))
     computed = len(save)
 
@@ -271,4 +389,4 @@ def process_dfs(odf_fname, mdf_fname, settings_fname=None):
         msg = fmt.format(computed - expected)
         warnings.warn(msg)
 
-    return save, work
+    return save, work, save_all_m
diff --git a/openquake/cat/hmg/plot.py b/openquake/cat/hmg/plot.py
index 645262033..b0618f0cb 100644
--- a/openquake/cat/hmg/plot.py
+++ b/openquake/cat/hmg/plot.py
@@ -56,7 +56,7 @@ def get_hists(df, bins, agencies=None, column="magMw"):
     out = []
     out_agencies = []
     for key in agencies:
-        mw = df[df['magAgency'] == key][column]
+        mw = df[df['magAgency'] == key][column].apply(lambda x: round(x, 5))
         if len(mw):
             hist, _ = np.histogram(mw, bins=bins)
             out.append(hist)
@@ -212,7 +212,8 @@ def plot_histogram(df, agencies=None, wdt=0.1, column="magMw",
     mpl.rcParams['axes.labelsize'] = 16
 
     # Data
-    mw = df[column].values
+    # Round the magnitudes to avoid floating-point noise at the bin edges
+    mw = df[column].apply(lambda x: round(x, 5)).values
 
     # Creating bins and total histogram
     mmi = np.floor(min(mw)/wdt)*wdt-wdt
@@ -259,7 +260,7 @@ def plot_histogram(df, agencies=None, wdt=0.1, column="magMw",
     # Save figure
     folder = os.path.dirname(fname)
     Path(folder).mkdir(parents=True, exist_ok=True)
-    plt.savefig(fname)
+    plt.savefig(fname, bbox_inches='tight')
 
     if "xlim" in kwargs:
         ax.set_xlim(kwargs["xlim"])
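Note on the rounding added to plot.py: converted magnitudes can land a hair below a bin edge and shift a count into the neighbouring bin. A contrived illustration of the failure mode that `round(x, 5)` guards against:

    import numpy as np

    m = 4.3 - 1e-15                   # stands in for a converted magnitude
    bins = np.array([4.2, 4.3, 4.4])
    print(np.histogram([m], bins=bins)[0])            # [1 0]
    print(np.histogram([round(m, 5)], bins=bins)[0])  # [0 1]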
diff --git a/openquake/cat/hmg/utils.py b/openquake/cat/hmg/utils.py
index 1cec9d26b..5627d8f78 100644
--- a/openquake/cat/hmg/utils.py
+++ b/openquake/cat/hmg/utils.py
@@ -25,27 +25,48 @@
 # coding: utf-8
 
 import pandas as pd
+import geopandas as gpd
 
 
-def to_hmtk_catalogue(cdf: pd.DataFrame):
+def to_hmtk_catalogue(cdf: pd.DataFrame, polygon=None):
     """
     Converts a catalogue obtained from the homogenisation into a format
     compatible with the oq-hmtk.
 
     :param cdf:
         An instance of :class:`pd.DataFrame`
+    :param polygon:
+        The path to a shapefile with the polygon(s) used to clip the
+        extent of the catalogue
     :returns:
         An instance of :class:`pd.DataFrame`
     """
 
+    # If a polygon is given, clip the catalogue to it
+    if polygon:
+
+        # Convert the dataframe into a geodataframe
+        cgdf = pd.DataFrame(cdf)
+        tmp = gpd.points_from_xy(cgdf.longitude.values, cgdf.latitude.values)
+        cgdf = gpd.GeoDataFrame(cgdf, geometry=tmp, crs="EPSG:4326")
+
+        # Reading shapefile and dissolving polygons into a single one
+        boundaries = gpd.read_file(polygon)
+        boundaries['dummy'] = 'dummy'
+        geom = boundaries.dissolve(by='dummy').geometry[0]
+
+        # Clip the catalogue
+        tmpgeo = {'geometry': [geom]}
+        gdf = gpd.GeoDataFrame(tmpgeo, crs="EPSG:4326")
+        cdf = gpd.sjoin(cgdf, gdf, how="inner", op='intersects')
+
     # Select columns
     # Check if catalogue contains strike/dip/rake and retain if it does
     if 'str1' in cdf.columns:
-        col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'longitude',
-                    'latitude', 'depth', 'magMw', 'str1', 'dip1', 'rake1', 'str2', 'dip2', 'rake2']
+        col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'hour',
+                    'minute', 'second', 'longitude', 'latitude', 'depth',
+                    'magMw', 'sigma', 'str1', 'dip1', 'rake1', 'str2',
+                    'dip2', 'rake2']
     else:
-        col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'longitude',
-                    'latitude', 'depth', 'magMw']
+        col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'hour',
+                    'minute', 'second', 'longitude', 'latitude', 'depth',
+                    'magMw', 'sigma']
 
     cdf = cdf[col_list]
diff --git a/openquake/cat/tests/data/test_hmg/test_hmg_mtab.h5 b/openquake/cat/tests/data/test_hmg/test_hmg_mtab.h5
new file mode 100644
index 000000000..5b26202b5
Binary files /dev/null and b/openquake/cat/tests/data/test_hmg/test_hmg_mtab.h5 differ
diff --git a/openquake/cat/tests/data/test_hmg/test_hmg_mtab_clip.h5 b/openquake/cat/tests/data/test_hmg/test_hmg_mtab_clip.h5
new file mode 100644
index 000000000..0f693cb82
Binary files /dev/null and b/openquake/cat/tests/data/test_hmg/test_hmg_mtab_clip.h5 differ
diff --git a/openquake/cat/tests/data/test_hmg/test_hmg_otab.h5 b/openquake/cat/tests/data/test_hmg/test_hmg_otab.h5
new file mode 100644
index 000000000..003ed06b9
Binary files /dev/null and b/openquake/cat/tests/data/test_hmg/test_hmg_otab.h5 differ
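For context, a hypothetical usage sketch of the new `polygon` option in `to_hmtk_catalogue` (the shapefile name is made up). Note that recent geopandas releases rename the `op=` keyword of `sjoin` to `predicate=`, so the `op='intersects'` call in the patch assumes an older geopandas:

    import pandas as pd
    import geopandas as gpd

    # Made-up catalogue with two events, one inside the boundary
    cdf = pd.DataFrame({"eventID": [1, 2],
                        "longitude": [10.0, 50.0],
                        "latitude": [45.0, 0.0]})
    pts = gpd.points_from_xy(cdf.longitude, cdf.latitude)
    cgdf = gpd.GeoDataFrame(cdf, geometry=pts, crs="EPSG:4326")

    # Dissolve all the polygons in the shapefile into a single geometry
    boundary = gpd.read_file("boundary.shp").dissolve()
    clipped = gpd.sjoin(cgdf, boundary[["geometry"]], how="inner",
                        predicate="intersects")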
diff --git a/openquake/cat/tests/hmg_test.py b/openquake/cat/tests/hmg_test.py
new file mode 100644
index 000000000..3f9051193
--- /dev/null
+++ b/openquake/cat/tests/hmg_test.py
@@ -0,0 +1,70 @@
+# ------------------- The OpenQuake Model Building Toolkit --------------------
+# Copyright (C) 2022 GEM Foundation
+#  _______  _______        __   __  _______  _______  ___   _
+# |       ||       |      |  |_|  ||  _    ||       ||   | | |
+# |   _   ||   _   | ____ |       || |_|   ||_     _||   |_| |
+# |  | |  ||  | |  ||____||       ||       |  |   |  |      _|
+# |  |_|  ||  |_|  |      |       ||  _   |   |   |  |     |_
+# |       ||      |       | ||_|| || |_|   |  |   |  |    _  |
+# |_______||____||_|      |_|   |_||_______|  |___|  |___| |_|
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+# coding: utf-8
+
+import os
+import unittest
+import numpy as np
+import pandas as pd
+import toml
+
+from openquake.cat.hmg.hmg import process_magnitude
+
+aaeq = np.testing.assert_almost_equal
+
+BASE_PATH = os.path.dirname(__file__)
+
+SETTINGS_HOMOG = """
+[magnitude.NEIC.mb]
+# from Weatherill
+low_mags = [5.0, 6.0]
+conv_eqs = ["0.8 * m + 0.2", "m"]
+sigma = [0.283]
+"""
+
+
+class HomogeniseNEICmbTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.data_path = os.path.join(BASE_PATH, 'data', 'test_hmg')
+
+    def test_case01(self):
+        """
+        Tests that magnitudes are converted correctly for an example with
+        two `low_mags` values, neither of which is 0.
+        """
+        # Reading otab and mtab
+        fname_otab = os.path.join(self.data_path, "test_hmg_otab.h5")
+        odf = pd.read_hdf(fname_otab)
+        fname_mtab = os.path.join(self.data_path, "test_hmg_mtab_clip.h5")
+        mdf = pd.read_hdf(fname_mtab)
+        work = pd.merge(odf, mdf, on=["eventID", "originID"])
+
+        rules = toml.loads(SETTINGS_HOMOG)
+        save, work = process_magnitude(work, rules['magnitude'])
+
+        results = save.magMw.values
+        expected = [4.36, 4.76, 6.8]
+        aaeq(results, expected, decimal=6)
diff --git a/openquake/mbi/cat/create_csv.py b/openquake/mbi/cat/create_csv.py
index b564764ec..70256a7bc 100644
--- a/openquake/mbi/cat/create_csv.py
+++ b/openquake/mbi/cat/create_csv.py
@@ -60,6 +60,8 @@ def main(cat_fname, fname_out):
     create_folder(os.path.dirname(fname_out))
 
     # Save file
+    # Write the magnitudes with a fixed precision of five decimals
+    df.magMw = df.magMw.apply(lambda x: round(x, 5))
     df.to_csv(fname_out, index=False)
 
     # Create hmtk file
@@ -68,6 +70,8 @@ def main(cat_fname, fname_out):
     tmps = ofle.split('.')
     ofle = f'{tmps[0]}_hmtk.csv'
     odf = to_hmtk_catalogue(df)
+    # Write the magnitudes with a fixed precision of two decimals
+    odf.magnitude = odf.magnitude.apply(lambda x: round(x, 2))
     odf.to_csv(os.path.join(odir, ofle), index=False)
diff --git a/openquake/mbi/cat/homogenise.py b/openquake/mbi/cat/homogenise.py
index 7ec1e5c80..b06c98496 100644
--- a/openquake/mbi/cat/homogenise.py
+++ b/openquake/mbi/cat/homogenise.py
@@ -38,7 +38,9 @@ def main(settings, odf_fname, mdf_fname, outfolder='./h5/'):
     """
 
     # Homogenise
-    save, work = process_dfs(odf_fname, mdf_fname, settings)
+    save, work, save_all_m = process_dfs(odf_fname, mdf_fname, settings)
+    save.magMw = save.magMw.apply(lambda x: round(x, 5))
+
 
     # Outname
     fname = os.path.basename(odf_fname).split('_')[0]
@@ -52,6 +54,10 @@ def main(settings, odf_fname, mdf_fname, outfolder='./h5/'):
         tmp = os.path.join(outfolder, fmt.format(fname))
         work.to_hdf(tmp, '/events', append=False)
 
+    fmt = '{:s}_all_m.h5'
+    tmp = os.path.join(outfolder, fmt.format(fname))
+    save_all_m.to_hdf(tmp, '/events', append=False)
+
 
 main.settings = '.toml file with the settings'
 main.odf_fname = '.h5 file with origins'
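The expected values in the new test follow directly from the settings: with `low_mags = [5.0, 6.0]`, the rule "0.8 * m + 0.2" applies on [5.0, 6.0) and the identity rule above 6.0, so the mb values in the clipped test table (recovered by inverting the rule: 5.2, 5.7 and 6.8) map to:

    # Quick arithmetic check of the expected magnitudes in test_case01
    for mb in [5.2, 5.7, 6.8]:
        mw = 0.8 * mb + 0.2 if mb < 6.0 else mb
        print(round(mw, 2))  # 4.36, 4.76, 6.8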
diff --git a/openquake/mbi/cat/purge_earthquakes.py b/openquake/mbi/cat/purge_earthquakes.py
index b6648c571..18c2f3044 100644
--- a/openquake/mbi/cat/purge_earthquakes.py
+++ b/openquake/mbi/cat/purge_earthquakes.py
@@ -50,31 +50,18 @@ def main(fname_cat, fname_cat_out, fname_csv):
     #
     # Read catalogue
     cat = pd.read_hdf(fname_cat)
-    cat_out = pd.read_hdf(fname_cat_out)
     print('The catalogue contains {:d} earthquakes'.format(len(cat)))
 
     #
     # Read file with the list of IDs
-    fle = open(fname_csv, mode='r')
-    keys = []
-    for line in fle:
-        aa = line.rstrip().split(',')
-        keys.append([re.sub(' ', '', aa[0]), re.sub(' ', '', aa[1])])
-    fle.close()
-
-    #
-    # Move events
-    for key in keys:
-        series = cat[cat[key[0]] == key[1]]
-        cat_out.append(series)
-    cat_out.to_hdf(fname_cat_out, '/events', append=False)
+    event_df = pd.read_csv(fname_csv)
 
     #
     # Drop events
-    for key in keys:
-        cat.drop(cat[cat[key[0]] == key[1]].index, inplace=True)
+    for key in event_df['eventID'].values:
+        cat.drop(cat[cat['eventID'] == key].index, inplace=True)
 
     print('The catalogue contains {:d} earthquakes'.format(len(cat)))
-    cat.to_hdf(fname_cat, '/events', append=False)
+    cat.to_hdf(fname_cat_out, '/events', append=False)
 
 
 main.fname_cat = '.h5 file with origins'
diff --git a/openquake/sub/get_profiles_from_slab2pt0.py b/openquake/sub/get_profiles_from_slab2pt0.py
index 8256a3848..88be92d27 100644
--- a/openquake/sub/get_profiles_from_slab2pt0.py
+++ b/openquake/sub/get_profiles_from_slab2pt0.py
@@ -28,15 +28,15 @@ import netCDF4
 import numpy as np
 from numba import njit
 
-from openquake.hazardlib.geo.geodetic import (
-    point_at, npoints_towards, geodetic_distance, azimuth)
+from openquake.hazardlib.geo.geodetic import (point_at, npoints_towards,
+                                              geodetic_distance, azimuth)
 from openquake.sub.cross_sections import CrossSection, Slab2pt0
 
 pygmt_available = False
 try:
     import pygmt
-except ImportError:
     pygmt_available = True
+except ImportError:
+    pass
@@ -402,6 +402,28 @@ def get_profiles(fname_str: str, fname_dep: str, spacing: float, fname_fig:
         fig.savefig(fname_fig)
         fig.show()
 
+    else:
+        from matplotlib import pyplot as plt
+        plt.scatter(depths[:, 0], depths[:, 1], c=-depths[:, 2])
+        for i, pro in enumerate(traces):
+            plt.plot(pro[:, 0], pro[:, 1], 'k')
+            plt.text(pro[0, 0], pro[0, 1], f'{i}')
+
+        for key in slb.profiles:
+            pro = slb.profiles[key]
+            if pro.shape[0] > 0:
+                plt.plot(pro[:, 0], pro[:, 1], c='r')
+
+        # Shift longitudes given in the 0-360 convention back to -180/180
+        if max(reg[0], reg[1]) > 180:
+            xmin = reg[0] - 360
+            xmax = reg[1] - 360
+        else:
+            xmin = reg[0]
+            xmax = reg[1]
+        plt.xlim([xmin, xmax])
+        plt.colorbar(label='depth to slab (km)')
+        plt.savefig(fname_fig)
+
 
     return slb
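The guarded import at the top of get_profiles_from_slab2pt0.py follows the usual optional-dependency pattern: the flag flips to True only when the import succeeds, so the new matplotlib `else:` branch in `get_profiles` runs exactly when pygmt is unavailable. In isolation:

    pygmt_available = False
    try:
        import pygmt  # optional dependency
        pygmt_available = True
    except ImportError:
        pass

    print("pygmt available:", pygmt_available)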