Merge pull request #358 from GEMScienceTools/add_h5_cat_clipper
WIP: Add h5 cat clipper
kejohnso authored Nov 2, 2023
2 parents 61723cb + 27b110d commit 96d1ebc
Showing 12 changed files with 276 additions and 38 deletions.
9 changes: 5 additions & 4 deletions openquake/cat/hmg/check.py
@@ -125,8 +125,8 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
# datetime can only cover 548 years starting in 1677
# general advice will be to exclude historic events and
# add those later
subcat = cat[cat['year'] > 1800]

subcat = cat[(cat['year'] > 1800) & (cat['value'] > 1.0)]
for index, row in tqdm(subcat.iterrows()):
# Take the index from delta_ll - this is needed
# when delta_ll varies with time.
@@ -137,7 +137,7 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
ll_thrs = ll_d[idx_t][idx_mag]
sel_thrs = time_d[idx_t][idx_mag]
sel_thrs = sel_thrs.total_seconds()

# Find events close in time
tmp_dff = abs(subcat.loc[:, 'datetime'] - pd.to_datetime(row.datetime))
threshold = datetime.timedelta(seconds=sel_thrs)
@@ -150,7 +150,8 @@ def process(cat, sidx, delta_ll, delta_t, fname_geojson, use_kms=False):
minla = row.latitude - ll_thrs
maxlo = row.longitude + ll_thrs
maxla = row.latitude + ll_thrs
idx_dist = list(sidx.intersection((minlo, minla, maxlo, maxla)))
idx_dist_ind = list(sidx.intersection((minlo, minla, maxlo, maxla)))
idx_dist = cat.index[idx_dist_ind]

else:
tmp_dist = abs(geodetic_distance(
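The two added lines above matter because the ids returned by `sidx.intersection` are positions in whatever order the spatial index was built, not `cat`'s DataFrame labels; mapping them through `cat.index` keeps the later label-based lookups correct. A minimal sketch of the distinction, assuming `sidx` is an `rtree` index built over positional row order (the diff does not show how `sidx` is constructed):

```python
import pandas as pd
from rtree import index

# Catalogue whose labels differ from positions 0, 1, 2
cat = pd.DataFrame(
    {"longitude": [10.0, 10.5, 30.0], "latitude": [45.0, 45.2, 50.0]},
    index=[101, 205, 309],
)

# Build the spatial index over positional order
sidx = index.Index()
for pos, (lon, lat) in enumerate(zip(cat.longitude, cat.latitude)):
    sidx.insert(pos, (lon, lat, lon, lat))

# A bounding box around the first two events returns positions {0, 1},
# which must be mapped to labels before using cat.loc
idx_dist_ind = list(sidx.intersection((9.5, 44.5, 11.0, 46.0)))
idx_dist = cat.index[idx_dist_ind]  # labels [101, 205]
print(cat.loc[idx_dist])
```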
128 changes: 123 additions & 5 deletions openquake/cat/hmg/hmg.py
@@ -33,6 +33,115 @@
import pandas as pd


def apply_mag_conversion_rule_keep_all(low_mags, conv_eqs, rows, save):
"""
This function applies sequentially a set of rules to the information in
the `save` :class:`pandas.DataFrame` instance.
:param low_mags:
A list
:param conv_eqs:
A list
:param rows:
The :class:`pandas.DataFrame` instance containing the information
still to be processed
:param save:
One :class:`pandas.DataFrame` instance
:return:
One :class:`pandas.DataFrame` instances with the
homogenised magnitudes.
"""

# Formatting
fmt2 = "m[m >= {:.2f}]"

# Temporary assigning magnitude
m = np.round(rows['value'].values, 3)
tmp = np.zeros_like(m)

for mlow, conversion in zip(low_mags, conv_eqs):
m_inds = m >= mlow
if conversion == 'm':
tmp[m_inds] = m[m_inds]
else:
tmpstr = re.sub('m', fmt2.format(mlow), conversion)
cmd = "tmp[m >= {:.2f}] = {:s}".format(mlow, tmpstr)

try:
exec(cmd)
except ValueError:
fmt = 'Cannot execute the following conversion rule:\n{:s}'
print(fmt.format(conversion))

rows = rows.copy()
rows.loc[:, 'magMw'] = tmp
save = save.copy()
save = pd.concat([save, rows], ignore_index=True, sort=False)

return save

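To make the loop above concrete, here is a hand-worked sketch (values assumed, not from the commit) using the NEIC mb rule from the test settings further down in this diff: `low_mags = [5.0, 6.0]`, `conv_eqs = ["0.8 * m + 0.2", "m"]`. Each rule overwrites the previous one for the magnitudes it covers, so the equation that finally applies to a value is the one with the highest threshold not exceeding it:

```python
import numpy as np

m = np.array([4.5, 5.2, 5.7, 6.8])       # assumed mb values
tmp = np.zeros_like(m)

tmp[m >= 5.0] = 0.8 * m[m >= 5.0] + 0.2  # first rule covers m >= 5.0
tmp[m >= 6.0] = m[m >= 6.0]              # second rule overwrites for m >= 6.0

print(tmp)                               # [0.   4.36 4.76 6.8 ]
```

The 4.5 event keeps the 0.0 placeholder: this `keep_all` variant retains such rows, whereas `apply_mag_conversion_rule` below now drops them explicitly.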

def process_magnitude_keep_all(work, mag_rules):
"""
:param work:
A :class:`pandas.DataFrame` instance obtained by joining the origin
and magnitude dataframes
:param mag_rules:
A dictionary with the rules to be used for processing the catalogue
:return:
One :class:`pandas.DataFrame` instance with the homogenised
catalogue.
"""

# Add a column for destination
if "magMw" not in list(work.columns):
work["magMw"] = np.nan

# This is a new dataframe used to store the processed events
save = pd.DataFrame(columns=work.columns)

# Looping over agencies
for agency in mag_rules.keys():
print(' Agency: {:s} ('.format(agency), end="")

# Looping over magnitude-types
for mag_type in mag_rules[agency].keys():
print('{:s} '.format(mag_type), end='')

# Create the first selection condition and select rows
cond = get_mag_selection_condition(agency, mag_type)
try:
rows = work.loc[eval(cond), :]
except ValueError:
fmt = 'Cannot evaluate the following condition:\n {:s}'
print(fmt.format(cond))

# TODO
# This is an initial solution that is not ideal since it does
# not take the best information available.
# Remove duplicates. This can happen when we process a magnitude
# type without specifying the agency

flag = rows["eventID"].duplicated(keep='first')

if any(flag):
# this line is so that the larger M is taken - experimental, based on MEX issue
rows = rows.sort_values("value", ascending=False).drop_duplicates('eventID').sort_index()
#tmp = sorted_rows[~flag].copy()
#rows = tmp

# Magnitude conversion
if len(rows) > 0:
low_mags = mag_rules[agency][mag_type]['low_mags']
conv_eqs = mag_rules[agency][mag_type]['conv_eqs']
save = apply_mag_conversion_rule_keep_all(low_mags, conv_eqs,
rows, save)
print(")")

return save

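For reference, here is a sketch of the `mag_rules` mapping the two loops above walk (agency → magnitude type → rule), mirroring the TOML settings used in the new test at the bottom of this diff:

```python
mag_rules = {
    "NEIC": {                                # agency
        "mb": {                              # magnitude type
            "low_mags": [5.0, 6.0],          # rule thresholds
            "conv_eqs": ["0.8 * m + 0.2", "m"],
            "sigma": [0.283],                # carried in the settings; not read by this loop
        }
    }
}
```

The duplicate handling keeps, for each `eventID`, the row with the largest `value`: `sort_values` descending puts it first, `drop_duplicates('eventID')` retains that first occurrence, and `sort_index()` restores the original row order.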

def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):
"""
This function applies sequentially a set of rules to the information in
@@ -59,12 +168,13 @@ def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):
fmt2 = "m[m >= {:.2f}]"

# Temporary assigning magnitude
m = rows['value'].values
m = np.round(rows['value'].values, 3)
tmp = np.zeros_like(m)

for mlow, conversion in zip(low_mags, conv_eqs):
m_inds = m >= mlow
if conversion == 'm':
tmp = m
tmp[m_inds] = m[m_inds]
else:
tmpstr = re.sub('m', fmt2.format(mlow), conversion)
cmd = "tmp[m >= {:.2f}] = {:s}".format(mlow, tmpstr)
@@ -77,6 +187,7 @@ def apply_mag_conversion_rule(low_mags, conv_eqs, rows, save, work):

rows = rows.copy()
rows.loc[:, 'magMw'] = tmp
rows = rows.drop(rows[rows['magMw']==0.0].index)
save = save.copy()
save = pd.concat([save, rows], ignore_index=True, sort=False)

@@ -205,10 +316,14 @@ def process_magnitude(work, mag_rules):
# not take the best information available.
# Remove duplicates. This can happen when we process a magnitude
# type without specifying the agency

flag = rows["eventID"].duplicated(keep='first')

if any(flag):
tmp = rows[~flag].copy()
rows = tmp
# this line is so that the larger M is taken - experimental, based on MEX issue
rows = rows.sort_values("value", ascending=False).drop_duplicates('eventID').sort_index()
#tmp = sorted_rows[~flag].copy()
#rows = tmp

# Magnitude conversion
if len(rows) > 0:
@@ -262,6 +377,9 @@ def process_dfs(odf_fname, mdf_fname, settings_fname=None):
work = pd.merge(odf, mdf, on=["eventID"])
save, work = process_magnitude(work, rules['magnitude'])

work_all_m = pd.merge(odf, mdf, on=["eventID"])
save_all_m = process_magnitude_keep_all(work_all_m, rules['magnitude'])

print("Number of origins with final mag type {:d}\n".format(len(save)))

computed = len(save)
Expand All @@ -271,4 +389,4 @@ def process_dfs(odf_fname, mdf_fname, settings_fname=None):
msg = fmt.format(computed - expected)
warnings.warn(msg)

return save, work
return save, work, save_all_m
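With this change `process_dfs` returns three objects rather than two, so existing call sites that unpack two values will need updating. A hypothetical call (file names are placeholders):

```python
save, work, save_all_m = process_dfs("otab.h5", "mtab.h5", "settings.toml")
```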
7 changes: 4 additions & 3 deletions openquake/cat/hmg/plot.py
@@ -56,7 +56,7 @@ def get_hists(df, bins, agencies=None, column="magMw"):
out = []
out_agencies = []
for key in agencies:
mw = df[df['magAgency'] == key][column]
mw = df[df['magAgency'] == key][column].apply(lambda x: round(x, 5))
if len(mw):
hist, _ = np.histogram(mw, bins=bins)
out.append(hist)
@@ -212,7 +212,8 @@ def plot_histogram(df, agencies=None, wdt=0.1, column="magMw",
mpl.rcParams['axes.labelsize'] = 16

# Data
mw = df[column].values
# mw = df[column].values
mw = df[column].apply(lambda x: round(x, 5)).values

# Creating bins and total histogram
mmi = np.floor(min(mw)/wdt)*wdt-wdt
@@ -259,7 +260,7 @@
# Save figure
folder = os.path.dirname(fname)
Path(folder).mkdir(parents=True, exist_ok=True)
plt.savefig(fname)
plt.savefig(fname, bbox_inches='tight')

if "xlim" in kwargs:
ax.set_xlim(kwargs["xlim"])
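The `round(x, 5)` calls added in this file guard against floating-point noise from the magnitude conversions pushing a value across a histogram bin edge. A small self-contained illustration (assumed values, independent of the plotting code):

```python
import numpy as np

wdt = 0.1
mw = np.array([0.1 + 0.7])       # 0.7999999999999999 from float arithmetic
bins = np.arange(0.0, 1.1, wdt)

hist_raw, _ = np.histogram(mw, bins=bins)
hist_rnd, _ = np.histogram(np.round(mw, 5), bins=bins)
print(hist_raw[7], hist_raw[8])  # 1 0 -- counted in the [0.7, 0.8) bin
print(hist_rnd[7], hist_rnd[8])  # 0 1 -- counted in [0.8, 0.9) as intended
```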
31 changes: 26 additions & 5 deletions openquake/cat/hmg/utils.py
@@ -25,27 +25,48 @@
# coding: utf-8

import pandas as pd
import geopandas as gpd


def to_hmtk_catalogue(cdf: pd.DataFrame):
def to_hmtk_catalogue(cdf: pd.DataFrame, polygon=None):
"""
Converts a catalogue obtained from the homogenisation into a format
compatible with the oq-hmtk.
:param cdf:
An instance of :class:`pd.DataFrame`
:param polygon:
Path to a polygon shapefile used to clip the catalogue extent
:returns:
An instance of :class:`pd.DataFrame`
"""

# if there is a polygon, clip the catalogue to it
if polygon:

# convert df to gdf
cgdf = pd.DataFrame(cdf)
tmp = gpd.points_from_xy(cgdf.longitude.values, cgdf.latitude.values)
cgdf = gpd.GeoDataFrame(cgdf, geometry=tmp, crs="EPSG:4326")

# Reading shapefile and dissolving polygons into a single one
boundaries = gpd.read_file(polygon)
boundaries['dummy'] = 'dummy'
geom = boundaries.dissolve(by='dummy').geometry[0]

# clip the catalogue
tmpgeo = {'geometry': [geom]}
gdf = gpd.GeoDataFrame(tmpgeo, crs="EPSG:4326")
cdf = gpd.sjoin(cgdf, gdf, how="inner", op='intersects')

# Select columns
# Check if catalogue contains strike/dip/rake and retain if it does
if 'str1' in cdf.columns:
col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'longitude',
'latitude', 'depth', 'magMw', 'str1', 'dip1', 'rake1', 'str2', 'dip2', 'rake2']
col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'hour', 'minute', 'second', 'longitude',
'latitude', 'depth', 'magMw', 'sigma', 'str1', 'dip1', 'rake1', 'str2', 'dip2', 'rake2']
else:
col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'longitude',
'latitude', 'depth', 'magMw']
col_list = ['eventID', 'Agency', 'year', 'month', 'day', 'hour', 'minute', 'second', 'longitude',
'latitude', 'depth', 'magMw', 'sigma']

cdf = cdf[col_list]

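Hypothetical usage of the new clipping option in `to_hmtk_catalogue` (both file names below are placeholders; omitting `polygon` leaves the previous behaviour unchanged):

```python
import pandas as pd
from openquake.cat.hmg.utils import to_hmtk_catalogue

cdf = pd.read_hdf("homogenised_catalogue.h5")             # placeholder catalogue
hmtk_df = to_hmtk_catalogue(cdf, polygon="boundary.shp")  # placeholder shapefile
```

Dissolving the boundary layer into a single geometry before the spatial join avoids duplicated rows when the shapefile contains several overlapping polygons. Note that `op=` is the pre-0.10 geopandas keyword for `sjoin`; later releases rename it to `predicate=`.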
Binary file not shown.
Binary file not shown.
Binary file not shown.
78 changes: 78 additions & 0 deletions openquake/cat/tests/hmg_test.py
@@ -0,0 +1,78 @@
# ------------------- The OpenQuake Model Building Toolkit --------------------
# Copyright (C) 2022 GEM Foundation
# _______ _______ __ __ _______ _______ ___ _
# | || | | |_| || _ || || | | |
# | _ || _ | ____ | || |_| ||_ _|| |_| |
# | | | || | | ||____|| || | | | | _|
# | |_| || |_| | | || _ | | | | |_
# | || | | ||_|| || |_| | | | | _ |
# |_______||____||_| |_| |_||_______| |___| |___| |_|
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# coding: utf-8

import os
import unittest
import tempfile
import numpy as np
import pandas as pd
import toml

from openquake.cat.hmg.hmg import process_dfs, process_magnitude

aeq = np.testing.assert_equal
aaeq = np.testing.assert_almost_equal


BASE_PATH = os.path.dirname(__file__)

SETTINGS_HOMOG = """
[magnitude.NEIC.mb]
# from Weatherill
low_mags = [5.0, 6.0]
conv_eqs = ["0.8 * m + 0.2", "m"]
sigma = [0.283]
"""



class HomogeniseNEICmbTestCase(unittest.TestCase):

def setUp(self):

self.data_path = os.path.join(BASE_PATH, 'data', 'test_hmg')

def test_case01(self):
"""
Tests that magnitudes are converted correctly for an example
with two "low_mags" values, neither of which is 0.
"""
td = toml.loads(SETTINGS_HOMOG)

# Reading otab and mtab
fname_otab = os.path.join(self.data_path, "test_hmg_otab.h5")
odf = pd.read_hdf(fname_otab)
fname_mtab = os.path.join(self.data_path, "test_hmg_mtab_clip.h5")
mdf = pd.read_hdf(fname_mtab)
work = pd.merge(odf, mdf, on=["eventID", "originID"])
save = pd.DataFrame(columns=work.columns)

rules = toml.loads(SETTINGS_HOMOG)
save, work = process_magnitude(work, rules['magnitude'])

results = save.magMw.values
expected = [4.36, 4.76, 6.8]
aaeq(results, expected, decimal=6)
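The expected values can be hand-checked against the rule in `SETTINGS_HOMOG`: magnitudes in [5.0, 6.0) are converted with `0.8 * m + 0.2`, while those at or above 6.0 pass through. The actual inputs live in the binary .h5 fixtures, so the mb values below are inferred by inverting the conversion:

```python
0.8 * 5.2 + 0.2  # = 4.36
0.8 * 5.7 + 0.2  # = 4.76
# 6.8 >= 6.0, so the identity rule "m" applies and the value stays 6.8
```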
4 changes: 4 additions & 0 deletions openquake/mbi/cat/create_csv.py
@@ -60,6 +60,8 @@ def main(cat_fname, fname_out):
create_folder(os.path.dirname(fname_out))

# Save file
# writing magnitudes more precisely
df.magMw = df.magMw.apply(lambda x: round(x, 5))
df.to_csv(fname_out, index=False)

# Create hmtk file
@@ -68,6 +70,8 @@
tmps = ofle.split('.')
ofle = f'{tmps[0]}_hmtk.csv'
odf = to_hmtk_catalogue(df)
# writing magnitudes more precisely
odf.magnitude = odf.magnitude.apply(lambda x: round(x, 2))
odf.to_csv(os.path.join(odir, ofle), index=False)


