Commit

add v2 version
zhuwq0 committed Oct 23, 2024
1 parent 15bd242 commit 270f1fd
Showing 13 changed files with 1,036 additions and 172 deletions.
2 changes: 1 addition & 1 deletion ADLoc
Submodule ADLoc updated 3 files
+9 −2 adloc/adloc.py
+8 −4 adloc/data.py
+23 −10 adloc/utils.py
2 changes: 1 addition & 1 deletion CCTorch
Submodule CCTorch updated 2 files
+62 −1 cctorch/data.py
+171 −148 run.py
2 changes: 1 addition & 1 deletion PhaseNet
3 changes: 3 additions & 0 deletions scripts/args.py
@@ -19,6 +19,9 @@ def parse_args():
parser.add_argument("--num_nodes", type=int, default=1, help="number of nodes")
parser.add_argument("--node_rank", type=int, default=0, help="node rank")

## ADLOC
parser.add_argument("--iter", type=int, default=0, help="iteration")

## CCTorch
parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py")

4 changes: 3 additions & 1 deletion scripts/cut_templates_cc.py
@@ -378,7 +378,9 @@ def cut_templates(root_path, region, config):

xmin, ymin = proj(config["minlongitude"], config["minlatitude"])
xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"])
zmin, zmax = config["mindepth"], config["maxdepth"]
# zmin, zmax = config["mindepth"], config["maxdepth"]
zmin = config["mindepth"] if "mindepth" in config else 0.0
zmax = config["maxdepth"] if "maxdepth" in config else 60.0
config["xlim_km"] = (xmin, xmax)
config["ylim_km"] = (ymin, ymax)
config["zlim_km"] = (zmin, zmax)
5 changes: 4 additions & 1 deletion scripts/download_station.py
@@ -176,6 +176,7 @@ def parse_inventory_csv(inventory, mseed_ids=[]):
sensor_description = channel.sensor.description
channel_list.append(
{
"station_id": f"{network.code}.{station.code}.{channel.location_code}.{channel.code[:-1]}",
"network": network.code,
"station": station.code,
"location": channel.location_code,
@@ -247,7 +248,9 @@ def parse_inventory_csv(inventory, mseed_ids=[]):
tmp["provider"] = provider
stations.append(tmp)
stations = pd.concat(stations)
stations = stations.groupby(["network", "station", "location", "channel"], dropna=False).first().reset_index()
# stations = stations.groupby(["network", "station", "location", "channel"], dropna=False).first().reset_index()
stations = stations.sort_values(by=["station_id", "channel"])
stations = stations.groupby(["station_id"], dropna=False).first().reset_index()
print(f"Merged {len(stations)} channels")
stations.to_csv(f"{root_path}/{result_dir}/stations.csv", index=False)
if protocol != "file":
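
For reference, a minimal sketch of the deduplication step introduced above, run on a made-up inventory frame (the station codes and providers are illustrative, not project data):

import pandas as pd

stations = pd.DataFrame(
    {
        "station_id": ["CI.PASC.00.BH", "CI.PASC.00.BH", "CI.PASC.00.HH"],
        "channel": ["BHZ", "BHE", "HHZ"],
        "provider": ["SCEDC", "IRIS", "SCEDC"],
    }
)
# Sort so the preferred row comes first, then keep one row per station_id.
stations = stations.sort_values(by=["station_id", "channel"])
stations = stations.groupby(["station_id"], dropna=False).first().reset_index()
print(stations)  # two rows: one per station_id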
94 changes: 94 additions & 0 deletions scripts/merge_gamma_picks.py
@@ -0,0 +1,94 @@
# %%
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob


def load_data(year, jday, data_path, root_path, bucket, protocol, token):

fs = fsspec.filesystem(protocol, token=token)
adloc_events_csv = f"{data_path}/{year:04d}/adloc_events_{jday:03d}.csv"
adloc_picks_csv = f"{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv"
if protocol == "file":
events = pd.read_csv(f"{root_path}/{adloc_events_csv}", parse_dates=["time"])
picks = pd.read_csv(f"{root_path}/{adloc_picks_csv}", parse_dates=["phase_time"])
else:
with fs.open(f"{bucket}/{adloc_events_csv}", "r") as fp:
events = pd.read_csv(fp, parse_dates=["time"])
with fs.open(f"{bucket}/{adloc_picks_csv}", "r") as fp:
picks = pd.read_csv(fp, parse_dates=["phase_time"])

events["year"] = year
events["jday"] = jday
picks["year"] = year
picks["jday"] = jday

return events, picks


# %%
if __name__ == "__main__":

args = parse_args()
root_path = args.root_path
region = args.region

data_path = f"{region}/gamma"
result_path = f"{region}/gamma"

# %%
# protocol = "gs"
# token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
# with open(token_json, "r") as fp:
# token = json.load(fp)
# fs = fsspec.filesystem(protocol, token=token)

# %%
event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events.csv"))

# %%
events = []
picks = []
for event_csv in tqdm(event_csvs, desc="Load event csvs"):
pick_csv = event_csv.replace("events.csv", "picks.csv")
year, jday = event_csv.split("/")[-1].split(".")[:2]
events_ = pd.read_csv(event_csv, dtype=str)
picks_ = pd.read_csv(pick_csv, dtype=str)
events_["year"] = year
events_["jday"] = jday
picks_["year"] = year
picks_["jday"] = jday
events.append(events_)
picks.append(picks_)

events = pd.concat(events, ignore_index=True)
picks = pd.concat(picks, ignore_index=True)

events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"]
picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"]

events["event_index"] = np.arange(len(events))
picks = picks.drop("event_index", axis=1)
picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")

events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)

events.to_csv(f"{root_path}/{result_path}/gamma_events.csv", index=False)
picks.to_csv(f"{root_path}/{result_path}/gamma_picks.csv", index=False)

# %%
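
The core of this new script is the re-indexing above: a temporary year.jday.event_index key makes per-day event indices globally unique, and the picks pick up the new index through a merge. A toy illustration of that step (values are made up, not project data):

import numpy as np
import pandas as pd

events = pd.DataFrame({"year": ["2024", "2024"], "jday": ["001", "002"], "event_index": ["0", "0"]})
picks = pd.DataFrame({"year": ["2024", "2024", "2024"], "jday": ["001", "001", "002"], "event_index": ["0", "0", "0"]})

events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"]
picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"]

events["event_index"] = np.arange(len(events))  # 0, 1: unique across days
picks = picks.drop("event_index", axis=1).merge(events[["dummy_id", "event_index"]], on="dummy_id")
print(picks["event_index"].tolist())  # [0, 0, 1]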
136 changes: 136 additions & 0 deletions scripts/merge_phasenet_picks.py
@@ -0,0 +1,136 @@
# %%
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob


def scan_csv(year, root_path, fs=None, bucket=None, protocol="file"):
# %%
csv_list = []
if protocol != "file":
jdays = fs.ls(f"{bucket}/{region}/{folder}/{year}")
else:
jdays = os.listdir(f"{root_path}/{region}/phasenet/picks/{year}/")

for jday in jdays:
if protocol != "file":
csvs = fs.glob(f"{jday}/??/*.csv")
else:
csvs = glob(f"{root_path}/{region}/phasenet/picks/{year}/{jday}/??/*.csv")

csv_list.extend([[year, jday, csv] for csv in csvs])

csvs = pd.DataFrame(csv_list, columns=["year", "jday", "csv"])
csv_file = f"{root_path}/{region}/phasenet/csv_list_{year}.csv"
csvs.to_csv(csv_file, index=False)

return csv_file


# %%
def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):

picks = []
for i, row in rows.iterrows():
# if fs.info(row["csv"])["size"] == 0:
# continue
# with fs.open(row["csv"], "r") as f:
# picks_ = pd.read_csv(f, dtype=str)
if os.path.getsize(row["csv"]) == 0:
continue
with open(row["csv"], "r") as f:
picks_ = pd.read_csv(f, dtype=str)
picks.append(picks_)

if len(picks) > 0:
picks = pd.concat(picks, ignore_index=True)
if not os.path.exists(f"{root_path}/{region}/phasenet/{year}"):
os.makedirs(f"{root_path}/{region}/phasenet/{year}", exist_ok=True)
picks.to_csv(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", index=False)
# fs.put(
# f"{root_path}/{region}/phasenet/{year}/{jday}/{year}.{jday}.csv",
# f"{bucket}/{region}/phasenet_merged/{year}/{year}.{jday}.csv",
# )
else:
with open(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", "w") as f:
f.write("")


# %%
if __name__ == "__main__":

args = parse_args()
root_path = args.root_path
region = args.region

data_path = f"{region}/phasenet/picks"
result_path = f"{region}/phasenet"

# %%
# protocol = "gs"
# token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
# with open(token_json, "r") as fp:
# token = json.load(fp)
# fs = fsspec.filesystem(protocol, token=token)

# %%
years = os.listdir(f"{root_path}/{region}/phasenet/picks")

for year in years:

csv_list = scan_csv(year, root_path)

# %%
csv_list = pd.read_csv(csv_list, dtype=str)

# for jday, csvs in csv_list.groupby("jday"):
# read_csv(csvs, region, year, jday, root_path)
# raise

# ncpu = os.cpu_count()
ncpu = 64
print(f"Number of processors: {ncpu}")
csv_by_jday = csv_list.groupby("jday")
pbar = tqdm(total=len(csv_by_jday), desc=f"Loading csv files (year {year})")

# with mp.Pool(ncpu) as pool:
ctx = mp.get_context("spawn")
with ctx.Pool(ncpu) as pool:
jobs = []
for jday, csvs in csv_by_jday:
job = pool.apply_async(
read_csv, (csvs, region, year, jday, root_path), callback=lambda _: pbar.update()
)
jobs.append(job)
pool.close()
pool.join()
for job in jobs:
output = job.get()
if output:
print(output)

pbar.close()

# %%
csvs = glob(f"{root_path}/{region}/phasenet/????/????.???.csv")
picks = []
for csv in tqdm(csvs, desc="Merge csv files"):
picks.append(pd.read_csv(csv, dtype=str))
picks = pd.concat(picks, ignore_index=True)
picks.to_csv(f"{root_path}/{region}/phasenet/phasenet_picks.csv", index=False)

# %%
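
The per-day merging in this script uses a spawn-context Pool with apply_async, and a tqdm callback drives the progress bar. A self-contained sketch of that pattern (the worker and day list are placeholders, not repository code):

import multiprocessing as mp
from tqdm import tqdm

def process_day(jday):
    # stand-in for read_csv(csvs, region, year, jday, root_path)
    return f"done {jday}"

if __name__ == "__main__":
    jdays = [f"{d:03d}" for d in range(1, 11)]
    ctx = mp.get_context("spawn")  # spawn: workers start fresh instead of forking the parent
    pbar = tqdm(total=len(jdays), desc="Loading csv files")
    with ctx.Pool(4) as pool:
        jobs = [pool.apply_async(process_day, (d,), callback=lambda _: pbar.update()) for d in jdays]
        pool.close()
        pool.join()
    results = [job.get() for job in jobs]  # re-raises any worker exception
    pbar.close()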
2 changes: 1 addition & 1 deletion scripts/plot_catalog.py
@@ -48,7 +48,7 @@


# %%
gamma_file = f"{root_path}/{region}/gamma/gamma_events_000_001.csv"
gamma_file = f"{root_path}/{region}/gamma/gamma_events.csv"
gamma_exist = False
if os.path.exists(gamma_file):
print(f"Reading {gamma_file}")
7 changes: 4 additions & 3 deletions scripts/run_adloc.py
@@ -45,8 +45,8 @@ def run_adloc(

# %%
data_path = f"{root_path}/{region}/gamma"
picks_file = os.path.join(data_path, f"gamma_picks_{node_rank:03d}_{num_nodes:03d}.csv")
events_file = os.path.join(data_path, f"gamma_events_{node_rank:03d}_{num_nodes:03d}.csv")
picks_file = os.path.join(data_path, f"gamma_picks.csv")
events_file = os.path.join(data_path, f"gamma_events.csv")
# stations_file = os.path.join(data_path, "stations.csv")
stations_file = f"{root_path}/{region}/obspy/stations.json"

@@ -66,7 +66,7 @@ def run_adloc(
stations.reset_index(drop=True, inplace=True)

config["mindepth"] = config["mindepth"] if "mindepth" in config else 0.0
config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 30.0
config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 60.0
config["use_amplitude"] = True

# ## Eikonal for 1D velocity model
@@ -203,6 +203,7 @@ def run_adloc(
for iter in range(MAX_SST_ITER):
# picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter)
picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)

station_term_amp = (
picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
)
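
The station-term update in the loop above takes, per station, the median amplitude residual over picks with mask == 1.0. A toy illustration of that aggregation (numbers made up):

import pandas as pd

picks = pd.DataFrame(
    {"idx_sta": [0, 0, 1], "residual_amplitude": [0.1, 0.3, -0.2], "mask": [1.0, 1.0, 0.0]}
)
station_term_amp = (
    picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
)
print(station_term_amp)  # idx_sta 0 -> 0.2; station 1 has no picks with mask == 1.0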