-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Interpolate and group ais #8
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ install_requires = | |
onc | ||
sqlalchemy | ||
spans | ||
wget | ||
include_package_data = True | ||
zip_safe = False | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
from pathlib import Path | ||
from functools import lru_cache | ||
|
||
import wget | ||
import pandas as pd | ||
import numpy as np | ||
import spans | ||
|
@@ -32,6 +33,9 @@ | |
|
||
from . import _persistence | ||
|
||
# from zipfile import ZipFile | ||
|
||
|
||
ais_site = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/" | ||
onc = ONC( | ||
_persistence.load_user_token(), | ||
|
@@ -51,6 +55,8 @@ def download_ships(year: int, month: int, zone: int) -> None: | |
month (int): month to download | ||
zone (int): UTM zone to download | ||
""" | ||
_persistence._init_data_folder() | ||
_persistence._init_ais_db(_persistence.AIS_DB) | ||
_persistence.init_data_folder() | ||
_persistence.init_ais_db(_persistence.AIS_DB) | ||
if (year, month, zone) not in _get_ais_downloads(_persistence.AIS_DB): | ||
|
@@ -80,8 +86,13 @@ def _download_ais_to_temp(year: int, month: int, zone: int) -> Path: | |
Returns: | ||
location of download result | ||
""" | ||
# morgan | ||
pass | ||
# JMSH: morgan. MWM, 07/23/2021: Done. | ||
url = ( | ||
f"https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2015/AIS_{year}_ " | ||
f" {month}_{zone}.zip" | ||
) | ||
wget.download(url, _persistence.AIS_TEMP_DIR) | ||
return _persistence.AIS_TEMP_DIR | ||
|
||
|
||
def _unzip_ais(zipfile: Path) -> Tuple[Path]: | ||
|
@@ -94,7 +105,7 @@ def _unzip_ais(zipfile: Path) -> Tuple[Path]: | |
tuple comprising the root of the unzip tree and the specific | ||
unzipped file of interest | ||
""" | ||
# morgan | ||
# JMSH: morgan. | ||
pass | ||
|
||
|
||
|
@@ -525,32 +536,69 @@ def _interpolate_and_group_ais( | |
"""Interpolate the lat/lon of ships to the specified time. | ||
|
||
Interpolation rules: | ||
A ship has observations near the specified time, before and | ||
C1: A ship has observations near the specified time, before and | ||
after: linear interpolation | ||
A ship has one observation very near the specified time, either | ||
C2: A ship has one observation very near the specified time, either | ||
before or after, but not both: constant interpolation | ||
A ship does not meet above criteria: do not create an | ||
C3: A ship does not meet above criteria: do not create an | ||
interpolated record for this ship at this time | ||
|
||
Note: | ||
What counts as "near" and "very near" is subject to change and | ||
may be refactored out into an interpolation parameters object | ||
|
||
MWM: "near" = entries before and after the given time, | ||
"very near" = one entry within a neighborhood of the given time | ||
|
||
Arguments: | ||
ais_df: ship records, including a basedatetime column. | ||
times: when to interpolate the ship positions. | ||
|
||
Returns: | ||
The interpolated records, grouped by time. | ||
""" | ||
# Morgan, you'll have to first groupby mmsi (ship unique id) and | ||
# 1.) groupby mmsi (ship unique id) | ||
# then apply an interpolation function for each timepoint. The | ||
# interpolation function will take a dataframe and a timepoint, | ||
# and will determine, based on the nearest records before/after | ||
# the timepoint, which interpolation rule to apply. | ||
# | ||
# While this function sounds like it takes a long time, its ok at | ||
# the outset to accomplish this somewhat inefficiently. | ||
mmsi_set = ais_df.groupby["mmsi"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `groupby` is a function and needs parentheses (e.g. `ais_df.groupby("mmsi")`). Also, be careful naming things. |
||
for mmsi in mmsi_set.groups(): | ||
mmsi | ||
# group = mmsi_set.get_group(mmsi) | ||
# intrp = mmsi_set.apply(_ais_interpolator_dispatcher) | ||
pass | ||
|
||
|
||
def _ais_interpolator_dispatcher(mmsi: pd.DataFrame, time, delta): | ||
"""Assess which case to apply from interpolation rules of | ||
_interpolate_and_group_ais() docstring and dispatch to helper functions. | ||
|
||
Arguments: | ||
group_df: single-mmsi subset of ship records, with basedatetime column. | ||
time: when to interpolate the ship positions. | ||
delta: the "very near" threshold | ||
Comment on lines
+582
to
+583
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You need to do this for a list of times, so either … Also, you need deltas for both the "near" and "very near" thresholds. |
||
""" | ||
time_col = mmsi["BaseDateTime"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Try something like this, but faster:
for target_time in times:
last_before = max(obs_time - target_time for obs_time in time_col if obs_time < target_time)
first_after = min(obs_time - target_time for obs_time in time_col if obs_time > target_time)
if (
last_before
and target_time - last_before < near
and first_after
and first_after - target_time < near
):
# C1
elif last_before and target_time - last_before < very_near:
# C2
elif first_after and first_after - target_time < very_near:
# C2 |
||
if all(time_col - time < 0): # C1: time after whole record | ||
pass | ||
elif all(time_col - time > 0): # C1: time before whole record | ||
pass | ||
else: # C2 or C3 | ||
c2 = (time_col.rsub(time) >= -delta) & (time_col.rsub(time) <= delta) | ||
if c2.sum() > 0: # c2.sum() > 1 not impossible for arbitrary delta | ||
# constant Interpolation | ||
mmsi[c2] | ||
pass | ||
else: | ||
# linear interpolation | ||
# TODO: this assumes chronological ordering of masked subsets | ||
# before = mmsi[time_col - time > 0].iloc[-1] | ||
# after = mmsi[time_col - time < 0].iloc[0] | ||
pass | ||
pass | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from tehom import downloads, _persistence | ||
|
||
|
||
def test_download_ais_to_temp(declare_stateful): | ||
year = 2014 | ||
month = 1 | ||
zone = 1 | ||
downloads._download_ais_to_temp(year, month, zone) | ||
path = _persistence.AIS_TEMP_DIR / f"{year}_{month}_{zone}.zip" | ||
assert path.exists() | ||
Comment on lines
+1
to
+10
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrong branch. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what the point of this is. These aren't definitions, despite the equals sign. These are cases when each term is used.