Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

48 split reading into init and separate read call #53

Closed
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ with engines['csv_timeseries'].open(
filename=TEST_FILE,
filters={'countries': {'include': ['NO']}}
) as ts:
ts.read()
for var in ts.variables():
# stations
ts.data(var).stations
Expand Down Expand Up @@ -79,6 +80,7 @@ import pyaro.timeseries
TEST_FILE = "csvReader_testdata.csv"
engine = pyaro.list_timeseries_engines()["csv_timeseries"]
ts = engine.open(TEST_FILE, filters=[], fill_country_flag=False)
ts.read()
print(ts.variables())
# stations
ts.data('SOx').stations
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyaro
version = 0.0.13.dev0
version = 0.0.14.dev0
author = Heiko Klein, Daniel Heinesen
author_email = [email protected]
description = pyaro py-aerocom reader objects
Expand Down
93 changes: 62 additions & 31 deletions src/pyaro/csvreader/CSVTimeseriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import glob
import logging
import os
from datetime import datetime

import numpy as np

import pyaro.timeseries.AutoFilterReaderEngine
from pyaro.timeseries import Data, Flag, NpStructuredData, Station
from pyaro.timeseries import Data, NpStructuredData, Station

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -35,26 +36,26 @@ class CSVTimeseriesReader(pyaro.timeseries.AutoFilterReaderEngine.AutoFilterRead
)

def __init__(
self,
filename,
columns={
"variable": 0,
"station": 1,
"longitude": 2,
"latitude": 3,
"value": 4,
"units": 5,
"start_time": 6,
"end_time": 7,
"altitude": "0",
"country": "NO",
"standard_deviation": "NaN",
"flag": "0",
},
variable_units={"SOx": "Gg", "NOx": "Mg"},
country_lookup=False,
csvreader_kwargs={"delimiter": ","},
filters=[],
self,
filename,
columns={
"variable": 0,
"station": 1,
"longitude": 2,
"latitude": 3,
"value": 4,
"units": 5,
"start_time": 6,
"end_time": 7,
"altitude": "0",
"country": "NO",
"standard_deviation": "NaN",
"flag": "0",
},
variable_units={"SOx": "Gg", "NOx": "Mg"},
country_lookup=False,
csvreader_kwargs={"delimiter": ","},
filters=[],
):
"""open a new csv timeseries-reader

Expand Down Expand Up @@ -86,17 +87,41 @@ def __init__(
self._set_filters(filters)
self._extra_metadata = tuple(set(columns.keys()) - set(self.col_keys()))
if country_lookup:
lookupISO2 = _lookup_function()
self._lookupISO2 = _lookup_function()
else:
lookupISO2 = None
self._lookupISO2 = None
self._filename = filename
self._columns = columns
self._variable_units = variable_units
self._csvreader_kwargs = csvreader_kwargs

def read(self):
"""read method"""

for path in self._file_iterator:
logger.debug("%s: %s", filename, path)
logger.debug("%s: %s", self._filename, path)
self._read_single_file(
path, columns, variable_units, lookupISO2, csvreader_kwargs
path, self._columns, self._variable_units, self._lookupISO2, self._csvreader_kwargs
)

def read_revisiondate(self, filename):
    """Return the newest modification time of the file(s) behind *filename*.

    A directory is handled as the glob ``<directory>/*.csv``. A name starting
    with ``glob:`` is expanded with ``glob.iglob``; any other name is taken
    as one single file.

    :param filename: path of a file, path of a directory, or ``glob:<pattern>``
    :return: naive local-time datetime of the most recent modification;
        the datetime of timestamp 0 if a glob matches no file
    """

    def _modified(path):
        # modification time of one file as a datetime
        return datetime.fromtimestamp(os.path.getmtime(path))

    if os.path.isdir(filename):
        pattern = "glob:" + filename + "/*.csv"
    else:
        pattern = filename

    if not pattern.startswith("glob:"):
        return _modified(pattern)

    # timestamp 0 is the lower bound, so it also is the answer for no matches
    epoch = datetime.fromtimestamp(0)
    matches = glob.iglob(pattern[5:], recursive=True)
    return max([epoch, *(_modified(path) for path in matches)])

def _read_single_file(
self, filename, columns, variable_units, country_lookup, csvreader_kwargs
self, filename, columns, variable_units, country_lookup, csvreader_kwargs
):
with open(filename, newline="") as csvfile:
crd = csv.reader(csvfile, **csvreader_kwargs)
Expand All @@ -115,11 +140,11 @@ def _read_single_file(
extra_metadata[t] = row[columns[t]]

for t in (
"value",
"latitude",
"longitude",
"altitude",
"standard_deviation",
"value",
"latitude",
"longitude",
"altitude",
"standard_deviation",
):
r[t] = float(r[t])
for t in ("start_time", "end_time"):
Expand Down Expand Up @@ -208,3 +233,9 @@ def description(self):

def url(self):
return "https://github.com/metno/pyaro"

def read(self, *args, **kwargs):
    """Forward a read request to ``read`` of this engine's reader class.

    The arguments are accepted here because the body passes them on; without
    them in the signature the names ``args`` and ``kwargs`` are undefined and
    every call fails with ``NameError``.

    :param args: positional arguments passed on unchanged
    :param kwargs: keyword arguments passed on unchanged
    :return: whatever ``read`` of the reader class returns
    """
    # NOTE(review): ``read`` is looked up on the object returned by
    # ``reader_class()``; if that is the class itself, the first positional
    # argument presumably has to be the reader instance -- confirm with callers.
    return self.reader_class().read(*args, **kwargs)

def read_revisiondate(self, filename):
    """Delegate the revision-date lookup to this engine's reader class.

    :param filename: passed on unchanged to ``read_revisiondate`` of the
        reader class
    :return: whatever ``read_revisiondate`` of the reader class returns
    """
    reader_cls = self.reader_class()
    # the engine itself is handed over as first argument, followed by filename
    return reader_cls.read_revisiondate(self, filename)
10 changes: 10 additions & 0 deletions src/pyaro/timeseries/Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ def open(self, filename_or_obj_or_url, *, filters=None):
"""
pass

@abc.abstractmethod
def read(self):
    """Read the timeseries; every concrete engine has to provide this.

    The abstract version does nothing.

    :return: pyaro.timeseries.Reader
    :raises: UnknownFilterException
    """

@property
@abc.abstractmethod
def args(self) -> list[str]:
Expand Down
16 changes: 14 additions & 2 deletions src/pyaro/timeseries/Reader.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import abc

from .Data import Data
from .Station import Station
from .Filter import Filter, filters


class Reader(abc.ABC):
"""Baseclass for timeseries. This can be used with a context manager"""

@abc.abstractmethod
def __init__(self, filename_or_obj_or_url, *, filters=None):
def __init__(self, filename_or_obj_or_url, filters=None, **kwargs):
"""Initialize the reader.

This function is usually called from the Engine's open function.
Expand All @@ -19,6 +19,18 @@ def __init__(self, filename_or_obj_or_url, *, filters=None):
"""
pass

@abc.abstractmethod
def read(self):
    """Read the data; every concrete reader has to provide this.

    The method takes no arguments: all parameters it needs should have been
    stored on ``self`` by the ``__init__`` method. It is usually called after
    the Engine's open function. Implementations should implement the context
    manager. The abstract version does nothing.
    """

@abc.abstractmethod
def metadata(self) -> dict[str, str]:
"""Metadata set by the datasource.

Expand Down
22 changes: 19 additions & 3 deletions src/pyaro/timeseries/Wrappers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from .Reader import Reader
from .Data import Data


class VariableNameChangingReader(Reader):
"""A pyaro.timeseries.Reader wrapper taking a real Reader implementation and
changing variable names in the original reader. Exampel:
changing variable names in the original reader. Example:

with VariableNameChangingReader(pyaro.open_timeseries(file, filters=[]),
{'SOx': 'oxidised_sulphur'}) as ts:
Expand All @@ -14,7 +13,7 @@ class VariableNameChangingReader(Reader):

"""

def __init__(self, reader: Reader, reader_to_new: dict[str, str]):
def __init__(self, reader: Reader, reader_to_new: dict[str, str], **kwargs, ):
"""Initialize the variable name changes of Reader

:param reader: The Reader instance to change variable names on
Expand Down Expand Up @@ -45,6 +44,15 @@ def data(self, varname):
data._set_variable(varname)
return data

def metadata(self):
    """Return the metadata of the wrapped reader as it is.

    For the moment the variable names inside are NOT changed to the newly
    given ones.

    :return: metadata from the original reader class
    """
    return self._reader.metadata()

def stations(self):
return self._reader.stations()

Expand All @@ -57,3 +65,11 @@ def variables(self):

def close(self):
self._reader.close()

def read(self):
    """Run ``read`` of the wrapped reader and hand back its result.

    No arguments are taken; all needed parameters should have been put into
    ``self`` by the ``__init__`` method. This method is called after the
    Engine's open function.

    :return: whatever the wrapped reader's ``read`` returns
    """
    wrapped = self._reader
    return wrapped.read()
Loading
Loading