diff --git a/.gitignore b/.gitignore index ca8204ba..ab1f58ce 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,78 @@ docs/tutorials/data tests/integration/data .ruff_cache -# OS X +notebooks/data/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Created by https://www.toptal.com/developers/gitignore/api/macos +# Edit at https://www.toptal.com/developers/gitignore?templates=macos + +### macOS ### +# General .DS_Store +.AppleDouble +.LSOverride -notebooks/data/ -.vscode +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +# End of https://www.toptal.com/developers/gitignore/api/macos + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode + +### VisualStudioCode ### +.vscode/ + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode + +# Created by https://www.toptal.com/developers/gitignore/api/direnv +# Edit at https://www.toptal.com/developers/gitignore?templates=direnv + +### direnv ### +.direnv +.envrc + +# End of https://www.toptal.com/developers/gitignore/api/direnv diff --git a/CHANGELOG.md b/CHANGELOG.md index a2002d23..fe0efb32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ * fixed 483 by extracting a common CMR query method for collections and granules using SearchAfter header * Added VCR support for verifying the API call to CMR and the parsing of returned results without relying on CMR availability post development +* Enhancements: + * Corrected and enhanced static type hints for functions and methods that make + CMR queries or handle CMR query results (#508) + ## [v0.9.0] 2024-02-28 * Bug fixes: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 300b5387..a8415a1a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,6 +84,15 @@ Finally, for _development dependencies only_, you must add an entry to make format lint ``` +We attempt to provide comprehensive type annotations within this repository. If +you do not provide fully annotated functions or methods, the `lint` command will +fail. Over time, we plan to increase type-checking strictness in order to +ensure more precise, beneficial type annotations. + +We have included type stubs for the untyped `python-cmr` library, which we +intend to eventually upstream. Since `python-cmr` exposes the `cmr` package, +the stubs appear under `stubs/cmr`. 
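To illustrate the annotation requirement described above, here is a minimal, generic sketch (not code from this repository) of a signature that satisfies the `disallow_untyped_defs` check that mypy runs as part of `make lint`:

```python
from typing import List, Optional


# Every parameter and the return type are annotated, which is what the
# mypy step behind `make lint` requires of new functions and methods.
def pick_first(names: Optional[List[str]]) -> Optional[str]:
    return names[0] if names else None
```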
+ ### Requirements to merge code (Pull Request Process) - you must include test coverage diff --git a/ci/environment-mindeps.yaml b/ci/environment-mindeps.yaml index 75037c51..cb8fd367 100644 --- a/ci/environment-mindeps.yaml +++ b/ci/environment-mindeps.yaml @@ -17,11 +17,14 @@ dependencies: - multimethod=1.8 - python-dateutil=2.8.2 - importlib-resources=6.3.2 + - typing-extensions=4.10.0 # test dependencies - responses - pytest - pytest-cov + - python-magic - mypy + - types-python-dateutil - types-requests - types-setuptools - ruff diff --git a/earthaccess/api.py b/earthaccess/api.py index a7d35fb0..796dbcb5 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,13 +1,12 @@ -from typing import Any, Dict, List, Optional, Type, Union - import requests import s3fs from fsspec import AbstractFileSystem +from typing_extensions import Any, Dict, List, Optional, Union import earthaccess from .auth import Auth -from .results import DataGranule +from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store from .utils import _validation as validate @@ -28,9 +27,7 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: return location -def search_datasets( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataCollection]: +def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -54,6 +51,9 @@ def search_datasets( A list of DataCollection results that can be used to get information about a dataset, e.g. concept_id, doi, etc. + Raises: + RuntimeError: The CMR query failed. + Examples: ```python datasets = earthaccess.search_datasets( @@ -78,9 +78,7 @@ def search_datasets( return query.get_all() -def search_data( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataGranule]: +def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -104,6 +102,9 @@ def search_data( a list of DataGranules that can be used to access the granule files by using `download()` or `open()`. + Raises: + RuntimeError: The CMR query failed. + Examples: ```python datasets = earthaccess.search_data( @@ -178,6 +179,9 @@ def download( Returns: List of downloaded files + + Raises: + Exception: A file download failed. """ provider = _normalize_location(provider) if isinstance(granules, DataGranule): @@ -194,7 +198,7 @@ def download( def open( - granules: Union[List[str], List[earthaccess.results.DataGranule]], + granules: Union[List[str], List[DataGranule]], provider: Optional[str] = None, ) -> List[AbstractFileSystem]: """Returns a list of fsspec file-like objects that can be used to access files @@ -216,7 +220,7 @@ def open( def get_s3_credentials( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[List[earthaccess.results.DataGranule]] = None, + results: Optional[List[DataGranule]] = None, ) -> Dict[str, Any]: """Returns temporary (1 hour) credentials for direct access to NASA S3 buckets. We can use the daac name, the provider, or a list of results from earthaccess.search_data(). 
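With the return types above narrowed to `List[DataCollection]` and `List[DataGranule]`, a brief usage sketch of the public API follows; the short name, date range, and output directory are illustrative values, and downloading restricted data assumes prior authentication:

```python
import earthaccess

earthaccess.login()  # authenticate with Earthdata Login before downloading

# Annotated to return List[DataGranule]
granules = earthaccess.search_data(
    short_name="ATL06",              # illustrative collection short name
    temporal=("2020-01", "2020-02"),
    count=10,
)

# Stream the granules as file-like objects...
files = earthaccess.open(granules)

# ...or download them; a failed download raises an Exception.
paths = earthaccess.download(granules, "./data")
```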
@@ -239,7 +243,7 @@ def get_s3_credentials( return earthaccess.__auth__.get_s3_credentials(daac=daac, provider=provider) -def collection_query() -> Type[CollectionQuery]: +def collection_query() -> CollectionQuery: """Returns a query builder instance for NASA collections (datasets). Returns: @@ -252,7 +256,7 @@ def collection_query() -> Type[CollectionQuery]: return query_builder -def granule_query() -> Type[GranuleQuery]: +def granule_query() -> GranuleQuery: """Returns a query builder instance for data granules Returns: @@ -311,7 +315,7 @@ def get_requests_https_session() -> requests.Session: def get_s3fs_session( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[earthaccess.results.DataGranule] = None, + results: Optional[DataGranule] = None, ) -> s3fs.S3FileSystem: """Returns a fsspec s3fs file session for direct access when we are in us-west-2. diff --git a/earthaccess/py.typed b/earthaccess/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/earthaccess/search.py b/earthaccess/search.py index 335389fe..fadd7b55 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -1,48 +1,71 @@ import datetime as dt from inspect import getmembers, ismethod -from typing import Any, List, Optional, Tuple, Type, Union -import dateutil.parser as parser # type: ignore +import dateutil.parser as parser import requests +from typing_extensions import ( + Any, + List, + Optional, + Self, + Sequence, + SupportsFloat, + Tuple, + TypeAlias, + Union, + override, +) + from cmr import CollectionQuery, GranuleQuery -# type: ignore from .auth import Auth from .daac import find_provider, find_provider_by_shortname from .results import DataCollection, DataGranule +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] + def get_results( - query: Union[CollectionQuery, GranuleQuery], limit: int = 2000 + session: requests.Session, + query: Union[CollectionQuery, GranuleQuery], + limit: int = 2000, ) -> List[Any]: """ Get all results up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, if the supplied value is greater then the Search-After header - will be used to iterate across multiple requests until either the limit has been reached - or there are no more results. + The default page size is 2000, if the supplied value is greater then the + Search-After header will be used to iterate across multiple requests until + either the limit has been reached or there are no more results. + Parameters: limit: The number of results to return Returns: query results as a list + + Raises: + RuntimeError: The CMR query failed. 
""" page_size = min(limit, 2000) url = query._build_url() - results: List = [] + results: List[Any] = [] more_results = True headers = dict(query.headers or {}) + while more_results: - response = requests.get(url, headers=headers, params={"page_size": page_size}) - headers["cmr-search-after"] = response.headers.get("cmr-search-after") + response = session.get(url, headers=headers, params={"page_size": page_size}) + + if cmr_search_after := response.headers.get("cmr-search-after"): + headers["cmr-search-after"] = cmr_search_after try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex latest = response.json()["items"] @@ -56,39 +79,46 @@ def get_results( class DataCollections(CollectionQuery): """ ???+ Info - The DataCollection class queries against https://cmr.earthdata.nasa.gov/search/collections.umm_json, + The DataCollection class queries against + https://cmr.earthdata.nasa.gov/search/collections.umm_json, the response has to be in umm_json to use the result classes. """ - _fields = None + _fields: Optional[List[str]] = None _format = "umm_json" def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: - """Builds an instance of DataCollections to query CMR + """Builds an instance of DataCollections to query the CMR. Parameters: auth: An authenticated `Auth` instance. This is an optional parameter for queries that need authentication, e.g. restricted datasets. """ super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False self.params["has_granules"] = True self.params["include_granule_counts"] = True + @override def hits(self) -> int: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. Restricted datasets will always return zero results even if there are results. - Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -97,60 +127,77 @@ def hits(self) -> int: try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex return int(response.headers["CMR-Hits"]) + @override def get(self, limit: int = 2000) -> List[DataCollection]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. Parameters: limit: The number of results to return Returns: - query results as a list of `DataCollection` instances. + Query results as a (possibly empty) list of `DataCollection` instances. 
+ + Raises: + RuntimeError: The CMR query failed. """ - return list( + return [ DataCollection(collection, self._fields) - for collection in get_results(self, limit) - ) + for collection in get_results(self.session, self, limit) + ] - def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: + @override + def concept_id(self, IDs: Sequence[str]) -> Self: """Filter by concept ID. - For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS + + For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, + S12345678-LPDAAC_ECS Collections, granules, tools, services are uniquely identified with this ID. - > - * If providing a collection's concept ID here, it will filter by granules associated with that collection. - * If providing a granule's concept ID here, it will uniquely identify those granules. - * If providing a tool's concept ID here, it will uniquely identify those tools. - * If providing a service's concept ID here, it will uniquely identify those services. + + * If providing a collection's concept ID, it will filter by granules associated + with that collection. + * If providing a granule's concept ID, it will uniquely identify those granules. + * If providing a tool's concept ID, it will uniquely identify those tools. + * If providing a service's concept ID, it will uniquely identify those services. Parameters: IDs: ID(s) to search by. Can be provided as a string or list of strings. + + Returns: + self + + Raises: + ValueError: An ID does not start with a valid prefix. """ - super().concept_id(IDs) - return self + return super().concept_id(IDs) - def keyword(self, text: str) -> Type[CollectionQuery]: + @override + def keyword(self, text: str) -> Self: """Case-insensitive and wildcard (*) search through over two dozen fields in a CMR collection record. This allows for searching against fields like summary and science keywords. Parameters: text: text to search for + + Returns: + self """ - super().keyword(text) - return self + return super().keyword(text) - def doi(self, doi: str) -> Type[CollectionQuery]: + def doi(self, doi: str) -> Self: """Search datasets by DOI. ???+ Tip @@ -160,6 +207,12 @@ def doi(self, doi: str) -> Type[CollectionQuery]: Parameters: doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + + Returns: + self + + Raises: + TypeError: `doi` is not of type `str`. """ if not isinstance(doi, str): raise TypeError("doi must be of type str") @@ -167,8 +220,8 @@ def doi(self, doi: str) -> Type[CollectionQuery]: self.params["doi"] = doi return self - def instrument(self, instrument: str) -> Type[CollectionQuery]: - """Searh datasets by instrument + def instrument(self, instrument: str) -> Self: + """Searh datasets by instrument. ???+ Tip Not all datasets have an associated instrument. This works @@ -176,6 +229,12 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: Parameters: instrument (String): instrument of a datasets, e.g. instrument=GEDI + + Returns: + self + + Raises: + TypeError: `instrument` is not of type `str`. """ if not isinstance(instrument, str): raise TypeError("instrument must be of type str") @@ -183,8 +242,8 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: self.params["instrument"] = instrument return self - def project(self, project: str) -> Type[CollectionQuery]: - """Searh datasets by associated project + def project(self, project: str) -> Self: + """Searh datasets by associated project. ???+ Tip Not all datasets have an associated project. 
This works @@ -193,6 +252,12 @@ def project(self, project: str) -> Type[CollectionQuery]: Parameters: project (String): associated project of a datasets, e.g. project=EMIT + + Returns: + self + + Raises: + TypeError: `project` is not of type `str`. """ if not isinstance(project, str): raise TypeError("project must be of type str") @@ -200,22 +265,29 @@ def project(self, project: str) -> Type[CollectionQuery]: self.params["project"] = project return self - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: + @override + def parameters(self, **kwargs: Any) -> Self: """Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values. ???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` + Returns: - Query instance + self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ - methods = {} - for name, func in getmembers(self, predicate=ismethod): - methods[name] = func + methods = dict(getmembers(self, predicate=ismethod)) for key, val in kwargs.items(): # verify the key matches one of our methods @@ -236,33 +308,48 @@ def print_help(self, method: str = "fields") -> None: print([method for method in dir(self) if method.startswith("_") is False]) help(getattr(self, method)) - def fields(self, fields: Optional[List[str]] = None) -> Type[CollectionQuery]: + def fields(self, fields: Optional[List[str]] = None) -> Self: """Masks the response by only showing the fields included in this list. Parameters: - fields (List): list of fields to show, these fields come from the UMM model e.g. Abstract, Title + fields (List): list of fields to show. These fields come from the UMM model + (e.g. Abstract, Title). + + Returns: + self """ self._fields = fields return self - def debug(self, debug: bool = True) -> Type[CollectionQuery]: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + def debug(self, debug: bool = True) -> Self: + """If True, prints the actual query to CMR. Note that the pagination happens in + the headers. Parameters: - debug (Boolean): Print CMR query. + debug (Boolean): If `True`, print the CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: - """Only match granules that are hosted in the cloud. This is valid for public collections. + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: + """Only match granules that are hosted in the cloud. This is valid for public + collections. ???+ Tip Cloud hosted collections can be public or restricted. Restricted collections will not be matched using this parameter Parameters: - cloud_hosted: True to require granules only be online + cloud_hosted: If `True`, obtain only cloud-hosted collections. + + Returns: + self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. 
""" if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -273,32 +360,43 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. - E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for on-premises data, - POCLOUD is the PODAAC provider for their data in the cloud. + E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for + on-premises data, POCLOUD is the PODAAC provider for their data in the cloud. Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()`. + def data_center(self, data_center_name: str) -> Self: + """An alias for the `daac` method. Parameters: data_center_name: DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC, by default the on-prem collections for the DAAC. + def daac(self, daac_short_name: str) -> Self: + """Only match collections for a given DAAC, by default the on-prem collections + for the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -308,20 +406,31 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[CollectionQuery]: - """Filter by an open or closed date range. Dates can be provided as datetime objects - or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls - to this method before calling execute(). + ) -> Self: + """Filter by an open or closed date range. Dates can be provided as datetime + objects or ISO 8601 formatted strings. Multiple ranges can be provided by + successive calls to this method before calling execute(). Parameters: date_from (String or Datetime object): earliest date of temporal range date_to (String or Datetime object): latest date of temporal range - exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range. + exclude_boundary (Boolean): whether or not to exclude the date_from/to in + the matched range. + + Returns: + self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. 
""" DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -338,8 +447,7 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) class DataGranules(GranuleQuery): @@ -350,22 +458,30 @@ class DataGranules(GranuleQuery): _format = "umm_json" - def __init__(self, auth: Any = None, *args: Any, **kwargs: Any) -> None: + def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: """Base class for Granule and Collection CMR queries.""" super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False + @override def hits(self) -> int: """Returns the number of hits the current query will return. - This is done by making a lightweight query to CMR and inspecting the returned headers. + This is done by making a lightweight query to CMR and inspecting the returned + headers. Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -382,40 +498,54 @@ def hits(self) -> int: return int(response.headers["CMR-Hits"]) + @override def get(self, limit: int = 2000) -> List[DataGranule]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. Parameters: - limit: The number of results to return + limit: The number of results to return. Returns: - query results as a list of `DataGranules` instances. + Query results as a (possibly empty) list of `DataGranules` instances. + + Raises: + RuntimeError: The CMR query failed. """ - response = get_results(self, limit) + response = get_results(self.session, self, limit) cloud = self._is_cloud_hosted(response[0]) - return list(DataGranule(granule, cloud_hosted=cloud) for granule in response) + return [DataGranule(granule, cloud_hosted=cloud) for granule in response] - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: - """Provide query parameters as keyword arguments. The keyword needs to match the name - of the method, and the value should either be the value or a tuple of values. + @override + def parameters(self, **kwargs: Any) -> Self: + """Provide query parameters as keyword arguments. The keyword needs to match the + name of the method, and the value should either be the value or a tuple of + values. 
???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` Returns: - Query instance + self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ methods = {} for name, func in getmembers(self, predicate=ismethod): @@ -434,7 +564,8 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. For example, PODAAC is a data center or DAAC, @@ -443,23 +574,33 @@ def provider(self, provider: str = "") -> Type[CollectionQuery]: Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()`. + def data_center(self, data_center_name: str) -> Self: + """An alias for the `daac` method. Parameters: data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC. Default to on-prem collections for the DAAC. + def daac(self, daac_short_name: str) -> Self: + """Only match collections for a given DAAC. Default to on-prem collections for + the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -469,18 +610,25 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self - def orbit_number(self, orbit1: int, orbit2: int) -> Type[GranuleQuery]: + @override + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = None, + ) -> Self: """Filter by the orbit number the granule was acquired during. Either a single orbit can be targeted or a range of orbits. Parameter: orbit1: orbit to target (lower limit of range when orbit2 is provided) orbit2: upper limit of range + + Returns: + self """ - super().orbit_number(orbit1, orbit2) - return self + return super().orbit_number(orbit1, orbit2) - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: """Only match granules that are hosted in the cloud. This is valid for public collections and when using the short_name parameter. Concept-Id is unambiguous. @@ -490,7 +638,13 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: Restricted collections will not be matched using this parameter. Parameters: - cloud_hosted: True to require granules only be online + cloud_hosted: If `True`, obtain only granules from cloud-hosted collections. + + Returns: + self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. 
""" if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -503,7 +657,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def granule_name(self, granule_name: str) -> Type[CollectionQuery]: + def granule_name(self, granule_name: str) -> Self: """Find granules matching either granule ur or producer granule id, queries using the readable_granule_name metadata field. @@ -513,6 +667,12 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: Parameters: granule_name: granule name (accepts wildcards) + + Returns: + self + + Raises: + TypeError: if `granule_name` is not of type `str` """ if not isinstance(granule_name, str): raise TypeError("granule_name must be of type string") @@ -521,54 +681,90 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: self.params["options[readable_granule_name][pattern]"] = True return self - def online_only(self, online_only: bool = True) -> Type[GranuleQuery]: + @override + def online_only(self, online_only: bool = True) -> Self: """Only match granules that are listed online and not available for download. - The opposite of this method is downloadable(). + The inverse of this method is `downloadable`. Parameters: - online_only: True to require granules only be online + online_only: If `True`, obtain only granules that are online (not + downloadable) + + Returns: + self + + Raises: + TypeError: `online_only` is not of type `bool`. """ - super().online_only(online_only) - return self + return super().online_only(online_only) - def day_night_flag(self, day_night_flag: str) -> Type[GranuleQuery]: + @override + def day_night_flag(self, day_night_flag: str) -> Self: """Filter by period of the day the granule was collected during. Parameters: day_night_flag: "day", "night", or "unspecified" + + Returns: + self + + Raises: + TypeError: `day_night_flag` is not of type `str`. + ValueError: `day_night_flag` is not one of `"day"`, `"night"`, or + `"unspecified"`. """ - super().day_night_flag(day_night_flag) - return self + return super().day_night_flag(day_night_flag) - def instrument(self, instrument: str = "") -> Type[GranuleQuery]: + @override + def instrument(self, instrument: str) -> Self: """Filter by the instrument associated with the granule. Parameters: instrument: name of the instrument + + Returns: + self + + Raises: + ValueError: `instrument` is not a non-empty string. """ - super().instrument(instrument) - return self + return super().instrument(instrument) - def platform(self, platform: str = "") -> Type[GranuleQuery]: + @override + def platform(self, platform: str) -> Self: """Filter by the satellite platform the granule came from. Parameters: platform: name of the satellite + + Returns: + self + + Raises: + ValueError: `platform` is not a non-empty string. """ - super().platform(platform) - return self + return super().platform(platform) + @override def cloud_cover( - self, min_cover: int = 0, max_cover: int = 100 - ) -> Type[GranuleQuery]: + self, + min_cover: Optional[FloatLike] = 0, + max_cover: Optional[FloatLike] = 100, + ) -> Self: """Filter by the percentage of cloud cover present in the granule. Parameters: min_cover: minimum percentage of cloud cover max_cover: maximum percentage of cloud cover + + Returns: + self + + Raises: + ValueError: `min_cover` or `max_cover` is not convertible to a float, + or `min_cover` is greater than `max_cover`. 
""" - super().cloud_cover(min_cover, max_cover) - return self + return super().cloud_cover(min_cover, max_cover) def _valid_state(self) -> bool: # spatial params must be paired with a collection limiting parameter @@ -593,41 +789,57 @@ def _is_cloud_hosted(self, granule: Any) -> bool: return True return False - def short_name(self, short_name: str = "") -> Type[GranuleQuery]: + @override + def short_name(self, short_name: str) -> Self: """Filter by short name (aka product or collection name). Parameters: short_name: name of a collection Returns: - Query instance + self """ - super().short_name(short_name) - return self + return super().short_name(short_name) - def debug(self, debug: bool = True) -> Type[GranuleQuery]: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + def debug(self, debug: bool = True) -> Self: + """If True, prints the actual query to CMR, notice that the pagination happens + in the headers. Parameters: - debug: Print CMR query. + debug: If `True`, print the CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[GranuleQuery]: + ) -> Self: """Filter by an open or closed date range. - Dates can be provided as a datetime objects or ISO 8601 formatted strings. Multiple - ranges can be provided by successive calls to this method before calling execute(). + + Dates can be provided as a datetime objects or ISO 8601 formatted strings. + Multiple ranges can be provided by successive calls to this method before + calling execute(). Parameters: date_from: earliest date of temporal range date_to: latest date of temporal range exclude_boundary: whether to exclude the date_from/to in the matched range + + Returns: + self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -644,96 +856,146 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) - def version(self, version: str = "") -> Type[GranuleQuery]: + @override + def version(self, version: str) -> Self: """Filter by version. Note that CMR defines this as a string. For example, MODIS version 6 products must be searched for with "006". Parameters: version: version string + + Returns: + self """ - super().version(version) - return self + return super().version(version) - def point(self, lon: str, lat: str) -> Type[GranuleQuery]: + @override + def point(self, lon: FloatLike, lat: FloatLike) -> Self: """Filter by granules that include a geographic point. Parameters: - lon (String): longitude of geographic point - lat (String): latitude of geographic point + lon: longitude of geographic point + lat: latitude of geographic point + + Returns: + self + + Raises: + ValueError: `lon` or `lat` cannot be converted to a float. 
""" - super().point(lon, lat) - return self + return super().point(lon, lat) - def polygon(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: - """Filter by granules that overlap a polygonal area. Must be used in combination with a - collection filtering parameter such as short_name or entry_title. + @override + def polygon(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a polygonal area. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: coordinates: list of (lon, lat) tuples + + Returns: + self + + Raises: + ValueError: `coordinates` is not a sequence of at least 4 coordinate + pairs, any of the coordinates cannot be converted to a float, or the + first and last coordinate pairs are not equal. """ - super().polygon(coordinates) - return self + return super().polygon(coordinates) + @override def bounding_box( self, - lower_left_lon: str, - lower_left_lat: str, - upper_right_lon: str, - upper_right_lat: str, - ) -> Type[GranuleQuery]: - """Filter by granules that overlap a bounding box. Must be used in combination with - a collection filtering parameter such as short_name or entry_title. + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Self: + """Filter by granules that overlap a bounding box. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: lower_left_lon: lower left longitude of the box lower_left_lat: lower left latitude of the box upper_right_lon: upper right longitude of the box upper_right_lat: upper right latitude of the box + + Returns: + self + + Raises: + ValueError: A coordinate could not be converted to a float. """ - super().bounding_box( + return super().bounding_box( lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat ) - return self - def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: - """Filter by granules that overlap a series of connected points. Must be used in combination - with a collection filtering parameter such as short_name or entry_title. + @override + def line(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a series of connected points. Must be used + in combination with a collection filtering parameter such as short_name or + entry_title. Parameters: coordinates: a list of (lon, lat) tuples + + Returns: + self + + Raises: + ValueError: `coordinates` is not a sequence of at least 2 coordinate + pairs, or a coordinate could not be converted to a float. """ - super().line(coordinates) - return self + return super().line(coordinates) - def downloadable(self, downloadable: bool = True) -> Type[GranuleQuery]: - """Only match granules that are available for download. The opposite of this - method is online_only(). + @override + def downloadable(self, downloadable: bool = True) -> Self: + """Only match granules that are available for download. The inverse of this + method is `online_only`. Parameters: - downloadable: True to require granules be downloadable + downloadable: If `True`, obtain only granules that are downloadable. + + Returns: + self + + Raises: + TypeError: `downloadable` is not of type `bool`. 
""" - super().downloadable(downloadable) - return self + return super().downloadable(downloadable) - def doi(self, doi: str) -> Type[GranuleQuery]: - """Search data granules by DOI + def doi(self, doi: str) -> Self: + """Search data granules by DOI. ???+ Tip Not all datasets have an associated DOI, internally if a DOI is found earthaccess will grab the concept_id for the query to CMR. Parameters: - doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + doi: DOI of a dataset, e.g. 10.5067/AQR50-3Q7CS + + Returns: + self + + Raises: + RuntimeError: The CMR query to get the collection for the DOI fails. """ + + # TODO consider deferring this query until the search is executed collection = DataCollections().doi(doi).get() + + # TODO consider raising an exception when there are multiple collections, since + # we can't know which one the user wants, and choosing one is arbitrary. if len(collection) > 0: concept_id = collection[0].concept_id() self.params["concept_id"] = concept_id else: + # TODO consider removing this print statement since we don't print such + # a message in other cases where no results are found. Seems arbitrary. print( f"earthaccess couldn't find any associated collections with the DOI: {doi}" ) + return self diff --git a/poetry.lock b/poetry.lock index 5ca33866..f8c4eba9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -3084,8 +3084,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3850,6 +3850,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3857,8 +3858,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3875,6 +3884,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3882,6 +3892,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -5109,4 +5120,4 @@ kerchunk = ["dask", "kerchunk"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "5344a948e7ae73de6bcfd7fa30089469daf6b232e3f0498cc1a47ba860ebb497" +content-hash = "530a3cffb6d044e431ec3671268949e797d3c468c0f653b6fea7c90cdc422b3d" diff --git a/pyproject.toml b/pyproject.toml index 6484f32e..dbe917ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ python-dateutil = ">=2.8.2" kerchunk = { version = ">=0.1.2", optional = true } dask = { version = ">=2022.1.0", optional = true } importlib-resources = ">=6.3.2" +typing_extensions = ">=4.10.0" [tool.poetry.extras] kerchunk = ["kerchunk", "dask"] @@ -67,6 +68,7 @@ pymdown-extensions = ">=9.2" pygments = ">=2.11.1" responses = ">=0.14" ruff = "^0.1.6" +types-python-dateutil = ">=2.8.2" types-requests = ">=0.1" types-setuptools = ">=0.1" ipywidgets = ">=7.7.0" @@ -87,22 +89,39 @@ build-backend = "poetry.core.masonry.api" [tool.pytest] filterwarnings = ["error::UserWarning"] - [tool.mypy] -disallow_untyped_defs = false -ignore_missing_imports = true +mypy_path = ["earthaccess", "tests", "stubs"] +disallow_untyped_defs = true +# TODO: incrementally work towards strict mode (currently too many errors) +# strict = true +pretty = true # Show additional context in error messages +enable_error_code = "redundant-self" [[tool.mypy.overrides]] module = [ "tests.*", ] -ignore_errors = true +disallow_untyped_defs = false + +[[tool.mypy.overrides]] +module = [ + "fsspec.*", + "dask.*", + "kerchunk.*", + "pqdm.*", + "s3fs", + "tinynetrc.*", # TODO: generate stubs for tinynetrc and remove this line + "vcr.unittest", # TODO: generate stubs for vcr and remove this line +] +ignore_missing_imports = true +[tool.pyright] +include = ["earthaccess"] +stubPath = "./stubs" [tool.ruff] line-length = 88 -src = ["earthaccess", "tests"] -exclude = ["mypy-stubs", "stubs", "typeshed"] +src = ["earthaccess", "stubs", "tests"] [tool.ruff.lint] extend-select = ["I"] @@ -110,7 +129,6 @@ extend-select = ["I"] [tool.ruff.lint.isort] combine-as-imports = true - [tool.bumpversion] current_version = "0.9.0" commit = false diff --git a/scripts/lint.sh b/scripts/lint.sh index 3a528811..02f9c70a 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -set -e -set -x +set -ex -mypy earthaccess --disallow-untyped-defs +mypy earthaccess stubs tests ruff check . 
diff --git a/stubs/cmr/__init__.pyi b/stubs/cmr/__init__.pyi new file mode 100644 index 00000000..3ea9733e --- /dev/null +++ b/stubs/cmr/__init__.pyi @@ -0,0 +1,10 @@ +from .queries import ( + CMR_OPS as CMR_OPS, + CMR_SIT as CMR_SIT, + CMR_UAT as CMR_UAT, + CollectionQuery as CollectionQuery, + GranuleQuery as GranuleQuery, + ServiceQuery as ServiceQuery, + ToolQuery as ToolQuery, + VariableQuery as VariableQuery, +) diff --git a/stubs/cmr/queries.pyi b/stubs/cmr/queries.pyi new file mode 100644 index 00000000..3b2fadc3 --- /dev/null +++ b/stubs/cmr/queries.pyi @@ -0,0 +1,108 @@ +import sys +from datetime import datetime +from typing import Any, Optional, SupportsFloat, Union + +if sys.version_info < (3, 9): + from typing import List, MutableMapping, Sequence, Tuple +else: + from builtins import list as List, tuple as Tuple + from collections.abc import MutableMapping, Sequence + +if sys.version_info < (3, 10): + from typing_extensions import TypeAlias +else: + from typing import TypeAlias + +if sys.version_info < (3, 11): + from typing_extensions import Self +else: + from typing import Self + +CMR_OPS: str +CMR_UAT: str +CMR_SIT: str + +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] + +class Query: + params: MutableMapping[str, Any] + options: MutableMapping[str, Any] + concept_id_chars: Sequence[str] + headers: MutableMapping[str, str] + + def __init__(self, route: str, mode: str = ...) -> None: ... + def _build_url(self) -> str: ... + def get(self, limit: int = ...) -> List[Any]: ... + def hits(self) -> int: ... + def get_all(self) -> List[Any]: ... + def parameters(self, **kwargs: Any) -> Self: ... + def format(self, output_format: str = "json") -> Self: ... + def concept_id(self, ids: Sequence[str]) -> Self: ... + def provider(self, provider: str) -> Self: ... + def mode(self, mode: str = ...) -> None: ... + def token(self, token: str) -> Self: ... + def bearer_token(self, bearer_token: str) -> Self: ... + +class GranuleCollectionBaseQuery(Query): + def online_only(self, online_only: bool = True) -> Self: ... + def temporal( + self, + date_from: Optional[Union[str, datetime]], + date_to: Optional[Union[str, datetime]], + exclude_boundary: bool = False, + ) -> Self: ... + def short_name(self, short_name: str) -> Self: ... + def version(self, version: str) -> Self: ... + def point(self, lon: FloatLike, lat: FloatLike) -> Self: ... + def circle(self, lon: FloatLike, lat: FloatLike, dist: FloatLike) -> Self: ... + def polygon(self, coordinates: Sequence[PointLike]) -> Self: ... + def bounding_box( + self, + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Self: ... + def line(self, coordinates: Sequence[PointLike]) -> Self: ... + def downloadable(self, downloadable: bool = True) -> Self: ... + def entry_title(self, entry_title: str) -> Self: ... + +class GranuleQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = ..., + ) -> Self: ... + def day_night_flag(self, day_night_flag: str) -> Self: ... + def cloud_cover( + self, + min_cover: Optional[FloatLike] = ..., + max_cover: Optional[FloatLike] = ..., + ) -> Self: ... + def instrument(self, instrument: str) -> Self: ... + def platform(self, platform: str) -> Self: ... + def sort_key(self, sort_key: str) -> Self: ... + def granule_ur(self, granule_ur: str) -> Self: ... 
+ +class CollectionQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def archive_center(self, center: str) -> Self: ... + def keyword(self, text: str) -> Self: ... + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def tool_concept_id(self, ids: Sequence[str]) -> Self: ... + def service_concept_id(self, ids: Sequence[str]) -> Self: ... + +class ToolServiceVariableBaseQuery(Query): + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def name(self, name: str) -> Self: ... + +class ToolQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class ServiceQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class VariableQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/fixtures/vcr_cassettes/PODAAC.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/PODAAC.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/ALL.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/ALL.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml new file mode 100644 index 00000000..1b9e08ac --- /dev/null +++ b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml @@ -0,0 +1,466 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/json + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/tokens + response: + body: + string: '[{"access_token":"eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImRzY2h1Y2siLCJleHAiOjE3MTcyNjM4MTMsImlhdCI6MTcxMjA3OTgxMywiaXNzIjoiRWFydGhkYXRhIExvZ2luIn0.S_tw0-5JNFEv3si07GYVxvQi81QejNAT2Sh2ZIxAwmqr9UqoSmYg2Wp2Jdn3jaWrSVsRgxBXuLD5w7XFeRju2qOtIqovN3XGJ8VnTdvpklr-gTjk_iLq58334Zzbu5ntnqy-QTzPCKvjvqr3GNuIJcp9z7j5rzd3MEUYOFP1xsd8wehGLpBHzT6ZSzCOwdgzE1AufKq9Vd2GqM_5bc3M9cj-gGy2g3m1mP2OB41wiGvPzup79ds4t_gEPkCecm2rplCP4n1hrY6ZQtXshgM6o49J1nkGSJjE0olHcPwEujKE2s1htWZEycI1TCCxrGpx8K1vwEd0lNaekgPUWwdOlA","token_type":"Bearer","expiration_date":"06/01/2024"}]' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"61d0ce8df0bc684ac04ce623aea3668c" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - 28f6c88b-114d-4319-b6a2-0de0f54c9405 + X-Runtime: + - '0.013338' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + 
Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/dschuck?client_id=ntD0YGC_SM3Bjs-Tnxd7bg + response: + body: + string: '{"uid":"dschuck","first_name":"Charles","last_name":"Daniels","email_address":"chuck@developmentseed.org","registered_date":" + 2 Apr 2024 17:43:33PM","country":"United States","study_area":"Other","allow_auth_app_emails":true,"user_type":"Application","affiliation":"Commercial","agreed_to_meris_eula":true,"agreed_to_sentinel_eula":true,"email_verified":true,"user_groups":[],"user_authorized_apps":23,"nams_auid":null}' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"5d6f0c723c97c730432ca73084995037" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - c8cf2bd3-731d-4863-8967-1906de679cbc + X-Runtime: + - '0.017383' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/profile + response: + body: + string: '' + headers: + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/html; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + Location: + - https://urs.earthdata.nasa.gov/home + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Set-Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0; path=/; expires=Wed, 10 + Apr 2024 21:58:55 GMT; HttpOnly + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - b6384698-e18b-4a99-b80c-b0ebe6cb80b7 + X-Runtime: + - '0.008282' + X-XSS-Protection: + - 1; mode=block + status: + code: 302 + message: Found +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0 + method: GET + uri: https://urs.earthdata.nasa.gov/home + response: + body: + string: "\n\n\n\n\n
+        [Earthdata Login home page HTML body truncated; the page notes that protection
+        and maintenance of user profile information is described in NASA's Web Privacy Policy.]
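The renamed cassettes follow the `{TestClass}.{test_method}.yaml` convention that `vcr.unittest.VCRTestCase` (listed in the mypy overrides above) uses by default. A hypothetical sketch of a test that replays such a cassette instead of querying CMR live; the class layout, query, and assertion are illustrative, not the repository's actual tests:

```python
from vcr.unittest import VCRTestCase

from earthaccess.search import DataCollections


class TestResults(VCRTestCase):
    def _get_cassette_library_dir(self) -> str:
        # Point vcrpy at the checked-in fixtures rather than its default location.
        return "tests/unit/fixtures/vcr_cassettes"

    def test_collections_less_than_2k(self) -> None:
        # Replays TestResults.test_collections_less_than_2k.yaml; no live CMR call.
        collections = DataCollections().daac("PODAAC").cloud_hosted(True).get(100)
        self.assertTrue(all(c.concept_id() for c in collections))
```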