From bed9571bc657754db0c25b6e18a9530348d81005 Mon Sep 17 00:00:00 2001
From: James Bourbeau
Date: Mon, 4 Dec 2023 12:16:20 -0600
Subject: [PATCH 1/2] Add region to running in AWS check

---
 earthaccess/store.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/earthaccess/store.py b/earthaccess/store.py
index f2f3618e..173e1f47 100644
--- a/earthaccess/store.py
+++ b/earthaccess/store.py
@@ -141,11 +141,13 @@ def _am_i_in_aws(self) -> bool:
         try:
             # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
             resp = session.get(
-                "http://169.254.169.254/latest/meta-data/public-ipv4", timeout=1
+                "http://169.254.169.254/latest/meta-data/placement/region", timeout=1
             )
         except Exception:
             return False
-        if resp.status_code == 200:
+
+        if resp.status_code == 200 and b"us-west-2" == resp.content:
+            # On AWS in region us-west-2
             return True
         return False
 

From b0fa9c50ee1cd5a09c97b14ffa06e583bac2db4b Mon Sep 17 00:00:00 2001
From: James Bourbeau
Date: Mon, 4 Dec 2023 12:55:15 -0600
Subject: [PATCH 2/2] Update names

---
 earthaccess/store.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/earthaccess/store.py b/earthaccess/store.py
index 173e1f47..190b7c69 100644
--- a/earthaccess/store.py
+++ b/earthaccess/store.py
@@ -74,8 +74,8 @@ def make_instance(
 
     # When sending EarthAccessFiles between processes, it's possible that
     # we will need to switch between s3 <--> https protocols.
-    if (earthaccess.__store__.running_in_aws and cls is not s3fs.S3File) or (
-        not earthaccess.__store__.running_in_aws and cls is s3fs.S3File
+    if (earthaccess.__store__.in_region and cls is not s3fs.S3File) or (
+        not earthaccess.__store__.in_region and cls is s3fs.S3File
     ):
         # NOTE: This uses the first data_link listed in the granule. That's not
         # guaranteed to be the right one.
@@ -112,7 +112,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None:
         else:
             print("Warning: the current session is not authenticated with NASA")
             self.auth = None
-        self.running_in_aws = self._am_i_in_aws()
+        self.in_region = self._running_in_us_west_2()
 
     def _derive_concept_provider(self, concept_id: Optional[str] = None) -> str:
         if concept_id is not None:
@@ -136,7 +136,7 @@ def _own_s3_credentials(self, links: List[Dict[str, Any]]) -> Union[str, None]:
                 return link["URL"]
         return None
 
-    def _am_i_in_aws(self) -> bool:
+    def _running_in_us_west_2(self) -> bool:
         session = self.auth.get_session()
         try:
             # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
@@ -331,7 +331,7 @@ def _open_granules(
                 "A valid Earthdata login instance is required to retrieve credentials"
             )
 
-        if self.running_in_aws:
+        if self.in_region:
             if granules[0].cloud_hosted:
                 access_method = "direct"
                 provider = granules[0]["meta"]["provider-id"]
@@ -405,7 +405,7 @@ def _open_urls(
                 "A valid Earthdata login instance is required to retrieve S3 credentials"
             )
 
-        if self.running_in_aws and granules[0].startswith("s3"):
+        if self.in_region and granules[0].startswith("s3"):
             if provider is not None:
                 s3_fs = self.get_s3fs_session(provider=provider)
                 if s3_fs is not None:
@@ -510,12 +510,12 @@ def _get_urls(
     ) -> List[str]:
         data_links = granules
         downloaded_files: List = []
-        if provider is None and self.running_in_aws and "cumulus" in data_links[0]:
+        if provider is None and self.in_region and "cumulus" in data_links[0]:
             raise ValueError(
                 "earthaccess can't yet guess the provider for cloud collections, "
                 "we need to use one from earthaccess.list_cloud_providers()"
             )
-        if self.running_in_aws and data_links[0].startswith("s3"):
+        if self.in_region and data_links[0].startswith("s3"):
             print(f"Accessing cloud dataset using provider: {provider}")
             s3_fs = self.get_s3fs_session(provider=provider)
             # TODO: make this parallel or concurrent
@@ -543,11 +543,11 @@ def _get_granules(
         provider = granules[0]["meta"]["provider-id"]
         endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
         cloud_hosted = granules[0].cloud_hosted
-        access = "direct" if (cloud_hosted and self.running_in_aws) else "external"
+        access = "direct" if (cloud_hosted and self.in_region) else "external"
         data_links = list(
             # we are not in region
             chain.from_iterable(
-                granule.data_links(access=access, in_region=self.running_in_aws)
+                granule.data_links(access=access, in_region=self.in_region)
                 for granule in granules
             )
         )