Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename gcs.endpoint to gcs.service.host #1007

Merged
merged 9 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ For the FileIO there are several configuration options available:
| gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache |
| gcs.requester-pays | False | Configure whether to use requester-pays requests |
| gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. |
| gcs.endpoint | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
| gcs.service.host | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
| gcs.default-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. |
| gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. |

Expand Down
1 change: 1 addition & 0 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
GCS_REQUESTER_PAYS = "gcs.requester-pays"
GCS_SESSION_KWARGS = "gcs.session-kwargs"
GCS_ENDPOINT = "gcs.endpoint"
GCS_SERVICE_HOST = "gcs.service.host"
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
Expand Down
9 changes: 8 additions & 1 deletion pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_REQUESTER_PAYS,
GCS_SERVICE_HOST,
GCS_SESSION_KWARGS,
GCS_TOKEN,
GCS_VERSION_AWARE,
Expand Down Expand Up @@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem:
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
from gcsfs import GCSFileSystem

if properties.get(GCS_ENDPOINT):
deprecation_message(
deprecated_in="0.8.0",
removed_in="0.9.0",
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
)
return GCSFileSystem(
project=properties.get(GCS_PROJECT_ID),
access=properties.get(GCS_ACCESS, "full_control"),
Expand All @@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
endpoint_url=properties.get(GCS_ENDPOINT),
endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT),
default_location=properties.get(GCS_DEFAULT_LOCATION),
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
)
Expand Down
12 changes: 10 additions & 2 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
AWS_SESSION_TOKEN,
GCS_DEFAULT_LOCATION,
GCS_ENDPOINT,
GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
HDFS_HOST,
Expand Down Expand Up @@ -159,7 +160,7 @@
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
from pyiceberg.utils.deprecated import deprecated
from pyiceberg.utils.deprecated import deprecated, deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
Expand Down Expand Up @@ -390,7 +391,14 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
gcs_kwargs["default_bucket_location"] = bucket_location
if endpoint := self.properties.get(GCS_ENDPOINT):
if (endpoint := self.properties.get(GCS_ENDPOINT)) and GCS_SERVICE_HOST not in self.properties:
deprecation_message(
deprecated_in="0.8.0",
removed_in="0.9.0",
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
)
self.properties[GCS_SERVICE_HOST] = endpoint
if endpoint := self.properties.get(GCS_SERVICE_HOST):
Fokko marked this conversation as resolved.
Show resolved Hide resolved
url_parts = urlparse(endpoint)
gcs_kwargs["scheme"] = url_parts.scheme
gcs_kwargs["endpoint_override"] = url_parts.netloc
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@
from pyiceberg.catalog.noop import NoopCatalog
from pyiceberg.expressions import BoundReference
from pyiceberg.io import (
GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
fsspec,
Expand Down Expand Up @@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO:
@pytest.fixture
def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
properties = {
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
}
Expand All @@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
from pyiceberg.io.pyarrow import PyArrowFileIO

properties = {
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,
Expand Down