diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml index 4c62ea5c6a5..d8f11e3c10b 100644 --- a/.github/conda/meta.yaml +++ b/.github/conda/meta.yaml @@ -24,7 +24,7 @@ requirements: - dataclasses - multiprocess - fsspec - - huggingface_hub >=0.22.0,<1.0.0 + - huggingface_hub >=0.23.0,<1.0.0 - packaging - aiohttp run: @@ -41,7 +41,7 @@ requirements: - dataclasses - multiprocess - fsspec - - huggingface_hub >=0.22.0,<1.0.0 + - huggingface_hub >=0.23.0,<1.0.0 - packaging - aiohttp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 383c96332c8..2951be28289 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,7 @@ jobs: run: uv pip install --system --upgrade pyarrow huggingface-hub "dill<0.3.9" - name: Install dependencies (minimum versions) if: ${{ matrix.deps_versions != 'deps-latest' }} - run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.22.0 transformers dill==0.3.1.1 + run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.23.5 transformers dill==0.3.1.1 - name: Test with pytest run: | python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/ diff --git a/setup.py b/setup.py index 05c5aef48e3..82017ad9ee1 100644 --- a/setup.py +++ b/setup.py @@ -133,7 +133,7 @@ # for data streaming via http "aiohttp", # To get datasets from the Datasets Hub on huggingface.co - "huggingface-hub>=0.22.0", + "huggingface-hub>=0.23.0", # Utilities from PyPA to e.g., compare versions "packaging", # To parse YAML metadata from dataset cards diff --git a/src/datasets/load.py b/src/datasets/load.py index bd3e23473bc..0faf2fd5cb5 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -40,6 +40,8 @@ from huggingface_hub.utils import ( EntryNotFoundError, GatedRepoError, + LocalEntryNotFoundError, + OfflineModeIsEnabled, RepositoryNotFoundError, RevisionNotFoundError, get_session, @@ -79,7 +81,6 @@ from .splits import Split from .utils import _dataset_viewer from .utils.file_utils import ( - OfflineModeIsEnabled, _raise_if_offline_mode_is_enabled, cached_path, get_datasets_user_agent, @@ -1603,9 +1604,19 @@ def dataset_module_factory( proxies=download_config.proxies, ) commit_hash = os.path.basename(os.path.dirname(dataset_readme_path)) - except EntryNotFoundError as e: - if "internet connection" in str(e).lower(): + except LocalEntryNotFoundError as e: + if isinstance( + e.__cause__, + ( + OfflineModeIsEnabled, + requests.exceptions.ConnectTimeout, + requests.exceptions.ConnectionError, + ), + ): raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({e.__class__.__name__})") from e + else: + raise + except EntryNotFoundError: commit_hash = api.dataset_info( path, revision=revision, diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py index 9bd2a1c3928..e44b1ce12bc 100644 --- a/src/datasets/utils/file_utils.py +++ b/src/datasets/utils/file_utils.py @@ -282,14 +282,10 @@ def get_authentication_headers_for_url(url: str, token: Optional[Union[str, bool return {} -class OfflineModeIsEnabled(ConnectionError): - pass - - def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None): """Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_HUB_OFFLINE is True.""" if config.HF_HUB_OFFLINE: - raise OfflineModeIsEnabled( + raise huggingface_hub.errors.OfflineModeIsEnabled( "Offline mode is enabled." if msg is None else "Offline mode is enabled. " + str(msg) ) diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index e82b5d11dc9..6f6ac01df9a 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -7,10 +7,10 @@ import zstandard as zstd from fsspec.registry import _registry as _fsspec_registry from fsspec.spec import AbstractBufferedFile, AbstractFileSystem +from huggingface_hub.errors import OfflineModeIsEnabled from datasets.download.download_config import DownloadConfig from datasets.utils.file_utils import ( - OfflineModeIsEnabled, _get_extraction_protocol, _prepare_single_hop_path_and_storage_options, cached_path,