Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

use huggingface_hub offline mode #7244

Merged
merged 3 commits on Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ requirements:
- dataclasses
- multiprocess
- fsspec
- huggingface_hub >=0.22.0,<1.0.0
- huggingface_hub >=0.23.0,<1.0.0
- packaging
- aiohttp
run:
Expand All @@ -41,7 +41,7 @@ requirements:
- dataclasses
- multiprocess
- fsspec
- huggingface_hub >=0.22.0,<1.0.0
- huggingface_hub >=0.23.0,<1.0.0
- packaging
- aiohttp

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
run: uv pip install --system --upgrade pyarrow huggingface-hub "dill<0.3.9"
- name: Install dependencies (minimum versions)
if: ${{ matrix.deps_versions != 'deps-latest' }}
run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.22.0 transformers dill==0.3.1.1
run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.23.5 transformers dill==0.3.1.1
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
# for data streaming via http
"aiohttp",
# To get datasets from the Datasets Hub on huggingface.co
"huggingface-hub>=0.22.0",
"huggingface-hub>=0.23.0",
# Utilities from PyPA to e.g., compare versions
"packaging",
# To parse YAML metadata from dataset cards
Expand Down
17 changes: 14 additions & 3 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
from huggingface_hub.utils import (
EntryNotFoundError,
GatedRepoError,
LocalEntryNotFoundError,
OfflineModeIsEnabled,
RepositoryNotFoundError,
RevisionNotFoundError,
get_session,
Expand Down Expand Up @@ -79,7 +81,6 @@
from .splits import Split
from .utils import _dataset_viewer
from .utils.file_utils import (
OfflineModeIsEnabled,
_raise_if_offline_mode_is_enabled,
cached_path,
get_datasets_user_agent,
Expand Down Expand Up @@ -1603,9 +1604,19 @@ def dataset_module_factory(
proxies=download_config.proxies,
)
commit_hash = os.path.basename(os.path.dirname(dataset_readme_path))
except EntryNotFoundError as e:
if "internet connection" in str(e).lower():
except LocalEntryNotFoundError as e:
if isinstance(
e.__cause__,
(
OfflineModeIsEnabled,
requests.exceptions.ConnectTimeout,
requests.exceptions.ConnectionError,
),
):
raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({e.__class__.__name__})") from e
else:
raise
except EntryNotFoundError:
commit_hash = api.dataset_info(
path,
revision=revision,
Expand Down
6 changes: 1 addition & 5 deletions src/datasets/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,10 @@ def get_authentication_headers_for_url(url: str, token: Optional[Union[str, bool
return {}


class OfflineModeIsEnabled(ConnectionError):
pass


def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
"""Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_HUB_OFFLINE is True."""
if config.HF_HUB_OFFLINE:
raise OfflineModeIsEnabled(
raise huggingface_hub.errors.OfflineModeIsEnabled(
"Offline mode is enabled." if msg is None else "Offline mode is enabled. " + str(msg)
)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import zstandard as zstd
from fsspec.registry import _registry as _fsspec_registry
from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
from huggingface_hub.errors import OfflineModeIsEnabled

from datasets.download.download_config import DownloadConfig
from datasets.utils.file_utils import (
OfflineModeIsEnabled,
_get_extraction_protocol,
_prepare_single_hop_path_and_storage_options,
cached_path,
Expand Down
Loading