
initial video support
lhoestq committed Oct 15, 2024
1 parent 6e8635d commit fe601ba
Showing 6 changed files with 318 additions and 7 deletions.
4 changes: 4 additions & 0 deletions docs/source/package_reference/main_classes.mdx
@@ -245,6 +245,10 @@ Dictionary with split names as keys ('train', 'test' for example), and `Iterable

[[autodoc]] datasets.Image

+### Video
+
+[[autodoc]] datasets.Video
+
## Filesystems

[[autodoc]] datasets.filesystems.is_remote_filesystem
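A minimal usage sketch of the newly documented `datasets.Video` feature, assuming it is cast onto a column the same way as `Image` and `Audio`; the column name and file paths below are illustrative placeholders:

```python
from datasets import Dataset, Video

# Hypothetical example: start from a column of video file paths and cast it
# to the new Video feature type (paths are placeholders).
ds = Dataset.from_dict({"video": ["path/to/clip_0.mp4", "path/to/clip_1.mp4"]})
ds = ds.cast_column("video", Video())
print(ds.features)  # the "video" column should now carry the Video feature type
```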
10 changes: 5 additions & 5 deletions src/datasets/arrow_dataset.py
@@ -77,7 +77,7 @@
from .arrow_writer import ArrowWriter, OptimizedTypedSequence
from .data_files import sanitize_patterns
from .download.streaming_download_manager import xgetsize
-from .features import Audio, ClassLabel, Features, Image, Sequence, Value
+from .features import Audio, ClassLabel, Features, Image, Sequence, Value, Video
from .features.features import (
FeatureType,
_align_features,
@@ -1416,9 +1416,9 @@ def save_to_disk(
"""
Saves a dataset to a dataset directory, or in a filesystem using any implementation of `fsspec.spec.AbstractFileSystem`.
-For [`Image`] and [`Audio`] data:
+For [`Image`], [`Audio`] and [`Video`] data:
-All the Image() and Audio() data are stored in the arrow files.
+All the Image(), Audio() and Video() data are stored in the arrow files.
If you want to store paths or urls, please use the Value("string") type.
Args:
@@ -5065,7 +5065,7 @@ def _estimate_nbytes(self) -> int:

def extra_nbytes_visitor(array, feature):
nonlocal extra_nbytes
-if isinstance(feature, (Audio, Image)):
+if isinstance(feature, (Audio, Image, Video)):
for x in array.to_pylist():
if x is not None and x["bytes"] is None and x["path"] is not None:
size = xgetsize(x["path"])
@@ -5310,7 +5310,7 @@ def push_to_hub(
"""Pushes the dataset to the hub as a Parquet dataset.
The dataset is pushed using HTTP requests and does not need to have neither git or git-lfs installed.
-The resulting Parquet files are self-contained by default. If your dataset contains [`Image`] or [`Audio`]
+The resulting Parquet files are self-contained by default. If your dataset contains [`Image`], [`Audio`] or [`Video`]
data, the Parquet files will store the bytes of your images or audio files.
You can disable this by setting `embed_external_files` to `False`.
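A hedged sketch of what the two docstring updates above imply in practice: video bytes are embedded by default, `Value("string")` keeps only paths when saving, and `embed_external_files=False` avoids embedding bytes when pushing. The directory name, repository id, and column name are placeholders, and `ds` refers to the dataset from the earlier sketch:

```python
from datasets import Dataset, Features, Value

# Keep paths only: declare the column as Value("string") rather than Video(),
# so save_to_disk stores the path strings instead of embedded bytes.
features = Features({"video": Value("string")})
paths_only = Dataset.from_dict({"video": ["path/to/clip_0.mp4"]}, features=features)
paths_only.save_to_disk("my_video_dataset")

# Push a dataset whose "video" column uses the Video feature without embedding
# the video bytes into the resulting Parquet shards.
ds.push_to_hub("username/my_video_dataset", embed_external_files=False)
```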
1 change: 1 addition & 0 deletions src/datasets/config.py
@@ -129,6 +129,7 @@
IS_MP3_SUPPORTED = importlib.util.find_spec("soundfile") is not None and version.parse(
importlib.import_module("soundfile").__libsndfile_version__
) >= version.parse("1.1.0")
+DECORD_AVAILABLE = importlib.util.find_spec("decord") is not None

# Optional compression tools
RARFILE_AVAILABLE = importlib.util.find_spec("rarfile") is not None
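This mirrors the existing pattern for optional dependencies such as `soundfile`. A hedged sketch of how an availability flag like this is typically consumed; the helper function and error message are illustrative, not the library's exact code:

```python
import importlib.util

# Same check as in config.py: decord is an optional dependency for video decoding.
DECORD_AVAILABLE = importlib.util.find_spec("decord") is not None


def read_first_frame(path: str):
    """Illustrative helper: decode the first frame of a video with decord."""
    if not DECORD_AVAILABLE:
        raise ImportError("Decoding videos requires the 'decord' package: pip install decord")
    from decord import VideoReader  # imported lazily, only when actually decoding

    return VideoReader(path)[0]
```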
2 changes: 2 additions & 0 deletions src/datasets/features/__init__.py
@@ -12,8 +12,10 @@
"Image",
"Translation",
"TranslationVariableLanguages",
"Video",
]
from .audio import Audio
from .features import Array2D, Array3D, Array4D, Array5D, ClassLabel, Features, LargeList, Sequence, Value
from .image import Image
from .translation import Translation, TranslationVariableLanguages
+from .video import Video
7 changes: 5 additions & 2 deletions src/datasets/features/features.py
@@ -43,6 +43,7 @@
from .audio import Audio
from .image import Image, encode_pil_image
from .translation import Translation, TranslationVariableLanguages
+from .video import Video


logger = logging.get_logger(__name__)
@@ -1202,6 +1203,7 @@ class LargeList:
Array5D,
Audio,
Image,
+Video,
]


@@ -1346,7 +1348,7 @@ def encode_nested_example(schema, obj, level=0):
return list(obj)
# Object with special encoding:
# ClassLabel will convert from string to int, TranslationVariableLanguages does some checks
-elif isinstance(schema, (Audio, Image, ClassLabel, TranslationVariableLanguages, Value, _ArrayXD)):
+elif isinstance(schema, (Audio, Image, ClassLabel, TranslationVariableLanguages, Value, _ArrayXD, Video)):
return schema.encode_example(obj) if obj is not None else None
# Other object should be directly convertible to a native Arrow type (like Translation and Translation)
return obj
@@ -1397,7 +1399,7 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[Dict[str, Uni
else:
return decode_nested_example([schema.feature], obj)
# Object with special decoding:
-elif isinstance(schema, (Audio, Image)):
+elif isinstance(schema, (Audio, Image, Video)):
# we pass the token to read and decode files from private repositories in streaming mode
if obj is not None and schema.decode:
return schema.decode_example(obj, token_per_repo_id=token_per_repo_id)
@@ -1417,6 +1419,7 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[Dict[str, Uni
Array5D.__name__: Array5D,
Audio.__name__: Audio,
Image.__name__: Image,
+Video.__name__: Video,
}


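With `Video` registered alongside the other feature types and handled by `encode_nested_example`/`decode_nested_example`, it can appear inside a `Features` schema like `Image` or `Audio`. A small sketch with illustrative field names:

```python
from datasets import Features, Value, Video

# Video now behaves like Image/Audio inside a schema: it is looked up by name
# when features are deserialized and dispatched to for encoding/decoding.
features = Features({"caption": Value("string"), "video": Video()})
print(features)
```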
(diff for the 1 remaining changed file not shown)
