Skip to content

Commit

Permalink
[PYG-257] 🥺CogniteFile download (#371)
Browse files Browse the repository at this point in the history
* refactor: added API

* feat; added file content api

* build; changelog

* refactor: regen

* docs: documentation

* build; bump
  • Loading branch information
doctrino authored Nov 17, 2024
1 parent 91956d8 commit 87bd6e0
Show file tree
Hide file tree
Showing 30 changed files with 645 additions and 15 deletions.
1 change: 1 addition & 0 deletions cognite/pygen/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
}

COGNITE_TIMESERIES = dm.ContainerId("cdf_cdm", "CogniteTimeSeries")
COGNITE_FILE = dm.ContainerId("cdf_cdm", "CogniteFile")


def is_readonly_property(container: dm.ContainerId, identifier: str) -> bool:
Expand Down
12 changes: 12 additions & 0 deletions cognite/pygen/_core/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ def generate_apis(self, client_dir: Path) -> dict[Path, str]:
sdk[data_classes_dir / "_core" / "query.py"] = self.generate_data_class_core_query_file()
sdk[data_classes_dir / "_core" / "cdf_external.py"] = self.generate_data_class_core_cdf_external_file()
sdk[data_classes_dir / "_core" / "datapoints_api.py"] = self.generate_data_class_core_datapoints_api_file()
sdk[data_classes_dir / "_core" / "filecontent_api.py"] = self.generate_data_class_core_filecontent_api_file()
return sdk

def generate_api_core_file(self) -> str:
Expand Down Expand Up @@ -548,6 +549,17 @@ def generate_data_class_core_datapoints_api_file(self) -> str:
+ "\n"
)

def generate_data_class_core_filecontent_api_file(self) -> str:
    """Render the ``_core/filecontent_api.py`` module of the generated SDK.

    Loads the ``data_classes_core_filecontent_api.py.jinja`` template from
    ``self.env`` and renders it with ``self.top_level_package`` as the only
    template variable.

    Returns:
        The rendered template text followed by a single newline character.
    """
    template = self.env.get_template("data_classes_core_filecontent_api.py.jinja")
    rendered = template.render(top_level_package=self.top_level_package)
    # A newline is appended to the rendered text so the generated file ends with one.
    return rendered + "\n"

def generate_data_class_core_cdf_external_file(self) -> str:
"""Generate the core data classes file for the SDK."""
data_class_core = self.env.get_template("data_classes_core_cdf_external.py.jinja")
Expand Down
11 changes: 10 additions & 1 deletion cognite/pygen/_core/models/data_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from cognite.client.data_classes.data_modeling.views import ViewProperty

from cognite.pygen import config as pygen_config
from cognite.pygen._constants import COGNITE_TIMESERIES
from cognite.pygen._constants import COGNITE_FILE, COGNITE_TIMESERIES
from cognite.pygen.config.reserved_words import is_reserved_word
from cognite.pygen.utils.cdf import _find_first_node_type
from cognite.pygen.utils.text import create_name, to_pascal, to_words
Expand Down Expand Up @@ -202,6 +202,15 @@ def is_cognite_timeseries(self) -> bool:
for field in self
)

@property
def is_cognite_file(self) -> bool:
    """Whether any field of this data class is backed by the CogniteFile container.

    Iterates over the fields of this data class and returns ``True`` as soon
    as it finds a connection or primitive field whose ``container`` is set
    and whose ``container.source`` equals ``COGNITE_FILE``; otherwise ``False``.
    """
    for candidate in self:
        # Only connection and primitive fields are considered.
        if not isinstance(candidate, BaseConnectionField | BasePrimitiveField):
            continue
        if candidate.container is None:
            continue
        if candidate.container.source == COGNITE_FILE:
            return True
    return False

@property
def read_base_class(self) -> str:
"""Parent read classes."""
Expand Down
6 changes: 4 additions & 2 deletions cognite/pygen/_core/templates/data_class_node.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ from pydantic import field_validator, model_validator
from {{ top_level_package }}.data_classes._core import ({% if has_default_instance_space %}
DEFAULT_INSTANCE_SPACE,{% endif %}
DEFAULT_QUERY_LIMIT,{% if data_class.is_cognite_timeseries %}
DataPointsAPI,{% endif %}
DataPointsAPI,{% endif %}{% if data_class.is_cognite_file %}
FileContentAPI,{% endif %}
DataRecord,
DataRecordGraphQL,
DataRecordWrite,
Expand Down Expand Up @@ -587,7 +588,8 @@ class _{{ data_class.query_cls_name }}(NodeQueryCore[T_DomainModelList, {{ data_
self.external_id,{% for field in data_class.filtering_fields %}
self.{{ field.name }},{% endfor %}
]){% endif %}{% if data_class.is_cognite_timeseries %}
self.data = DataPointsAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}
self.data = DataPointsAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}{% if data_class.is_cognite_file %}
self.content = FileContentAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}

def list_{{ data_class.variable }}(self, limit: int = DEFAULT_QUERY_LIMIT) -> {{ data_class.read_list_name }}:
return self._list(limit=limit)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from collections.abc import Callable
from pathlib import Path

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling.ids import NodeId

from {{ top_level_package }}.data_classes._core.constants import DEFAULT_QUERY_LIMIT


class FileContentAPI:
    """Download the content of the files behind a lazily resolved set of nodes.

    Args:
        client (CogniteClient): Client whose ``files.download`` method performs the download.
        get_node_ids (Callable[[int], list[NodeId]]): Called with the maximum number of
            files and expected to return the node ids of the files to download.
    """

    def __init__(self, client: CogniteClient, get_node_ids: Callable[[int], list[NodeId]]) -> None:
        self._get_node_ids = get_node_ids
        self._client = client

    def download(
        self,
        directory: str | Path,
        keep_directory_structure: bool = False,
        resolve_duplicate_file_names: bool = False,
        files_limit: int = DEFAULT_QUERY_LIMIT,
    ) -> None:
        """`Download files. <https://developer.cognite.com/api#tag/Files/operation/downloadLinks>`_

        The node ids are looked up first, using ``files_limit`` as the upper bound, and are then
        handed to ``client.files.download`` as ``instance_id``. Nothing is downloaded when the
        lookup returns no node ids.

        All files are streamed to disk, never keeping more than 2MB in memory per worker.
        Each file is stored in the provided directory under the file name found in its CDF metadata.
        With ``keep_directory_structure`` the files are instead placed in subdirectories matching
        the directory attribute on the files; when that attribute is missing, the (root) directory is used.
        By default, files with the same name written to the same local folder are resolved by keeping
        only one of them. Set ``resolve_duplicate_file_names`` to append a number to the file name instead.

        Warning:
            If you are downloading several files at once, be aware that file name collisions lead to
            all-but-one of the files missing. A warning is issued when this happens, listing the
            affected files.

        Args:
            directory (str | Path): Directory to download the file(s) to.
            keep_directory_structure (bool): Whether to keep the directory hierarchy in CDF,
                creating subdirectories as needed below the given directory.
            resolve_duplicate_file_names (bool): Whether to resolve duplicate file names by
                appending a number to the duplicated file names.
            files_limit (int): Maximum number of files to download. Defaults to DEFAULT_QUERY_LIMIT.
        """
        selected_ids = self._get_node_ids(files_limit)
        # Only call the files API when there is something to download.
        if selected_ids:
            self._client.files.download(
                directory=directory,
                instance_id=selected_ids,
                keep_directory_structure=keep_directory_structure,
                resolve_duplicate_file_names=resolve_duplicate_file_names,
            )
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ from {{ top_level_package }}.data_classes._core.base import * # noqa
from {{ top_level_package }}.data_classes._core.cdf_external import * # noqa
from {{ top_level_package }}.data_classes._core.datapoints_api import * # noqa
from {{ top_level_package }}.data_classes._core.helpers import * # noqa
from {{ top_level_package }}.data_classes._core.filecontent_api import * # noqa
from {{ top_level_package }}.data_classes._core.query import * # noqa
2 changes: 1 addition & 1 deletion cognite/pygen/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.99.49"
__version__ = "0.99.50"
1 change: 1 addition & 0 deletions cognite/pygen/config/reserved_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"type",
"list_full",
"data",
"content",
}
| {f for f in dir(BaseModel)}
| {
Expand Down
3 changes: 3 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Changes are grouped as follows
- Any view that extends `CogniteTimeSeries` now has the property `data`, which you can use to retrieve datapoints.
For example, `pygen.rotor.select().rotor_speed_controller.data.retrieve_dataframe(...)` will retrieve the datapoints
for the `rotor_speed_controller` timeseries.
- Any view that extends `CogniteFile` now has the property `content`, which you can use to download the file content.
For example, `pygen.wind_turbine.select().datasheets.content.download("my_directory")` will download the
`datasheets` files for all wind turbines.

### Fixed
- The `.query()` method has been renamed to `.select()`. The `.query()` method is still available, but will
Expand Down
Loading

0 comments on commit 87bd6e0

Please sign in to comment.