[PYG-257] 🥺CogniteFile download (#371)

* refactor: added API * feat: added file content api * build: changelog * refactor: regen * docs: documentation * build: bump
cognitedata · Nov 17, 2024 · 87bd6e0 · 87bd6e0
1 parent 91956d8
commit 87bd6e0
Show file tree

Hide file tree

Showing 30 changed files with 645 additions and 15 deletions.
diff --git a/cognite/pygen/_constants.py b/cognite/pygen/_constants.py
@@ -10,6 +10,7 @@
 }
 
 COGNITE_TIMESERIES = dm.ContainerId("cdf_cdm", "CogniteTimeSeries")
+COGNITE_FILE = dm.ContainerId("cdf_cdm", "CogniteFile")
 
 
 def is_readonly_property(container: dm.ContainerId, identifier: str) -> bool:

diff --git a/cognite/pygen/_core/generators.py b/cognite/pygen/_core/generators.py
@@ -430,6 +430,7 @@ def generate_apis(self, client_dir: Path) -> dict[Path, str]:
         sdk[data_classes_dir / "_core" / "query.py"] = self.generate_data_class_core_query_file()
         sdk[data_classes_dir / "_core" / "cdf_external.py"] = self.generate_data_class_core_cdf_external_file()
         sdk[data_classes_dir / "_core" / "datapoints_api.py"] = self.generate_data_class_core_datapoints_api_file()
+        sdk[data_classes_dir / "_core" / "filecontent_api.py"] = self.generate_data_class_core_filecontent_api_file()
         return sdk
 
     def generate_api_core_file(self) -> str:
@@ -548,6 +549,17 @@ def generate_data_class_core_datapoints_api_file(self) -> str:
             + "\n"
         )
 
+    def generate_data_class_core_filecontent_api_file(self) -> str:
+        """Render the ``filecontent_api.py`` core file from ``data_classes_core_filecontent_api.py.jinja``."""
+        data_class_core = self.env.get_template("data_classes_core_filecontent_api.py.jinja")
+
+        return (
+            data_class_core.render(
+                top_level_package=self.top_level_package,
+            )
+            + "\n"
+        )
+
     def generate_data_class_core_cdf_external_file(self) -> str:
         """Generate the core data classes file for the SDK."""
         data_class_core = self.env.get_template("data_classes_core_cdf_external.py.jinja")

diff --git a/cognite/pygen/_core/models/data_classes.py b/cognite/pygen/_core/models/data_classes.py
@@ -11,7 +11,7 @@
 from cognite.client.data_classes.data_modeling.views import ViewProperty
 
 from cognite.pygen import config as pygen_config
-from cognite.pygen._constants import COGNITE_TIMESERIES
+from cognite.pygen._constants import COGNITE_FILE, COGNITE_TIMESERIES
 from cognite.pygen.config.reserved_words import is_reserved_word
 from cognite.pygen.utils.cdf import _find_first_node_type
 from cognite.pygen.utils.text import create_name, to_pascal, to_words
@@ -202,6 +202,15 @@ def is_cognite_timeseries(self) -> bool:
             for field in self
         )
 
+    @property
+    def is_cognite_file(self) -> bool:
+        """Whether any connection or primitive field of this data class has the ``CogniteFile`` container as source."""
+        return any(
+            isinstance(field, BaseConnectionField | BasePrimitiveField)
+            and field.container is not None
+            and field.container.source == COGNITE_FILE
+            for field in self
+        )
+
     @property
     def read_base_class(self) -> str:
         """Parent read classes."""

diff --git a/cognite/pygen/_core/templates/data_class_node.py.jinja b/cognite/pygen/_core/templates/data_class_node.py.jinja
@@ -20,7 +20,8 @@ from pydantic import field_validator, model_validator
 from {{ top_level_package }}.data_classes._core import ({% if has_default_instance_space %}
     DEFAULT_INSTANCE_SPACE,{% endif %}
     DEFAULT_QUERY_LIMIT,{% if data_class.is_cognite_timeseries %}
-    DataPointsAPI,{% endif %}
+    DataPointsAPI,{% endif %}{% if data_class.is_cognite_file %}
+    FileContentAPI,{% endif %}
     DataRecord,
     DataRecordGraphQL,
     DataRecordWrite,
@@ -587,7 +588,8 @@ class _{{ data_class.query_cls_name }}(NodeQueryCore[T_DomainModelList, {{ data_
             self.external_id,{% for field in data_class.filtering_fields %}
             self.{{ field.name }},{% endfor %}
         ]){% endif %}{% if data_class.is_cognite_timeseries %}
-        self.data = DataPointsAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}
+        self.data = DataPointsAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}{% if data_class.is_cognite_file %}
+        self.content = FileContentAPI(client, lambda limit: self._list(limit=limit).as_node_ids()){% endif %}
 
     def list_{{ data_class.variable }}(self, limit: int = DEFAULT_QUERY_LIMIT) -> {{ data_class.read_list_name }}:
         return self._list(limit=limit)

diff --git a/cognite/pygen/_core/templates/data_classes_core_filecontent_api.py.jinja b/cognite/pygen/_core/templates/data_classes_core_filecontent_api.py.jinja
@@ -0,0 +1,50 @@
+from collections.abc import Callable
+from pathlib import Path
+
+from cognite.client import CogniteClient
+from cognite.client.data_classes.data_modeling.ids import NodeId
+
+from {{ top_level_package }}.data_classes._core.constants import DEFAULT_QUERY_LIMIT
+
+
+class FileContentAPI:
+    """Download the content of the files identified by the node ids that ``get_node_ids`` returns.
+
+    Args:
+        client (CogniteClient): Client whose ``files.download`` method performs the download.
+        get_node_ids (Callable[[int], list[NodeId]]): Called with a limit; returns the node ids of the files to download.
+    """
+
+    def __init__(self, client: CogniteClient, get_node_ids: Callable[[int], list[NodeId]]) -> None:
+        self._client = client
+        self._get_node_ids = get_node_ids
+
+    def download(
+        self,
+        directory: str | Path,
+        keep_directory_structure: bool = False,
+        resolve_duplicate_file_names: bool = False,
+        files_limit: int = DEFAULT_QUERY_LIMIT,
+    ) -> None:
+        """`Download files. <https://developer.cognite.com/api#tag/Files/operation/downloadLinks>`_
+
+        This method will stream all files to disk, never keeping more than 2MB in memory per worker.
+        The files will be stored in the provided directory using the file name retrieved from the file metadata in CDF.
+        You can also choose to keep the directory structure from CDF so that the files will be stored in subdirectories
+        matching the directory attribute on the files. When missing, the (root) directory is used.
+        By default, duplicate file names to the same local folder will be resolved by only keeping one of the files.
+        You can choose to resolve this by appending a number to the file name using the resolve_duplicate_file_names argument.
+
+        Warning:
+            If you are downloading several files at once, be aware that file name collisions lead to all-but-one of
+            the files missing. A warning is issued when this happens, listing the affected files.
+
+        Args:
+            directory (str | Path): Directory to download the file(s) to.
+            keep_directory_structure (bool): Whether to keep the directory hierarchy in CDF,
+                creating subdirectories as needed below the given directory.
+            resolve_duplicate_file_names (bool): Whether to resolve duplicate file names by appending a number on duplicate file names.
+            files_limit (int): Maximum number of files to download. Defaults to DEFAULT_QUERY_LIMIT.
+        """
+        node_ids = self._get_node_ids(files_limit)
+        # No node ids were returned, so there is nothing to download.
+        if not node_ids:
+            return None
+        self._client.files.download(
+            directory=directory,
+            instance_id=node_ids,
+            keep_directory_structure=keep_directory_structure,
+            resolve_duplicate_file_names=resolve_duplicate_file_names,
+        )
diff --git a/cognite/pygen/_core/templates/data_classes_core_init.py.jinja b/cognite/pygen/_core/templates/data_classes_core_init.py.jinja
@@ -3,4 +3,5 @@ from {{ top_level_package }}.data_classes._core.base import *  # noqa
 from {{ top_level_package }}.data_classes._core.cdf_external import *  # noqa
 from {{ top_level_package }}.data_classes._core.datapoints_api import *  # noqa
 from {{ top_level_package }}.data_classes._core.helpers import *  # noqa
+from {{ top_level_package }}.data_classes._core.filecontent_api import *  # noqa
 from {{ top_level_package }}.data_classes._core.query import *  # noqa
diff --git a/cognite/pygen/_version.py b/cognite/pygen/_version.py
@@ -1 +1 @@
-__version__ = "0.99.49"
+__version__ = "0.99.50"
diff --git a/cognite/pygen/config/reserved_words.py b/cognite/pygen/config/reserved_words.py
@@ -24,6 +24,7 @@
         "type",
         "list_full",
         "data",
+        "content",
     }
     | {f for f in dir(BaseModel)}
     | {

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -18,6 +18,9 @@ Changes are grouped as follows
 - Any views that extends the `CogniteTimeSeries` now has the property `data` you can use to retrieve datapoints.
   For example, `pygen.rotor.select().rotor_speed_controller.data.retrieve_dataframe(...)` will retrieve the datapoints
   for the `rotor_speed_controller` timeseries.
+- Any view that extends `CogniteFile` now has the property `content`, which you can use to download the file content.
+  For example, `pygen.wind_turbine.select().datasheets.content.download("my_directory")` will download the
+  `datasheets` files for all wind turbines.
 
 ### Fixed
 - The `.query()` method has been renamed to `.select()`. The `.query()` method is still available, but will