Skip to content

Commit

Permalink
Actually fix the file update issue (#393)
Browse files Browse the repository at this point in the history
* Actually fix the file update issue

* Bump version

---------

Co-authored-by: cognite-bulldozer[bot] <51074376+cognite-bulldozer[bot]@users.noreply.github.com>
  • Loading branch information
einarmo and cognite-bulldozer[bot] authored Nov 18, 2024
1 parent 5659ae1 commit 6f14cc4
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## 7.5.4

### Fixed

* Fix issue caused by attempting to update file mimeType on AWS clusters.

## 7.5.3

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion cognite/extractorutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@
Cognite extractor utils is a Python package that simplifies the development of new extractors.
"""

__version__ = "7.5.3"
__version__ = "7.5.4"
from .base import Extractor
24 changes: 17 additions & 7 deletions cognite/extractorutils/uploader/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from requests.utils import super_len

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.client.data_classes import FileMetadata, FileMetadataUpdate
from cognite.client.data_classes.data_modeling import NodeId
from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorFileApply
from cognite.client.utils._identifier import IdentifierSequence
Expand Down Expand Up @@ -261,13 +261,23 @@ def _upload_empty(
file_meta_response, url = self.cdf_client.files.create(
file_metadata=file_meta, overwrite=self.overwrite_existing
)
# trigger update after creation (upsert =P)
basic_attributes = set(["externalId", "name"])
attr = set(file_meta.dump().keys())
diff = attr - basic_attributes

if len(diff) >= 1 and "externalId" in attr:
file_meta_response = self.cdf_client.files.update(file_meta)
# The files API for whatever reason doesn't update directory or source when you overwrite,
# so we need to update those later.
any_unchaged = (
file_meta_response.directory != file_meta.directory or file_meta_response.source != file_meta.source
)
if any_unchaged:
update = FileMetadataUpdate(external_id=file_meta.external_id)
any = False
if file_meta.source:
any = True
update.source.set(file_meta.source)
if file_meta.directory:
any = True
update.directory.set(file_meta.directory)
if any:
self.cdf_client.files.update(update)

return file_meta_response, url

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-extractor-utils"
version = "7.5.3"
version = "7.5.4"
description = "Utilities for easier development of extractors for CDF"
authors = ["Mathias Lohne <[email protected]>"]
license = "Apache-2.0"
Expand Down
24 changes: 24 additions & 0 deletions tests/tests_integration/test_file_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,27 @@ def read_file() -> BufferedReadWithLength:

assert len(bigfile) == 10_000_000
assert len(bigfile2) == 10_000_000


def test_update_files(set_upload_test: Tuple[CogniteClient, ParamTest]) -> None:
    """Check that re-uploading an existing file applies its new source and directory.

    The same external ID is queued twice on a queue created with
    ``overwrite_existing=True``: first with only ``external_id`` and ``name``,
    then with ``source`` and ``directory`` set as well. After uploading, the
    file retrieved from CDF must carry the source and directory from the
    second upload.

    Args:
        set_upload_test: Fixture yielding the CDF client and the test
            parameters (including the external IDs to use).
    """
    client, test_parameter = set_upload_test
    external_id = test_parameter.external_ids[0]
    queue = BytesUploadQueue(cdf_client=client, overwrite_existing=True, max_queue_size=1)

    # First upload: only the basic attributes are set.
    queue.add_to_upload_queue(
        content=b"bytes content",
        file_meta=FileMetadata(external_id=external_id, name=external_id),
    )
    # Second upload of the same external ID, now with source and directory.
    queue.add_to_upload_queue(
        content=b"bytes content",
        file_meta=FileMetadata(
            external_id=external_id,
            name=external_id,
            source="some-source",
            directory="/some/directory",
        ),
    )

    queue.upload()
    uploaded = client.files.retrieve(external_id=external_id)
    # retrieve() may return None when the file is missing; fail with a clear
    # message instead of an AttributeError on the attribute checks below.
    assert uploaded is not None, f"File {external_id!r} was not found after upload"
    assert uploaded.source == "some-source"
    assert uploaded.directory == "/some/directory"

0 comments on commit 6f14cc4

Please sign in to comment.