Skip to content

Commit

Permalink
fix(import): prepare_import
Browse files Browse the repository at this point in the history
  • Loading branch information
20cents committed Nov 22, 2024
1 parent fe063e0 commit d3b0e0e
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 15 deletions.
7 changes: 4 additions & 3 deletions backend/geonature/core/gn_synthese/imports/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def check_transient_data(task, logger, imprt: TImports):
selected_fields = {
field_name: fields[field_name]
for field_name, source_field in imprt.fieldmapping.items()
if source_field in imprt.columns
if source_field.get('column_src', None) in imprt.columns
}
init_rows_validity(imprt)
task.update_state(state="PROGRESS", meta={"progress": 0.05})
Expand Down Expand Up @@ -339,11 +339,12 @@ def import_data_to_destination(imprt: TImports) -> None:
if field_name not in fields: # not a destination field
continue
field = fields[field_name]
column_src = source_field.get('column_src', None)
if field.multi:
if not set(source_field).isdisjoint(imprt.columns):
if not set(column_src).isdisjoint(imprt.columns):
insert_fields |= {field}
else:
if source_field in imprt.columns:
if column_src in imprt.columns:
insert_fields |= {field}

insert_fields -= {fields["unique_dataset_id"]} # Column only used for filling `id_dataset`
Expand Down
6 changes: 3 additions & 3 deletions backend/geonature/core/imports/checks/dataframe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from geonature.utils.env import db

from geonature.core.imports.models import ImportUserError, ImportUserErrorType
from geonature.core.imports.models import ImportUserError, ImportUserErrorType, TImports
from geonature.core.imports.utils import generated_fields


Expand Down Expand Up @@ -101,7 +101,7 @@ def __error_replace(*args, **kwargs):
return _error_replace


def report_error(imprt, entity, df, error):
def report_error(imprt: TImports, entity, df, error):
"""
Reports an error found in the dataframe, updates the validity column and inserts
the error in the `t_user_errors` table.
Expand Down Expand Up @@ -147,7 +147,7 @@ def report_error(imprt, entity, df, error):
# f'{error_type.name}' # FIXME comment
ordered_invalid_rows = sorted(invalid_rows["line_no"])
column = generated_fields.get(error["column"], error["column"])
column = imprt.fieldmapping.get(column, column)
column = imprt.fieldmapping.get(column, {}).get("column_src", column)
# If an error for same import, same column and of the same type already exists,
# we concat existing erroneous rows with current rows.
stmt = pg_insert(ImportUserError).values(
Expand Down
19 changes: 11 additions & 8 deletions backend/geonature/core/imports/checks/sql/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
Entity,
EntityField,
BibFields,
TImports,
)


__all__ = ["init_rows_validity", "check_orphan_rows"]


def init_rows_validity(imprt):
def init_rows_validity(imprt: TImports):
"""
Validity columns are three-states:
- None: the row does not contain data for the given entity
Expand All @@ -35,9 +36,10 @@ def init_rows_validity(imprt):
# as rows with multi-entity field only will raise an ORPHAN_ROW error
selected_fields_names = []
for field_name, source_field in imprt.fieldmapping.items():
if type(source_field) == list:
selected_fields_names.extend(set(source_field) & set(imprt.columns))
elif source_field in imprt.columns:
column_src = source_field.get("column_src", None)
if type(column_src) == list:
selected_fields_names.extend(set(column_src) & set(imprt.columns))
elif column_src in imprt.columns:
selected_fields_names.append(field_name)
for entity in entities:
# Select fields associated to this entity *and only to this entity*
Expand All @@ -58,15 +60,16 @@ def init_rows_validity(imprt):
)


def check_orphan_rows(imprt):
def check_orphan_rows(imprt: TImports):
transient_table = imprt.destination.get_transient_table()
# TODO: handle multi-source fields
# This is actually not a big issue as multi-source fields are unlikely to also be multi-entity fields.
selected_fields_names = []
for field_name, source_field in imprt.fieldmapping.items():
if type(source_field) == list:
selected_fields_names.extend(set(source_field) & set(imprt.columns))
elif source_field in imprt.columns:
column_src = source_field.get("column_src", None)
if type(column_src) == list:
selected_fields_names.extend(set(column_src) & set(imprt.columns))
elif column_src in imprt.columns:
selected_fields_names.append(field_name)
# Select fields associated to multiple entities
AllEntityField = sa.orm.aliased(EntityField)
Expand Down
2 changes: 1 addition & 1 deletion backend/geonature/core/imports/checks/sql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def report_erroneous_rows(
transient_table = imprt.destination.get_transient_table()
error_type = ImportUserErrorType.query.filter_by(name=error_type).one()
error_column = generated_fields.get(error_column, error_column)
error_column = imprt.fieldmapping.get(error_column, error_column)
error_column = imprt.fieldmapping.get(error_column, {}).get("column_src", error_column)
if error_type.level in level_validity_mapping:
assert entity is not None
cte = (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,vbourgeois,pop-os,22.11.2024 13:23,file:///home/vbourgeois/.config/libreoffice/4;

0 comments on commit d3b0e0e

Please sign in to comment.