feat: add check_dataset for synthese
Reviewed-by: andriacap
andriacap committed Mar 6, 2024
1 parent 8519a81 commit 3210a77
Showing 1 changed file with 21 additions and 7 deletions.
backend/geonature/core/gn_synthese/imports/__init__.py: 28 changes (21 additions, 7 deletions)
@@ -20,6 +20,7 @@
     check_types,
     check_geography,
     check_counts,
+    check_datasets,
 )
 from geonature.core.imports.checks.sql import (
     do_nomenclatures_mapping,
@@ -69,7 +70,7 @@ def check_transient_data(task, logger, imprt):
     def update_batch_progress(batch, step):
         start = 0.1
         end = 0.4
-        step_count = 7
+        step_count = 8
         progress = start + ((batch + 1) / batch_count) * (step / step_count) * (end - start)
         task.update_state(state="PROGRESS", meta={"progress": progress})
 
@@ -90,6 +91,19 @@ def update_batch_progress(batch, step):
             )
         update_batch_progress(batch, 1)
 
+        logger.info(f"[{batch+1}/{batch_count}] Check dataset rows")
+        with start_sentry_child(op="check.df", description="check datasets rows"):
+            updated_cols |= check_datasets(
+                imprt,
+                entity,
+                df,
+                uuid_field=fields["unique_dataset_id"],
+                id_field=fields["id_dataset"],
+                module_code="SYNTHESE",
+            )
+
+        update_batch_progress(batch, 2)
+
         logger.info(f"[{batch+1}/{batch_count}] Concat dates…")
         with start_sentry_child(op="check.df", description="concat dates"):
             updated_cols |= concat_dates(
@@ -101,17 +115,17 @@ def update_batch_progress(batch, step):
                 fields["hour_min"].source_field,
                 fields["hour_max"].source_field,
             )
-        update_batch_progress(batch, 2)
+        update_batch_progress(batch, 3)
 
         logger.info(f"[{batch+1}/{batch_count}] Check required values…")
         with start_sentry_child(op="check.df", description="check required values"):
             updated_cols |= check_required_values(imprt, entity, df, fields)
-        update_batch_progress(batch, 3)
+        update_batch_progress(batch, 4)
 
         logger.info(f"[{batch+1}/{batch_count}] Check types…")
         with start_sentry_child(op="check.df", description="check types"):
             updated_cols |= check_types(imprt, entity, df, fields)
-        update_batch_progress(batch, 4)
+        update_batch_progress(batch, 5)
 
         logger.info(f"[{batch+1}/{batch_count}] Check geography…")
         with start_sentry_child(op="check.df", description="set geography"):
@@ -129,7 +143,7 @@
             codemaille_field=fields["codemaille"],
             codedepartement_field=fields["codedepartement"],
         )
-        update_batch_progress(batch, 5)
+        update_batch_progress(batch, 6)
 
         logger.info(f"[{batch+1}/{batch_count}] Check counts…")
         with start_sentry_child(op="check.df", description="check count"):
@@ -141,12 +155,12 @@
             fields["count_max"],
             default_count=current_app.config["IMPORT"]["DEFAULT_COUNT_VALUE"],
         )
-        update_batch_progress(batch, 6)
+        update_batch_progress(batch, 7)
 
         logger.info(f"[{batch+1}/{batch_count}] Updating import data from dataframe…")
         with start_sentry_child(op="check.df", description="save dataframe"):
             update_transient_data_from_dataframe(imprt, entity, updated_cols, df)
-        update_batch_progress(batch, 7)
+        update_batch_progress(batch, 8)
 
     # Checks in SQL
     convert_geom_columns(
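Note: the diff only shows the call site of check_datasets; the function itself is imported at the top of the file. Below is a minimal sketch of the contract that call site implies. Everything in it is inferred, not GeoNature's actual implementation: the return type follows from the `updated_cols |= ...` accumulation, and the body is a hypothetical outline.

# Sketch only: mirrors the call-site contract visible in the diff above.
# The body and docstring are assumptions, not the real implementation.
from typing import Set

def check_datasets(imprt, entity, df, uuid_field, id_field, module_code) -> Set[str]:
    """Validate each row's dataset reference (by UUID or id) for the given
    module and return the set of dataframe columns this check updated."""
    updated_cols: Set[str] = set()
    # Hypothetical outline: look up df[uuid_field.source_field] and
    # df[id_field.source_field] against datasets importable under module_code
    # (here "SYNTHESE"), and flag rows with unknown or forbidden datasets.
    ...
    return updated_cols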
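The step_count change from 7 to 8 keeps the per-batch progress arithmetic consistent with the newly inserted step, which is why every update_batch_progress call after the new check is renumbered. A quick worked example of the formula, with batch_count = 2 assumed purely for illustration:

# Progress formula copied from the diff; batch_count = 2 is an assumed value.
start, end, step_count = 0.1, 0.4, 8
batch_count = 2
batch, step = 0, 8  # final step of the first batch
progress = start + ((batch + 1) / batch_count) * (step / step_count) * (end - start)
print(round(progress, 3))  # 0.25, i.e. halfway through the 0.1-0.4 progress window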
