Skip to content

Commit

Permalink
Taxref v18 : Import + Migrate
Browse files Browse the repository at this point in the history
  • Loading branch information
amandine-sahl committed Jan 16, 2025
1 parent da5b346 commit ad46fbb
Show file tree
Hide file tree
Showing 11 changed files with 854 additions and 21 deletions.
31 changes: 31 additions & 0 deletions apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""[taxref] Taxref v18
Revision ID: da3172cecdb1
Revises: 2c68a907f74c
Create Date: 2025-01-14 11:44:12.356028
"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` is this migration's id; `down_revision` is the revision it
# revises (its parent in the migration history).
revision = "da3172cecdb1"
down_revision = "2c68a907f74c"
branch_labels = None
depends_on = None


def upgrade():
    """Add the ``cd_ba`` and ``nomenclatural_comment`` columns to ``taxonomie.taxref``."""
    # Columns are added in declaration order: cd_ba first, then
    # nomenclatural_comment.
    new_columns = (
        sa.Column("cd_ba", sa.Integer()),
        sa.Column("nomenclatural_comment", sa.String(500)),
    )
    for new_column in new_columns:
        op.add_column(table_name="taxref", column=new_column, schema="taxonomie")


def downgrade():
    """Drop the ``cd_ba`` and ``nomenclatural_comment`` columns from ``taxonomie.taxref``."""
    # Same drop order as before: cd_ba first, then nomenclatural_comment.
    for dropped_name in ("cd_ba", "nomenclatural_comment"):
        op.drop_column(table_name="taxref", column_name=dropped_name, schema="taxonomie")
199 changes: 199 additions & 0 deletions apptax/taxonomie/commands/migrate_taxref/commands_v18.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import importlib
import click
from zipfile import ZipFile
from sqlalchemy import text
from flask.cli import with_appcontext

from utils_flask_sqla.migrations.utils import open_remote_file

from apptax.database import db
from apptax.taxonomie.commands.utils import (
copy_from_csv,
truncate_bdc_statuts,
refresh_taxref_vm,
insert_taxref_numversion,
)
from apptax.taxonomie.commands.taxref_v18 import import_bdc_statuts_v18
from .utils import save_data, analyse_taxref_changes
from . import logger


# Base URL handed to open_remote_file together with the archive file name
# when the TAXREF v18 zip is fetched.
base_url = "http://geonature.fr/data/inpn/taxonomie/"


@click.group(help="Migrate to TaxRef v18.")
def migrate_to_v18():
    """Click group under which the TaxRef v18 migration sub-commands are registered.

    The group itself performs no work; the CLI help text comes from the
    explicit ``help=`` argument above, not from this docstring.
    """


@migrate_to_v18.command()
@with_appcontext
def import_taxref_v18():
    """Load the TaxRef v18 data and analyse the upcoming changes.

    Steps, in order:

    1. run the packaged SQL script that detects the repercussions of
       disappearing cd_nom values;
    2. import the raw TaxRef v18 data (``import_data_taxref_v18``) and commit;
    3. run ``analyse_taxref_changes`` with its default arguments.
    """
    # Prerequisite: deps_test_fk_dependencies_cd_nom
    detection_sql = importlib.resources.read_text(
        "apptax.taxonomie.commands.migrate_taxref.data.changes_detection",
        "0.2_taxref_detection_repercussion_disparition_cd_nom.sql",
    )
    db.session.execute(text(detection_sql))

    # Import the TaxRef v18 data, then persist everything done so far.
    import_data_taxref_v18()
    db.session.commit()

    # Analyse the changes to come.
    analyse_taxref_changes()


@migrate_to_v18.command()
@click.option("--keep-cdnom", is_flag=True)
@with_appcontext
def test_changes_detection(keep_cdnom):
    """Analyse the repercussions of the TaxRef change.

    :param keep-cdnom: keep the missing cd_nom values instead of deleting them
    :type keep-cdnom: boolean

    The whole analysis is delegated to ``analyse_taxref_changes``. As
    described by the original authors it covers three steps:

    - detection of the missing cd_nom values;
    - creation of a working copy of bib_noms;
    - analysis of the taxonomic modifications (split, merge, ...) and of
      their repercussions on TaxHub attributes and media.
    """
    # Analyse the changes to come; the flag is forwarded unchanged.
    analyse_taxref_changes(keep_missing_cd_nom=keep_cdnom)


@migrate_to_v18.command()
@click.option("--keep-oldtaxref", is_flag=True)
@click.option("--keep-oldbdc", is_flag=True)
@click.option("--keep-cdnom", is_flag=True)
@click.option("--taxref-region", type=str)
@click.option("--script_predetection", type=click.Path(exists=True))
@click.option("--script_postdetection", type=click.Path(exists=True))
@with_appcontext
def apply_changes(
    keep_oldtaxref,
    keep_oldbdc,
    keep_cdnom,
    taxref_region,
    script_predetection,
    script_postdetection,
):
    """Migrate TaxRef to version 18.

    Applies the changes and imports the data into the taxref and bdc_status
    tables.

    :param keep-oldtaxref: keep the previous version of the taxref referential
    :type keep-oldtaxref: boolean
    :param keep-oldbdc: keep the previous version of the bdc_status referential
    :type keep-oldbdc: boolean
    :param keep-cdnom: keep the missing cd_nom values instead of deleting them
    :type keep-cdnom: boolean
    :param taxref-region: value bound to the ``taxref_region`` parameter of
        the ``3.2_alter_taxref_data.sql`` script
    :type taxref-region: str
    :param script_predetection: path of a SQL correction file applied before
        the detection of changes
    :type script_predetection: Path
    :param script_postdetection: path of a SQL correction file applied after
        the detection of changes
    :type script_postdetection: Path
    :raises Exception: any error raised while running the taxref migration
        script is logged and re-raised after the session has been rolled back
    """

    # Analyse the changes to come.
    analyse_taxref_changes(
        keep_missing_cd_nom=keep_cdnom,
        script_predetection=script_predetection,
        script_postdetection=script_postdetection,
    )

    # Save the taxref and bdc_status data of the version being replaced (17).
    save_data(17, keep_oldtaxref, keep_oldbdc)

    # Update taxref to v18.
    logger.info("Migration of taxref ...")
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18",
            "3.2_alter_taxref_data.sql",
        )
    )
    try:
        db.session.execute(query, {"keep_cd_nom": keep_cdnom, "taxref_region": taxref_region})
        db.session.commit()
    except Exception:
        # Abort here: continuing would run the clean-up script and record
        # version 18 even though the taxref data was never migrated.
        db.session.rollback()
        logger.exception("Migration of taxref failed")
        raise
    logger.info("it's done")

    # Import bdc status data and insert into taxhub tables.
    import_and_format_dbc_status()

    # Clean DB.
    logger.info("Clean DB")
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data", "5_clean_db.sql"
        )
    )
    db.session.execute(query)

    logger.info("Refresh materialized views…")
    refresh_taxref_vm()

    insert_taxref_numversion(18)
    db.session.commit()


def import_data_taxref_v18():
    """Import the raw TaxRef v18 data into the database before processing.

    Runs ``0_taxref_import_data.sql`` (preparation of the import tables) and
    commits, then opens the ``TAXREF_v18_2025.zip`` archive through
    ``open_remote_file`` and copies three of its members into tables:

    - ``TAXREFv18.txt`` into ``import_taxref`` (tab-delimited);
    - ``CDNOM_DISPARUS.txt`` into ``cdnom_disparu`` (tab-delimited);
    - ``rangs_note.csv`` into ``import_taxref_rangs`` (``;``-delimited,
      WIN1252 encoding).
    """
    logger.info("Import TAXREFv18 into tmp table…")

    # Prepare the temporary tables used to import taxref.
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18",
            "0_taxref_import_data.sql",
        )
    )
    db.session.execute(query)
    db.session.commit()

    with open_remote_file(base_url, "TAXREF_v18_2025.zip", open_fct=ZipFile) as archive:
        with archive.open("TAXREFv18.txt") as f:
            logger.info("Insert TAXREFv18 into taxonomie.import_taxref table…")
            copy_from_csv(
                f,
                table_name="import_taxref",
                delimiter="\t",
            )
        with archive.open("CDNOM_DISPARUS.txt") as f:
            logger.info("Insert missing cd_nom into taxonomie.cdnom_disparu table…")
            copy_from_csv(
                f,
                table_name="cdnom_disparu",
                delimiter="\t",
            )

        with archive.open("rangs_note.csv") as f:
            logger.info("Insert rangs_note tmp table…")
            copy_from_csv(
                f,
                table_name="import_taxref_rangs",
                encoding="WIN1252",
                delimiter=";",
            )


def import_and_format_dbc_status():
    """Import the raw bdc_status data, then dispatch it into the various tables.

    NOTE(review): this is currently a no-op. The two calls that would do the
    work are disabled; confirm whether that is intentional before relying on
    this step:

        truncate_bdc_statuts()
        import_bdc_statuts_v18(logger)
    """
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
-- Create the import_taxref table.
-- It is dropped and recreated on every run, so any previously imported rows
-- are discarded. One column per field of the TAXREF file loaded into it.


DROP TABLE IF EXISTS taxonomie.import_taxref;
CREATE TABLE taxonomie.import_taxref
(
regne character varying(20),
phylum character varying(50),
classe character varying(50),
ordre character varying(50),
famille character varying(50),
sous_famille character varying(50),
tribu character varying(50),
group1_inpn character varying(50),
group2_inpn character varying(50),
group3_inpn character varying(50),
cd_nom integer NOT NULL,
cd_taxsup integer,
cd_sup integer,
cd_ref integer,
cd_ba integer,
rang character varying(10),
lb_nom character varying(100),
lb_auteur character varying(500),
nomenclatural_comment character varying(500),
nom_complet character varying(500),
nom_complet_html character varying(500),
nom_valide character varying(500),
nom_vern text,
nom_vern_eng character varying(500),
habitat character varying(10),
fr character varying(10),
gf character varying(10),
mar character varying(10),
gua character varying(10),
sm character varying(10),
sb character varying(10),
spm character varying(10),
may character varying(10),
epa character varying(10),
reu character varying(10),
sa character varying(10),
ta character varying(10),
taaf character varying(10),
pf character varying(10),
nc character varying(10),
wf character varying(10),
cli character varying(10),
url text,
url_inpn text
);

-- cd_nom is the primary key of the import table.
ALTER TABLE taxonomie.import_taxref ADD CONSTRAINT pk_import_taxref PRIMARY KEY (cd_nom);

-- Create the cdnom_disparu table (dropped and recreated on every run).
-- No key or NOT NULL constraint is declared on this table.
DROP TABLE IF EXISTS taxonomie.cdnom_disparu;
CREATE TABLE taxonomie.cdnom_disparu (
CD_NOM int,
PLUS_RECENTE_DIFFUSION character varying(50),
CD_NOM_REMPLACEMENT int,
CD_RAISON_SUPPRESSION int,
RAISON_SUPPRESSION text
);


-- Create the import_taxref_rangs table (dropped and recreated on every run).
-- All four columns are mandatory.
DROP TABLE IF EXISTS taxonomie.import_taxref_rangs;
CREATE TABLE taxonomie.import_taxref_rangs (
level int NOT NULL,
rang varchar(20) NOT NULL,
detail_fr varchar(50) NOT NULL,
detail_en varchar(50) NOT NULL
);
Loading

0 comments on commit ad46fbb

Please sign in to comment.