From a30df2d5bd08fcd0d2ed25ab6ced6779e30b7017 Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Sun, 23 Jun 2024 12:54:18 +0300 Subject: [PATCH] More refactoring --- src/scripts/upheno_create_profiles.py | 142 ++++++++------------------ 1 file changed, 43 insertions(+), 99 deletions(-) diff --git a/src/scripts/upheno_create_profiles.py b/src/scripts/upheno_create_profiles.py index c38c3e76..5507c593 100644 --- a/src/scripts/upheno_create_profiles.py +++ b/src/scripts/upheno_create_profiles.py @@ -35,25 +35,15 @@ ws = upheno_config.get_working_directory() robot_opts = upheno_config.get_robot_opts() -blacklisted_upheno_ids = [] overwrite_dosdp_upheno = upheno_config.is_overwrite_upheno_intermediate() - # Data directories original_pattern_dir = os.path.join(ws, "curation/patterns-for-matching/") +upheno_fillers_dir = os.path.join(ws, "curation/upheno-fillers/") upheno_preprocessed_patterns_dir = os.path.join(ws, "curation/changed-patterns/") sspo_matches_dir = os.path.join(ws, "curation/pattern-matches/") - -upheno_profiles_dir = os.path.join(ws, "curation/upheno-profiles/") raw_ontologies_dir = os.path.join(ws, "curation/tmp/") upheno_prepare_dir = os.path.join(ws, "curation/upheno-release-prepare/") -ontology_for_matching_dir = os.path.join(ws, "curation/ontologies-for-matching/") -upheno_fillers_dir = os.path.join(ws, "curation/upheno-fillers/") - -upheno_patterns_data_manual_dir = os.path.join(ws, "patterns/data/default/") -upheno_patterns_main_dir = os.path.join(ws, "patterns/dosdp-patterns/") - -upheno_ontology_dir = os.path.join(ws, "ontology/") cdir(upheno_fillers_dir) cdir(original_pattern_dir) @@ -61,47 +51,34 @@ cdir(sspo_matches_dir) cdir(raw_ontologies_dir) cdir(upheno_prepare_dir) -cdir(ontology_for_matching_dir) # Files -java_fill = os.path.join(ws, "scripts/upheno-filler-pipeline.jar") -java_relationships = os.path.join(ws, "scripts/upheno-relationship-augmentation.jar") -sparql_terms = os.path.join(ws, "sparql/terms.sparql") -sparql_uberon_terms = os.path.join(ws, "sparql/uberon_terms.sparql") -phenotype_classes_sparql = os.path.join(ws, "sparql/phenotype_classes.sparql") -terms_without_labels_sparql = os.path.join(ws, "sparql/terms_without_labels.sparql") -phenotype_pattern = os.path.join(ws, "patterns/dosdp-patterns/phenotype.yaml") -phenotype_pattern_taxon = os.path.join(ws, "patterns/dosdp-patterns/phenotype_taxon.yaml") -phenotype_pattern_taxon_modified = os.path.join( - ws, "patterns/dosdp-patterns/phenotype_taxon_modified.yaml" -) -allimports_merged = os.path.join(raw_ontologies_dir, "upheno-allimports-merged.owl") allimports_dosdp = os.path.join(raw_ontologies_dir, "upheno-allimports-dosdp.owl") -upheno_components_dir = os.path.join(upheno_ontology_dir, "components/") -upheno_core_ontology = os.path.join(upheno_prepare_dir, "upheno-core.owl") -upheno_extra_axioms_ontology = os.path.join(upheno_components_dir, "upheno-extra.owl") -upheno_relations_ontology = os.path.join(upheno_components_dir, "upheno-relations.owl") -upheno_species_neutral_mappings_tsv = os.path.join(upheno_prepare_dir, "upheno-species-independent.sssom.tsv") -upheno_species_neutral_mappings_owl = os.path.join(upheno_prepare_dir, "upheno-species-independent.sssom.owl") - -# generated Files -java_opts = upheno_config.get_robot_java_args() - -print("### Generate change patterns ###") +print("### 1. Rewrite abnormal patterns to 'phenotype' patters") +upheno_patterns_main_dir = os.path.join(ws, "patterns/dosdp-patterns/") generate_rewritten_patterns(upheno_patterns_main_dir=upheno_patterns_main_dir, pattern_dir=original_pattern_dir, upheno_patterns_dir=upheno_preprocessed_patterns_dir) -print("### Preparing a dictionary for DOSDP extraction from the all imports merged ontology.") +print("### 2. Preparing a dictionary for DOSDP extraction from the all imports merged ontology.") +sparql_terms = os.path.join(ws, "sparql/terms.sparql") + # Used to loop up labels in the pattern generation process, so maybe I don't need anything other than rdfs:label? +allimports_merged = os.path.join(raw_ontologies_dir, "upheno-allimports-merged.owl") if upheno_config.is_overwrite_ontologies() or not os.path.exists(allimports_dosdp): robot_prepare_ontology_for_dosdp( - allimports_merged, allimports_dosdp, sparql_terms, timeout=timeout, robot_opts=robot_opts + o=allimports_merged, + ontology_merged_path=allimports_dosdp, + sparql_terms_class_hierarchy=sparql_terms, + timeout=timeout, + robot_opts=robot_opts ) -print("### Loading the existing ID map, the blacklist for uPheno IDs and determining next available uPheno ID.") +print("### 3. Compute the uPheno filler classes.") java_fill = os.path.join(ws, "scripts/upheno-filler-pipeline.jar") +ontology_for_matching_dir = os.path.join(ws, "curation/ontologies-for-matching/") +cdir(ontology_for_matching_dir) compute_upheno_fillers(upheno_config=upheno_config, raw_ontologies_dir=raw_ontologies_dir, upheno_fillers_dir=upheno_fillers_dir, @@ -110,46 +87,50 @@ sspo_matches_dir=sspo_matches_dir, original_pattern_dir=original_pattern_dir) -print("Generating uPheno core (part of uPheno common to all profiles).") +print("### 4. Generating uPheno core.") +upheno_patterns_data_manual_dir = os.path.join(ws, "patterns/data/default/") + # Extra axioms, upheno relations, the manually curated intermediate phenotypes part of the upheno repo -manual_tsv_files = ( - [] -) # the tsv files are generally being kept track of to generate seeds for the profile import modules later upheno_core_manual_phenotypes = create_upheno_core_manual_phenotypes( - manual_tsv_files, allimports_dosdp, timeout=timeout, + manual_tsv_files=[], + allimports_dosdp=allimports_dosdp, + timeout=timeout, overwrite_dosdp_upheno=overwrite_dosdp_upheno, upheno_patterns_dir=upheno_preprocessed_patterns_dir, upheno_prepare_dir=upheno_prepare_dir, upheno_patterns_data_manual_dir=upheno_patterns_data_manual_dir, ) +upheno_ontology_dir = os.path.join(ws, "ontology/") +upheno_components_dir = os.path.join(upheno_ontology_dir, "components/") +upheno_relations_ontology = os.path.join(upheno_components_dir, "upheno-relations.owl") +upheno_extra_axioms_ontology = os.path.join(upheno_components_dir, "upheno-extra.owl") upheno_core_parts = [upheno_extra_axioms_ontology, upheno_relations_ontology] upheno_core_parts.extend(upheno_core_manual_phenotypes) +upheno_core_ontology = os.path.join(upheno_prepare_dir, "upheno-core.owl") if overwrite_dosdp_upheno or not os.path.exists(upheno_core_ontology): robot_merge(ontology_list=upheno_core_parts, ontology_merged_path=upheno_core_ontology, timeout=timeout, robot_opts=robot_opts) -print("Generating uPheno profiles..") +print("### 5. Generating uPheno base..") upheno_combination_id = "all" -print("Generating profile: " + upheno_combination_id) +print("#### 5.1 Merge all tsvs from the ontologies participating in this profiles together") oids = upheno_config.get_upheno_profile_components(upheno_combination_id) -profile_dir = os.path.join(upheno_prepare_dir, upheno_combination_id) +profile_dir = os.path.join(upheno_prepare_dir, "dosdp-data") cdir(profile_dir) -final_upheno_combo_dir = os.path.join(upheno_prepare_dir, upheno_combination_id) -final_upheno_profile_release_dir = os.path.join(upheno_prepare_dir, upheno_combination_id) -cdir(final_upheno_combo_dir) -cdir(final_upheno_profile_release_dir) - -print("Merge all tsvs from the ontologies participating in this profiles together") -export_merged_tsvs_for_combination(profile_dir, oids, +# TODO Since we are just generating one massive thing, maybe we can omit this? +export_merged_tsvs_for_combination(merged_tsv_dir=profile_dir, oids=oids, pattern_dir=original_pattern_dir, upheno_fillers_dir=upheno_fillers_dir) print("Create all top level phenotypes relevant to this profile (SSPO top level classes)") +final_upheno_combo_dir = os.path.join(upheno_prepare_dir, upheno_combination_id) +cdir(final_upheno_combo_dir) + upheno_top_level_phenotypes_ontology = os.path.join( final_upheno_combo_dir, "upheno_top_level_phenotypes.owl" ) @@ -165,6 +146,8 @@ ) if overwrite_dosdp_upheno or not os.path.exists(upheno_top_level_phenotypes_ontology): df_tr = get_taxon_restriction_table(oids, upheno_config) + phenotype_pattern_taxon = os.path.join(upheno_preprocessed_patterns_dir, "phenotype_taxon.yaml") + phenotype_pattern_taxon_modified = os.path.join(upheno_preprocessed_patterns_dir, "phenotype_taxon_modified.yaml") df_tr["modifier"] = df_tr["modifier"].astype(str) df_tr_no_modifier = df_tr[df_tr["modifier"] == "False"] print(str(df_tr_no_modifier[["defined_class", "bearer", "modifier"]])) @@ -201,10 +184,6 @@ # intermediate (i.e. uPheno) layer of this profile, including the core, top-level and upheno-class component upheno_intermediate_ontologies = [upheno_top_level_phenotypes_ontology, upheno_core_ontology] -# These TSVs are purely kept for bookkeeping and to generate the seed in the end for the profile imports module -tsvs = [phenotype_tsv] -tsvs.extend(manual_tsv_files) - # For all tsvs, generate the dosdp instances and drop them in the combo directory print("Profile: Generate uPheno intermediate layer") pattern_names = [] @@ -215,56 +194,21 @@ pattern_name = pattern.replace(".yaml", "") tsv_file = os.path.join(profile_dir, tsv_file_name) if os.path.exists(tsv_file): - tsvs.append(tsv_file) pattern_names.append(pattern_name) outfile = os.path.join(final_upheno_combo_dir, pattern.replace(".yaml", ".owl")) upheno_intermediate_ontologies.append(outfile) first_pattern = os.path.join(final_upheno_combo_dir, pattern_names[0] + ".owl") if overwrite_dosdp_upheno or not os.path.exists(first_pattern): - dosdp_generate_all(pattern_names, profile_dir, - final_upheno_combo_dir, - upheno_preprocessed_patterns_dir, - False, - timeout, + dosdp_generate_all(pattern_names=pattern_names, + data_dir=profile_dir, + data_dir_owl=final_upheno_combo_dir, + pattern_dir=upheno_preprocessed_patterns_dir, + restrict_logical=False, + timeout=timeout, ontology=allimports_dosdp) -print( - "Profile: Collect all ontologies (taxon restricted versions) and their dependencies as needed by this profile." -) -species_components = [] -for oid in oids: - fn = oid + ".owl" - o_base_taxon = os.path.join(raw_ontologies_dir, oid + "-upheno-component.owl") - o_base_class_hierarchy = os.path.join(raw_ontologies_dir, oid + "-class-hierarchy.owl") - species_components.append(o_base_taxon) - species_components.append(o_base_class_hierarchy) - -# create merged main +print("### Profile: Create upheno intermediate layer") upheno_layer_ontology = os.path.join(final_upheno_combo_dir, "upheno_layer.owl") -upheno_species_components_ontology = os.path.join( - final_upheno_combo_dir, "upheno_species_components.owl" -) -upheno_species_components_dependencies_ontology = os.path.join( - final_upheno_combo_dir, "upheno_species_components_dependencies.owl" -) -upheno_species_components_dependencies_seed = os.path.join( - final_upheno_combo_dir, upheno_combination_id + "_seed.txt" -) -upheno_species_components_dependencies_pattern_seed = os.path.join( - final_upheno_combo_dir, upheno_combination_id + "_pattern_seed.txt" -) -upheno_profile_prepare_ontology = os.path.join( - final_upheno_combo_dir, "upheno_pre_" + upheno_combination_id + ".owl" -) -upheno_profile_ontology = os.path.join( - final_upheno_profile_release_dir, "upheno_" + upheno_combination_id + ".owl" -) -upheno_profile_ontology_with_relations = os.path.join( - final_upheno_profile_release_dir, "upheno_" + upheno_combination_id + "_with_relations.owl" -) -# print(upheno_intermediate_ontologies) - -print("Profile: Create upheno intermediate layer") if overwrite_dosdp_upheno or not os.path.exists(upheno_layer_ontology): robot_merge(upheno_intermediate_ontologies, upheno_layer_ontology, timeout, robot_opts)