Skip to content

Commit

Permalink
Merge pull request #114 from Ensembl/update/beta3_handover
Browse files (browse the repository at this point in the history)
Update/beta3 handover
  • Loading branch information
vinay-ebi authored Oct 21, 2024
2 parents fd94633 + acfa9c9 commit 9dd0d8c
Show file tree
Hide file tree
Showing 25 changed files with 319 additions and 526 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.1
3.3.0
4 changes: 2 additions & 2 deletions src/ensembl/production/metadata/api/factories/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import sqlalchemy.orm
from ensembl.utils.database.dbconnection import DBConnection
from sqlalchemy.engine import make_url
from sqlalchemy.sql import func

from ensembl.production.metadata.api.exceptions import *
Expand All @@ -41,6 +40,7 @@ def create_all_child_datasets(self, dataset_uuid: str,
topic: str = 'production_process',
status: DatasetStatus = None,
release: EnsemblRelease = None):
# CURRENTLY BROKEN FOR STATUS AND RELEASE. Marc broke it with his last update. Trace back to fix.
# Retrieve the top-level dataset
# Will not work on datasets that are tied to multiple genomes!
# !!!! WILL CREATE THE DATASETS EVEN IF THEY ALREADY EXIST
Expand Down Expand Up @@ -178,7 +178,7 @@ def __create_child_datasets_recursive(self, session, parent_dataset, topic=None,
version = parent_dataset.version
# Create the child dataset
if not exist_ds:
logger.debug(f"Creating dataset {dataset_type.name}/{dataset_source.name}/{status.value}/{release}")
# logger.debug(f"Creating dataset {dataset_type.name}/{dataset_source.name}/{status.value}/{release}")
child_uuid, dataset, attributes, g_dataset = self.create_dataset(session=session,
genome_input=genome_uuid,
dataset_source=dataset_source,
Expand Down
2 changes: 1 addition & 1 deletion src/ensembl/production/metadata/api/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def meta_factory(db_uri, metadata_uri, force=False):
elif '_funcgen_' in db_url.database:
raise Exception("funcgen not implemented yet")
elif '_core_' in db_url.database:
return CoreMetaUpdater(db_uri, metadata_uri, force=force)
return CoreMetaUpdater(db_uri, metadata_uri)
elif '_otherfeatures_' in db_url.database:
raise Exception("otherfeatures not implemented yet")
elif '_rnaseq_' in db_url.database:
Expand Down
3 changes: 2 additions & 1 deletion src/ensembl/production/metadata/api/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import sqlalchemy
from sqlalchemy import Column, Integer, String, text, ForeignKey, Index, JSON
from sqlalchemy.dialects.mysql import DATETIME
from sqlalchemy.dialects.mysql import DATETIME, TINYINT
from sqlalchemy.orm import relationship, backref
from sqlalchemy.sql import func
from sqlalchemy.types import Enum
Expand Down Expand Up @@ -49,6 +49,7 @@ class Attribute(LoadAble, Base):
name = Column(String(128), nullable=False)
label = Column(String(128), nullable=False)
description = Column(String(255))
required = Column(TINYINT(1), nullable=False, default=0)
type = Column(Enum('string', 'percent', 'float', 'integer', 'bp', 'number'), server_default=text("'string'"))
# One to many relationships
# attribute_id within dataset attribute
Expand Down
3 changes: 1 addition & 2 deletions src/ensembl/production/metadata/updater/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@


class BaseMetaUpdater:
def __init__(self, db_uri, metadata_uri, release=None, force=None):
def __init__(self, db_uri, metadata_uri, release=None):
self.db_uri = db_uri
self.force = force
self.metadata_uri = metadata_uri
self.db = DBConnection(self.db_uri)
self.metadata_db = DBConnection(metadata_uri)
Expand Down
226 changes: 94 additions & 132 deletions src/ensembl/production/metadata/updater/core.py

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions src/ensembl/production/metadata/updater/updater_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,22 @@
from ensembl.production.metadata.api.models import Attribute, DatasetAttribute


def update_attributes(dataset, attributes, session):
def update_attributes(dataset, attributes, session, replace=False):
# TODO If attributes already exist, update them. Add option to replace all.
dataset_attributes = []
if replace:
for dataset_attribute in dataset.dataset_attributes:
session.delete(dataset_attribute)
session.flush()
for attribute, value in attributes.items():
meta_attribute = session.query(Attribute).filter(Attribute.name == attribute).one_or_none()
if meta_attribute is None:
raise UpdaterException(f"{attribute} does not exist. Add it to the database and reload.")
dataset_attributes.append(DatasetAttribute(
new_dataset_attribute = DatasetAttribute(
value=value,
dataset=dataset,
attribute=meta_attribute,
))
)
session.add(new_dataset_attribute)
dataset_attributes.append(new_dataset_attribute)
return dataset_attributes
23 changes: 12 additions & 11 deletions src/tests/databases/core_1/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@
13 1 assembly.name jaber01
11 1 assembly.ucsc_alias SCARY
15 1 gencode.version 999
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 10029
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 10029
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 10029
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 10029
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date 2023-07-Ensembl
25 1 assembly.alt_accession GCA_0000012345.3
26 \N schema_version 110
26 \N schema_version 110
27 1 genebuild.last_geneset_update 2023-01
23 changes: 12 additions & 11 deletions src/tests/databases/core_2/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@
11 1 assembly.ucsc_alias SCARY
15 1 gencode.version 999
16 1 genebuild.last_geneset_update 01
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 6666666
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 666668
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 6666666
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 666668
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
21 1 genome.genome_uuid test
23 1 genebuild.provider_name test2
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
26 1 genebuild.last_geneset_update 2023-01
23 changes: 12 additions & 11 deletions src/tests/databases/core_3/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
11 1 assembly.ucsc_alias SCARYIER
14 1 gencode.version 999
15 1 genebuild.last_geneset_update 2024-02
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 6666666
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 666668
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 6666666
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 666668
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
26 1 genebuild.last_geneset_update 2023-01
23 changes: 12 additions & 11 deletions src/tests/databases/core_4/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
13 1 assembly.name jaber01
11 1 assembly.ucsc_alias SCARYIER
15 1 gencode.version 999
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 6666666
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 666668
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 6666666
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 666668
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS02
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
26 1 genebuild.last_geneset_update 2023-01
19 changes: 10 additions & 9 deletions src/tests/databases/core_5/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
14 1 assembly.default test846
13 1 assembly.name test1
11 1 assembly.ucsc_alias test1
7 1 species.division Ensembl_TEST
6 1 species.production_name test_case_5
4 1 species.scientific_name Hominoide
8 1 species.strain reference
9 1 species.strain_group Hominoide
2 1 species.taxonomy_id 9940
10 1 species.type monsters
5 1 species.url Hominoide
7 1 organism.division Ensembl_TEST
6 1 organism.production_name test_case_5
4 1 organism.scientific_name Hominoide
8 1 organism.strain reference
9 1 organism.strain_group Hominoide
2 1 organism.taxonomy_id 9940
10 1 organism.type monsters
5 1 organism.url Hominoide
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name removed_for_test
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
26 1 genebuild.last_geneset_update 2023-01
23 changes: 12 additions & 11 deletions src/tests/databases/core_6/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@
11 1 assembly.ucsc_alias SCARY
15 1 gencode.version 999
16 1 genebuild.last_geneset_update 01
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 6666666
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 666668
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 6666666
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 666668
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
21 1 genome.genome_uuid 90720316-006c-470b-a7dd-82d28f952264
23 1 genebuild.provider_name test
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
26 1 genebuild.last_geneset_update 2023-01
23 changes: 12 additions & 11 deletions src/tests/databases/core_7/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
11 1 assembly.ucsc_alias test_alias
15 1 gencode.version 999
16 1 genebuild.last_geneset_update 01
3 1 species.common_name jabberwocky
7 1 species.division Ensembl_TEST
6 1 species.production_name Jabberwocky
4 1 species.scientific_name carol_jabberwocky
1 1 species.species_taxonomy_id 6666666
8 1 species.strain reference
9 1 species.strain_group testing
2 1 species.taxonomy_id 666668
10 1 species.type monsters
5 1 species.url Jabbe
3 1 organism.common_name jabberwocky
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Jabberwocky
4 1 organism.scientific_name carol_jabberwocky
1 1 organism.species_taxonomy_id 6666666
8 1 organism.strain reference
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 666668
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version ENS01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
Expand All @@ -22,4 +22,5 @@
24 1 genebuild.start_date 2023-08-Ensembl
25 1 genebuild.havana_datafreeze_date test2
26 \N schema_version 110
27 1 assembly.stats.total_coding_sequence_length 8989
27 1 assembly.stats.total_coding_sequence_length 8989
28 1 genebuild.last_geneset_update 2023-01
26 changes: 14 additions & 12 deletions src/tests/databases/core_8/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,22 @@
11 1 assembly.ucsc_alias SCARY
15 1 gencode.version 999
16 1 genebuild.last_geneset_update 01
3 1 species.common_name Caenorhabditis elegans (PRJNA13758)
7 1 species.division Ensembl_TEST
6 1 species.production_name Caenorhabditis_elegans
4 1 species.scientific_name Caenorhabditis elegans
1 1 species.species_taxonomy_id 6239
8 1 species.strain N2
9 1 species.strain_group testing
2 1 species.taxonomy_id 6239
10 1 species.type monsters
5 1 species.url Jabbe
17 1 genebuild.version ENS01
3 1 organism.biosample_id SAMN04256190
3 1 organism.common_name Caenorhabditis elegans (PRJNA13758)
7 1 organism.division Ensembl_TEST
6 1 organism.production_name Caenorhabditis_elegans
4 1 organism.scientific_name Caenorhabditis elegans
1 1 organism.species_taxonomy_id 6239
8 1 organism.strain N2
9 1 organism.strain_group testing
2 1 organism.taxonomy_id 6239
10 1 organism.type monsters
5 1 organism.url Jabbe
17 1 genebuild.version EXT01
18 1 genebuild.sample_gene ENSAMXG00005000318
19 1 genebuild.sample_location KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date 2023-07-Ensembl
25 \N schema_version 110
25 \N schema_version 110
29 1 genebuild.last_geneset_update 2023-01
3 changes: 0 additions & 3 deletions src/tests/databases/core_9/attrib_type.txt

This file was deleted.

1 change: 0 additions & 1 deletion src/tests/databases/core_9/coord_system.txt

This file was deleted.

Loading

0 comments on commit 9dd0d8c

Please sign in to comment.