Skip to content

Commit

Permalink
Merge pull request #111 from Ensembl/chore/add_stats_prefix
Browse files Browse the repository at this point in the history
Add `.stats` Prefix
  • Loading branch information
bilalebi authored Oct 10, 2024
2 parents 31ef593 + 06a5d35 commit f194674
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 158 deletions.
11 changes: 1 addition & 10 deletions protos/ensembl/production/metadata/grpc/ensembl_metadata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ message AttributesInfo {
string genebuild_method = 1;
string genebuild_method_display = 2;
string genebuild_last_geneset_update = 3;
string genebuild_version = 4;
string genebuild_provider_version = 4;
string genebuild_provider_name = 5;
string genebuild_provider_url = 6;
string genebuild_sample_gene = 7;
Expand Down Expand Up @@ -383,15 +383,6 @@ message GenomeUUIDRequest {
double release_version = 3; // Optional
}

/*
Genome keyword filter.
If release_version is not given, the current version is used.
*/
message GenomeByKeywordRequest {
string keyword = 1; // Mandatory
double release_version = 2; // Optional
}

/*
Genome specific keyword filter.
If release_version is not given, the current version is used.
Expand Down
4 changes: 2 additions & 2 deletions src/ensembl/production/metadata/grpc/client_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,10 @@ def get_datasets_attributes_values_by_genome_uuid(stub):
request3 = DatasetAttributesValuesRequest(
genome_uuid="a73351f7-93e7-11ec-a39d-005056b38ce3",
dataset_type="homologies",
attribute_name=["compara.homology_coverage"]
attribute_name=["compara.stats.homology_coverage"]
)
attributes3 = stub.GetAttributesValuesByUUID(request3)
print("**** Dataset Attributes Values: By genome_uuid, dataset_type='homologies' and attribute_name=['compara.homology_coverage'] ****")
print("**** Dataset Attributes Values: By genome_uuid, dataset_type='homologies' and attribute_name=['compara.stats.homology_coverage'] ****")
print(attributes3)

request4 = DatasetAttributesValuesRequest(
Expand Down
134 changes: 66 additions & 68 deletions src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/ensembl/production/metadata/grpc/protobuf_msg_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def create_attributes_info(data=None):
genebuild_method=required_attributes["genebuild.method"],
genebuild_method_display=required_attributes["genebuild.method_display"],
genebuild_last_geneset_update=required_attributes["genebuild.last_geneset_update"],
genebuild_version=required_attributes["genebuild.provider_version"],
genebuild_provider_version=required_attributes["genebuild.provider_version"],
genebuild_provider_name=required_attributes["genebuild.provider_name"],
genebuild_provider_url=required_attributes["genebuild.provider_url"],
genebuild_sample_gene=required_attributes["genebuild.sample_gene"],
Expand Down Expand Up @@ -399,7 +399,7 @@ def populate_dataset_info(data=None):
dataset_source_type=ds_item.dataset.dataset_source.type if ds_item.dataset.dataset_source else "",
dataset_type_name=ds_item.dataset.dataset_type.name,
release_version=float(data.release.version) if data.release.version else None,
release_date=datetime.strftime(data.release.release_date, "%m/%d/%Y"),
release_date=datetime.strftime(data.release.release_date, "%m/%d/%Y") if data.release.release_date else None,
release_type=data.release.release_type,
)
ds_obj_list.append(ds_info)
Expand Down
2 changes: 1 addition & 1 deletion src/tests/databases/core_7/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
24 1 genebuild.start_date 2023-08-Ensembl
25 1 genebuild.havana_datafreeze_date test2
26 \N schema_version 110
27 1 assembly.total_coding_sequence_length 8989
27 1 assembly.stats.total_coding_sequence_length 8989
2 changes: 1 addition & 1 deletion src/tests/databases/core_9/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
20 1 strain.type test
23 1 genome.genome_uuid a733550b-93e7-11ec-a39d-005056b38ce3
24 1 genebuild.havana_datafreeze_date test2
25 1 assembly.total_genome_length 546
25 1 assembly.stats.total_genome_length 546
26 1 genebuild.start_date 2023-07-Ensembl
27 \N schema_version 110
116 changes: 58 additions & 58 deletions src/tests/databases/ensembl_genome_metadata/attribute.txt
Original file line number Diff line number Diff line change
@@ -1,101 +1,101 @@
1 assembly.accession assembly.accession assembly.accession string
2 assembly.chromosomes Chromosomes or plasmids Number of structures in cells containing DNA integer
3 assembly.component_sequences Component sequences Part of the primary sequences in assembly integer
4 assembly.contig_n50 Contig N50 Median size of contigs in a genome assembly bp
2 assembly.stats.chromosomes Chromosomes or plasmids Number of structures in cells containing DNA integer
3 assembly.stats.component_sequences Component sequences Part of the primary sequences in assembly integer
4 assembly.stats.contig_n50 Contig N50 Median size of contigs in a genome assembly bp
5 assembly.date assembly.date assembly.date string
6 assembly.default assembly.default assembly.default string
7 assembly.gc_percentage Average GC content Percentage of nucleotides in DNA that are G or C percent
7 assembly.stats.gc_percentage Average GC content Percentage of nucleotides in DNA that are G or C percent
8 assembly.is_reference assembly.is_reference assembly.is_reference string
9 assembly.level assembly.level assembly.level string
10 assembly.mapping assembly.mapping assembly.mapping string
11 assembly.name assembly.name assembly.name string
12 assembly.provider_name assembly.provider_name assembly.provider_name string
13 assembly.provider_url assembly.provider_url assembly.provider_url string
14 assembly.spanned_gaps Spanned gaps Number of gaps covered by sequencing reads integer
14 assembly.stats.spanned_gaps Spanned gaps Number of gaps covered by sequencing reads integer
15 assembly.tolid assembly.tolid assembly.tolid string
16 assembly.toplevel_sequences Top level sequences Primary sequences in a genome assembly integer
17 assembly.total_coding_sequence_length Total coding sequence length Total length of all coding sequences bp
18 assembly.total_gap_length Total gap length Total length of all gaps in a genome assembly bp
19 assembly.total_genome_length Total genome length Total length of all genomic sequences bp
16 assembly.stats.toplevel_sequences Top level sequences Primary sequences in a genome assembly integer
17 assembly.stats.total_coding_sequence_length Total coding sequence length Total length of all coding sequences bp
18 assembly.stats.total_gap_length Total gap length Total length of all gaps in a genome assembly bp
19 assembly.stats.total_genome_length Total genome length Total length of all genomic sequences bp
20 assembly.ucsc_alias assembly.ucsc_alias assembly.ucsc_alias string
21 genebuild.average_cds_length Average CDS length Average length of coding sequences float
21 genebuild.stats.average_cds_length Average CDS length Average length of coding sequences float
22 genebuild.average_coding_exons_per_coding_gene Average coding exons per coding gene Average coding exons per coding gene string
23 genebuild.average_coding_exons_per_transcript Average coding exons per transcript Average coding exons per coding transcript float
24 genebuild.average_coding_exon_length Average exon length per coding gene Average length of coding exons bp
25 genebuild.average_exon_length Average exon length Average length of exons bp
26 genebuild.average_genomic_span Average coding genomic span Average length of all genomic regions bp
27 genebuild.average_intron_length Average intron length Average intron length per coding gene bp
23 genebuild.stats.average_coding_exons_per_transcript Average coding exons per transcript Average coding exons per coding transcript float
24 genebuild.stats.average_coding_exon_length Average exon length per coding gene Average length of coding exons bp
25 genebuild.stats.average_exon_length Average exon length Average length of exons bp
26 genebuild.stats.average_genomic_span Average coding genomic span Average length of all genomic regions bp
27 genebuild.stats.average_intron_length Average intron length Average intron length per coding gene bp
28 genebuild.average_sequence_legth Average coding sequence length Average length of sequences in genome bp
29 genebuild.coding_genes Coding genes Genes that code for proteins integer
30 genebuild.coding_transcripts Coding transcripts Transcripts that code for proteins integer
31 genebuild.coding_transcripts_per_gene Average coding transcripts per gene Average coding transcripts per gene float
29 genebuild.stats.coding_genes Coding genes Genes that code for proteins integer
30 genebuild.stats.coding_transcripts Coding transcripts Transcripts that code for proteins integer
31 genebuild.stats.coding_transcripts_per_gene Average coding transcripts per gene Average coding transcripts per gene float
32 genebuild.hash genebuild.hash genebuild.hash string
33 genebuild.initial_release_date genebuild.initial_release_date genebuild.initial_release_date string
34 genebuild.last_geneset_update genebuild.last_geneset_update genebuild.last_geneset_update string
35 genebuild.level genebuild.level genebuild.level string
36 genebuild.longest_gene_length Longest coding gene Length of longest gene bp
36 genebuild.stats.longest_gene_length Longest coding gene Length of longest gene bp
37 genebuild.method genebuild.method genebuild.method string
38 genebuild.method_display genebuild.method_display genebuild.method_display string
39 genebuild.nc_average_exons_per_transcript Average exons per non-coding transcript Mean exon count per transcript float
40 genebuild.nc_average_exon_length Average exon length per non-coding transcript Mean exon length bp
41 genebuild.nc_average_genomic_span Average non-coding genomic span Mean length of all genomic regions bp
42 genebuild.nc_average_sequence_length Average non-coding sequence length Mean length of all sequences bp
43 genebuild.nc_longest_gene_length Longest non-coding gene Length of longest non-coding gene bp
44 genebuild.nc_long_non_coding_genes Long non-coding genes Long genes not coding for proteins integer
45 genebuild.nc_misc_non_coding_genes Misc. non-coding genes Miscellaneous non-coding genes integer
46 genebuild.nc_non_coding_genes Non-coding genes Genes that don't code for proteins integer
47 genebuild.nc_shortest_gene_length Shortest non-coding gene Length of shortest gene bp
48 genebuild.nc_small_non_coding_genes Small non-coding genes Small genes not coding for proteins integer
49 genebuild.nc_total_introns Introns in non-coding genes Total intron count integer
50 genebuild.nc_total_transcripts Non-coding transcripts Total RNA transcript count integer
51 genebuild.nc_transcripts_per_gene Average transcripts per non-coding gene Mean transcripts count per gene float
52 genebuild.ps_average_exons_per_transcript Average exons per pseudogene transcript Mean exon count per pseudogene transcript float
53 genebuild.ps_average_exon_length Average exon length per pseudogene Mean pseudogene exon length bp
54 genebuild.ps_average_genomic_span Average pseudogene genomic span Mean length of pseudogene regions bp
55 genebuild.ps_average_intron_length Average intron length per pseudogene Mean pseudogene intron length bp
56 genebuild.ps_average_sequence_length Average pseudogene sequence length Mean length of pseudogene sequences bp
57 genebuild.ps_longest_gene_length Longest pseudogene Length of longest pseudogene bp
58 genebuild.ps_pseudogenes Pseudogenes Genes which don't code functional proteins integer
59 genebuild.ps_shortest_gene_length Shortest pseudogene Length of shortest pseudogene bp
60 genebuild.ps_total_exons Exons in pseudogenes Total exon count in pseudogenes integer
61 genebuild.ps_total_introns Introns in pseudogenes Total intron count in pseudogenes integer
62 genebuild.ps_total_transcripts Transcripts in pseudogenes Total pseudogene RNA transcript count integer
63 genebuild.ps_transcripts_per_gene Average transcripts per pseudogene Mean pseudogene transcripts count per pseudogene float
64 genebuild.shortest_gene_length Shortest coding gene Length of shortest gene bp
39 genebuild.stats.nc_average_exons_per_transcript Average exons per non-coding transcript Mean exon count per transcript float
40 genebuild.stats.nc_average_exon_length Average exon length per non-coding transcript Mean exon length bp
41 genebuild.stats.nc_average_genomic_span Average non-coding genomic span Mean length of all genomic regions bp
42 genebuild.stats.nc_average_sequence_length Average non-coding sequence length Mean length of all sequences bp
43 genebuild.stats.nc_longest_gene_length Longest non-coding gene Length of longest non-coding gene bp
44 genebuild.stats.nc_long_non_coding_genes Long non-coding genes Long genes not coding for proteins integer
45 genebuild.stats.nc_misc_non_coding_genes Misc. non-coding genes Miscellaneous non-coding genes integer
46 genebuild.stats.nc_non_coding_genes Non-coding genes Genes that don't code for proteins integer
47 genebuild.stats.nc_shortest_gene_length Shortest non-coding gene Length of shortest gene bp
48 genebuild.stats.nc_small_non_coding_genes Small non-coding genes Small genes not coding for proteins integer
49 genebuild.stats.nc_total_introns Introns in non-coding genes Total intron count integer
50 genebuild.stats.nc_total_transcripts Non-coding transcripts Total RNA transcript count integer
51 genebuild.stats.nc_transcripts_per_gene Average transcripts per non-coding gene Mean transcripts count per gene float
52 genebuild.stats.ps_average_exons_per_transcript Average exons per pseudogene transcript Mean exon count per pseudogene transcript float
53 genebuild.stats.ps_average_exon_length Average exon length per pseudogene Mean pseudogene exon length bp
54 genebuild.stats.ps_average_genomic_span Average pseudogene genomic span Mean length of pseudogene regions bp
55 genebuild.stats.ps_average_intron_length Average intron length per pseudogene Mean pseudogene intron length bp
56 genebuild.stats.ps_average_sequence_length Average pseudogene sequence length Mean length of pseudogene sequences bp
57 genebuild.stats.ps_longest_gene_length Longest pseudogene Length of longest pseudogene bp
58 genebuild.stats.ps_pseudogenes Pseudogenes Genes which don't code functional proteins integer
59 genebuild.stats.ps_shortest_gene_length Shortest pseudogene Length of shortest pseudogene bp
60 genebuild.stats.ps_total_exons Exons in pseudogenes Total exon count in pseudogenes integer
61 genebuild.stats.ps_total_introns Introns in pseudogenes Total intron count in pseudogenes integer
62 genebuild.stats.ps_total_transcripts Transcripts in pseudogenes Total pseudogene RNA transcript count integer
63 genebuild.stats.ps_transcripts_per_gene Average transcripts per pseudogene Mean pseudogene transcripts count per pseudogene float
64 genebuild.stats.shortest_gene_length Shortest coding gene Length of shortest gene bp
65 genebuild.start_date genebuild.start_date genebuild.start_date string
66 genebuild.total_coding_exons Exons in coding genes Total number of coding exons integer
67 genebuild.total_exons Exons in genes Total number of exons integer
68 genebuild.total_introns Introns in coding genes Total number of introns integer
69 genebuild.total_transcripts Transcripts in coding genes Total number of RNA transcripts integer
70 genebuild.transcripts_per_gene Average transcripts per coding gene Average number of transcripts per gene float
66 genebuild.stats.total_coding_exons Exons in coding genes Total number of coding exons integer
67 genebuild.stats.total_exons Exons in genes Total number of exons integer
68 genebuild.stats.total_introns Introns in coding genes Total number of introns integer
69 genebuild.stats.total_transcripts Transcripts in coding genes Total number of RNA transcripts integer
70 genebuild.stats.transcripts_per_gene Average transcripts per coding gene Average number of transcripts per gene float
71 genebuild.version genebuild.version genebuild.version string
72 genebuild.sample_gene genebuild.sample_gene Sample Gene Data string
73 genebuild.sample_location genebuild.sample_location Sample Location Data string
74 assembly.coverage_depth assembly.coverage_depth assembly.coverage_depth string
75 assembly.web_accession_source assembly.web_accession_source assembly.web_accession_source string
76 assembly.web_accession_type assembly.web_accession_type assembly.web_accession_type string
77 genebuild.id genebuild.id genebuild.id string
78 genebuild.nc_average_intron_length Average intron length per non-coding transcript Mean intron length bp
78 genebuild.stats.nc_average_intron_length Average intron length per non-coding transcript Mean intron length bp
79 genebuild.projection_source_db genebuild.projection_source_db genebuild.projection_source_db string
80 assembly.long_name assembly.long_name assembly.long_name string
81 assembly.url_name assembly.url_name assembly.url_name string
82 genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date string
83 assembly.version assembly.version assembly.version string
84 genebuild.provider_name genebuild.provider_name genebuild.provider_name string
85 genebuild.provider_url genebuild.provider_url genebuild.provider_url string
119 variation.short_variants Short variants Small-scale genetic variations integer
119 variation.stats.short_variants Short variants Small-scale genetic variations integer
120 variation.sample_variant variation.sample_variant variation.sample_variant string
123 variation.short_variants_with_phenotype_assertions Short variation with phenotype assertion Short variation with phenotype assertion string
161 compara.homology_coverage compara.homology_coverage compara.homology_coverage float
123 variation.stats.short_variants_with_phenotype_assertions Short variation with phenotype assertion Short variation with phenotype assertion string
161 compara.stats.homology_coverage compara.stats.homology_coverage compara.stats.homology_coverage float
162 compara.homology_reference_species compara.homology_reference_species compara.homology_reference_species string
163 regulation.open_chromatin_count regulation.open_chromatin_count Number of open chromatin regions integer
164 regulation.promoter_count regulation.promoter_count Number of promoters integer
165 regulation.enhancer_count regulation.enhancer_count Number of enhancers integer
166 regulation.ctcf_count regulation.ctcf_count Number of CTCF binding sites integer
164 regulation.stats.promoter_count regulation.stats.promoter_count Number of promoters integer
165 regulation.stats.enhancer_count regulation.stats.enhancer_count Number of enhancers integer
166 regulation.stats.ctcf_count regulation.stats.ctcf_count Number of CTCF binding sites integer
167 regulation.tfbs_count regulation.tfbs_count Number of regions enriched for transcription factor binding integer
168 assembly.tol_id assembly.tol_id assembly.tol_id string
169 genebuild.annotation_source genebuild.annotation_source genebuild.annotation_source string
170 genebuild.nc_total_exons Exons in non-coding genes Total exon count integer
170 genebuild.stats.nc_total_exons Exons in non-coding genes Total exon count integer
179 assembly.description assembly.description assembly.description string
180 assembly.master_accession assembly.master_accession assembly.master_accession string
181 assembly.alt_accession assembly.alt_accession assembly.alt_accession string
Expand Down
Loading

0 comments on commit f194674

Please sign in to comment.