Skip to content

Commit

Permalink
fix 424 - bug in agat_sp_complement_annotations.pl / check_feature_ov…
Browse files Browse the repository at this point in the history
…erlap_from_l3_to_l1 (#425)

* fix check_feature_overlap_from_l3_to_l1. fix #424

* add a new test for agat_sp_complement_annotations
  • Loading branch information
Juke34 authored Feb 13, 2024
1 parent bea962b commit 180374e
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 114 deletions.
227 changes: 113 additions & 114 deletions lib/AGAT/OmniscientTool.pm
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ sub complement_omniscients {
my ($omniscient1, $omniscient2, $size_min, $verbose)=@_;

my %add_omniscient;

$size_min=0 if (! $size_min);

if(! $verbose){$verbose=0;}
Expand All @@ -152,13 +151,12 @@ sub complement_omniscients {

#If location_to_check start if over the end of the reference location, we stop
if($location2->[1] > $location->[2]) {last;}
print "location_to_check start if over the end of the reference location.\n" if ($verbose >= 3);

#If location_to_check end if inferior to the start of the reference location, we continue next
if($location2->[2] < $location->[1]) {next;}
print "location_to_check start if inferior to the start of the reference location.\n" if ($verbose >= 3);

# Let's check at Gene LEVEL
print "location overlap at gene level check now level3.\n" if ($verbose >= 3);
if( location_overlap($location, $location2) ){ #location overlap at gene level check now level3
#let's check at CDS level (/!\ id1_l1 is corresponding to id from $omniscient2)
if(check_feature_overlap_from_l3_to_l1($omniscient2, $omniscient1, $id1_l1, $id2_l1)){ #If contains CDS it has to overlap at CDS level, otherwise any type of feature level3 overlaping is sufficient to decide that they overlap
Expand Down Expand Up @@ -2060,133 +2058,134 @@ sub check_gene_overlap_at_level3{
# if no l3 check at l2
# then if no l2 check at l1
sub check_feature_overlap_from_l3_to_l1{
my ($hash_omniscient, $hash_omniscient2, $gene_id, $gene_id2)=@_;
my ($hash_omniscient, $hash_omniscient2, $gene_id, $gene_id2)=@_;

my $level3_global=0;
my $overlap_ft = undef;
my $level2_overlap=undef;
my $exist_l2A=undef;
my $exist_l2B=undef;
my $level3_global=0;
my $overlap_ft = undef;
my $level2_overlap=undef;
my $exist_l2A=undef;
my $exist_l2B=undef;

# check at L3
# check at L3
foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){

if(exists_keys($hash_omniscient,('level2', $l2_type, lc($gene_id)))){
$exist_l2A=1;
if(exists_keys($hash_omniscient,('level2', $l2_type, lc($gene_id)))){
$exist_l2A=1;

foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{$l2_type}{lc($gene_id)}}){
my $mrna_id1 = $mrna_feature->_tag_value('ID');

if(exists_keys($hash_omniscient2,('level2', $l2_type, lc($gene_id2)))){
# from here both feature level2 are the same type
foreach my $mrna_feature2 (@{$hash_omniscient2->{'level2'}{$l2_type}{lc($gene_id2)}}){
my $mrna_id2 = $mrna_feature2->_tag_value('ID');

# from here both feature level2 are the same type
foreach my $mrna_feature2 (@{$hash_omniscient2->{'level2'}{$l2_type}{lc($gene_id2)}}){
my $mrna_id2 = $mrna_feature2->_tag_value('ID');
# check overlap in case for later if Level3 test not possible
if(($mrna_feature2->start <= $mrna_feature->end) and ($mrna_feature2->end >= $mrna_feature->start )){ # they overlap
$level2_overlap=$l2_type;
}

# check overlap in case for later if LEvel3 test not possible
if(($mrna_feature2->start <= $mrna_feature->end) and ($mrna_feature2->end >= $mrna_feature->start )){ # they overlap
$level2_overlap=$l2_type;
}
#check all cds pieces - CDS against CDS
my $cds_local=0;
$cds_local++ if ( exists_keys($hash_omniscient,('level3', 'cds', lc($mrna_id1))));
$cds_local++ if ( exists_keys($hash_omniscient2,('level3', 'cds', lc($mrna_id2))));
$level3_global += $cds_local;
if( $cds_local == 2) {
foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id1)}}){
foreach my $cds_feature2 (@{$hash_omniscient2->{'level3'}{'cds'}{lc($mrna_id2)}}){
if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap
$overlap_ft = "cds";
last;
}
}
last if ($overlap_ft);
}
}

#check all cds pieces - CDS against CDS
my $cds_local=0;
$cds_local++ if ( exists_keys($hash_omniscient,('level3', 'cds', lc($mrna_id1))));
$cds_local++ if ( exists_keys($hash_omniscient,('level3', 'cds', lc($mrna_id2))));
$level3_global += $cds_local;
if( $cds_local == 2) {
foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id1)}}){
foreach my $cds_feature2 (@{$hash_omniscient2->{'level3'}{'cds'}{lc($mrna_id2)}}){
if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap
$overlap_ft = "cds";
last;
}
}
last if ($overlap_ft);
}
}
# No CDS, check at exon
elsif(!$cds_local){
# check all exon pieces - exon against exon
my $exon_local=0;
$exon_local++ if ( exists_keys($hash_omniscient,('level3', 'exon', lc($mrna_id1))));
$exon_local++ if ( exists_keys($hash_omniscient,('level3', 'exon', lc($mrna_id2))));
$level3_global += $exon_local;
if( $exon_local == 2) {
foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'exon'}{lc($mrna_id1)}}){
foreach my $cds_feature2 (@{$hash_omniscient2->{'level3'}{'exon'}{lc($mrna_id2)}}){
if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap
$overlap_ft = "exon";
last;
}
}
last if ($overlap_ft);
}
}
# No CDS, No exon, check at other feature types
elsif(!$exon_local){
foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){
if ($tag_l3 ne "cds" or $tag_l3 ne "exon"){
my $l3_local=0;
$l3_local++ if ( exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id1))));
$l3_local++ if ( exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id2))));
$level3_global += $l3_local;
if( $l3_local == 2) {
foreach my $feature1 (@{$hash_omniscient->{'level3'}{$tag_l3}{lc($mrna_id1)}}){
foreach my $feature2 (@{$hash_omniscient2->{'level3'}{$tag_l3}{lc($mrna_id2)}}){
if(($feature2->start <= $feature1->end) and ($feature2->end >= $feature1->start )){ # they overlap
$overlap_ft = $tag_l3;
last;
}
last if ($overlap_ft);
}
}
}
last if ($overlap_ft);
}
}
}
}
last if ($overlap_ft);
}
# No CDS, check at exon
elsif(!$cds_local){
# check all exon pieces - exon against exon
my $exon_local=0;
$exon_local++ if ( exists_keys($hash_omniscient,('level3', 'exon', lc($mrna_id1))));
$exon_local++ if ( exists_keys($hash_omniscient,('level3', 'exon', lc($mrna_id2))));
$level3_global += $exon_local;
if( $exon_local == 2) {
foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'exon'}{lc($mrna_id1)}}){
foreach my $cds_feature2 (@{$hash_omniscient2->{'level3'}{'exon'}{lc($mrna_id2)}}){
if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap
$overlap_ft = "exon";
last;
}
}
last if ($overlap_ft);
}
}
# No CDS, No exon, check at other feature types
elsif(!$exon_local){
foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){
if ($tag_l3 ne "cds" or $tag_l3 ne "exon"){
my $l3_local=0;
$l3_local++ if ( exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id1))));
$l3_local++ if ( exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id2))));
$level3_global += $l3_local;
if( $l3_local == 2) {
foreach my $feature1 (@{$hash_omniscient->{'level3'}{$tag_l3}{lc($mrna_id1)}}){
foreach my $feature2 (@{$hash_omniscient2->{'level3'}{$tag_l3}{lc($mrna_id2)}}){
if(($feature2->start <= $feature1->end) and ($feature2->end >= $feature1->start )){ # they overlap
$overlap_ft = $tag_l3;
last;
}
last if ($overlap_ft);
}
}
}
last if ($overlap_ft);
}
}
}
}
last if ($overlap_ft);
}
}
last if ($overlap_ft);
last if ($overlap_ft);
}
}
last if ($overlap_ft);
last if ($overlap_ft);
}
# Level3 test not possible, check overlap at level2
if (! $level3_global){ #nothing tested at level3
if ( $level2_overlap){
$overlap_ft = $level2_overlap;
}
else{
if (! $exist_l2A){
# check other locus was wihtout l2 also
foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){
if(exists_keys($hash_omniscient,('level2', $l2_type, lc($gene_id2)))){
$exist_l2B=1;
}
}
if (! $exist_l2B){ # locus2 was also without l2 let's check at level1 now
foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}} ){
if(exists_keys($hash_omniscient,('level1', $tag_l1, lc($gene_id)))){
if(exists_keys($hash_omniscient,('level1', $tag_l1, lc($gene_id2)))){
my $level1_feature = $hash_omniscient->{'level1'}{$tag_l1}{$gene_id};
my $level1_feature2 = $hash_omniscient->{'level1'}{$tag_l1}{$gene_id2};
if(($level1_feature2->start <= $level1_feature->end) and ($level1_feature2->end >= $level1_feature->start )){ # they overlap
$overlap_ft=$tag_l1;
last
}
}
else{
last;
}
}
}
}
}
}
}

# Level3 test not possible, check overlap at level2
if (! $level3_global){ #nothing tested at level3
if ( $level2_overlap){
$overlap_ft = $level2_overlap;
}
else{
if (! $exist_l2A){
# check other locus was wihtout l2 also
foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){
if(exists_keys($hash_omniscient,('level2', $l2_type, lc($gene_id2)))){
$exist_l2B=1;
}
}
if (! $exist_l2B){ # locus2 was also without l2 let's check at level1 now
foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}} ){
if(exists_keys($hash_omniscient,('level1', $tag_l1, lc($gene_id)))){
if(exists_keys($hash_omniscient,('level1', $tag_l1, lc($gene_id2)))){
my $level1_feature = $hash_omniscient->{'level1'}{$tag_l1}{$gene_id};
my $level1_feature2 = $hash_omniscient->{'level1'}{$tag_l1}{$gene_id2};
if(($level1_feature2->start <= $level1_feature->end) and ($level1_feature2->end >= $level1_feature->start )){ # they overlap
$overlap_ft=$tag_l1;
last
}
}
else{
last;
}
}
}
}
}
}
}
return $overlap_ft;
}

Expand Down
5 changes: 5 additions & 0 deletions t/scripts_output.t
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,11 @@ system(" $script --ref t/gff_syntax/in/25_test.gff --add t/gff_syntax/in/9_test
ok( system("diff $result $outtmp") == 0, "output $script");
unlink $outtmp;

$result = "$output_folder/agat_sp_complement_annotations_2.gff";
system(" $script --ref $input_folder/agat_sp_complement_annotations/agat_sp_complement_annotations_ref.gff --add $input_folder/agat_sp_complement_annotations/agat_sp_complement_annotations_add.gff -o $outtmp 2>&1 1>/dev/null");
#run test
ok( system("diff $result $outtmp") == 0, "output $script");
unlink $outtmp;

# --------check agat_sp_ensembl_output_style.pl-------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Chr1 Liftoff gene 41075 54676 . + . ID=gene-gene1
Chr1 Liftoff mRNA 41075 54676 . + . ID=rna-transcript1;Parent=gene-gene1
Chr1 Liftoff exon 41075 54676 . + . ID=exon-transcript1-1;Parent=rna-transcript1
Chr1 Liftoff CDS 41311 54435 . + 0 ID=cds-codingsequence-1.1;Parent=rna-transcript1
Chr1 Liftoff five_prime_UTR 41075 41310 . + . ID=nbis-five_prime_utr-19342;Parent=rna-transcript1
Chr1 Liftoff three_prime_UTR 54436 54676 . + . ID=nbis-three_prime_utr-18368;Parent=rna-transcript1
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Chr1 Helixer gene 41064 54753 . + . ID=Chr1_000001
Chr1 Helixer mRNA 41064 54753 . + . ID=Chr1_000001.1;Parent=Chr1_000001
Chr1 Helixer exon 41064 54753 . + . ID=Chr1_000001.1.exon.1;Parent=Chr1_000001.1
Chr1 Helixer five_prime_UTR 41064 41310 . + . ID=Chr1_000001.1.five_prime_UTR.1;Parent=Chr1_000001.1
Chr1 Helixer CDS 41311 54435 . + 0 ID=Chr1_000001.1.CDS.1;Parent=Chr1_000001.1
Chr1 Helixer three_prime_UTR 54436 54753 . + . ID=Chr1_000001.1.three_prime_UTR.1;Parent=Chr1_000001.1
7 changes: 7 additions & 0 deletions t/scripts_output/out/agat_sp_complement_annotations_2.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##gff-version 3
Chr1 Helixer gene 41064 54753 . + . ID=Chr1_000001
Chr1 Helixer mRNA 41064 54753 . + . ID=Chr1_000001.1;Parent=Chr1_000001
Chr1 Helixer exon 41064 54753 . + . ID=Chr1_000001.1.exon.1;Parent=Chr1_000001.1
Chr1 Helixer CDS 41311 54435 . + 0 ID=Chr1_000001.1.CDS.1;Parent=Chr1_000001.1
Chr1 Helixer five_prime_UTR 41064 41310 . + . ID=Chr1_000001.1.five_prime_UTR.1;Parent=Chr1_000001.1
Chr1 Helixer three_prime_UTR 54436 54753 . + . ID=Chr1_000001.1.three_prime_UTR.1;Parent=Chr1_000001.1

0 comments on commit 180374e

Please sign in to comment.