From d8a3f1593687b0ec9ed0a1aaed91c9c0c6c3cf6d Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 4 Dec 2024 16:18:17 +0100 Subject: [PATCH] fix #507 - add option raw to print raw data in a dedidcated folder (#510) * Update agat_sp_statistics.pl * add _print_raw function in OmniscientStat.pm --- bin/agat_sp_statistics.pl | 32 +++- docs/tools/agat_sp_statistics.md | 8 +- lib/AGAT/OmniscientStat.pm | 285 ++++++++++++++++++------------- t/scripts_output.t | 3 +- 4 files changed, 207 insertions(+), 121 deletions(-) diff --git a/bin/agat_sp_statistics.pl b/bin/agat_sp_statistics.pl index 16f937d5..ae43608e 100755 --- a/bin/agat_sp_statistics.pl +++ b/bin/agat_sp_statistics.pl @@ -17,6 +17,7 @@ my $opt_percentile = 90; my $opt_genomeSize = undef; my $opt_plot = undef; +my $opt_raw = undef; my $opt_verbose = 0; my $opt_help= 0; @@ -25,8 +26,9 @@ "h|help" => \$opt_help, 'o|output=s' => \$opt_output, 'percentile=i' => \$opt_percentile, - 'yaml!' => \$opt_yaml, - 'd|p' => \$opt_plot, + 'yaml!' => \$opt_yaml, + 'r|raw!' => \$opt_raw, + 'd|p!' => \$opt_plot, 'v|verbose' => \$opt_verbose, 'g|f|gs=s' => \$opt_genomeSize, "gff|i=s" => \$gff)) @@ -62,6 +64,23 @@ } } +# Manage raw data +if($opt_raw){ + if ($opt_output){ + $opt_raw = $opt_output."_raw_data"; + } + else{ + $opt_raw = "raw_data"; + + if (-f $opt_raw){ + print "Cannot create a directory with the name $opt_raw because a file with this name already exists.\n";exit(); + } + if (-d $opt_raw){ + print "Cannot create a directory with the name $opt_raw because a folder with this name already exists.\n";exit(); + } + } +} + #Manage plot folder output if($opt_plot){ @@ -107,6 +126,7 @@ percentile => $opt_percentile, output => $out, yaml => $opt_yaml, + raw => $opt_raw, distri => $opt_plot, isoform => 1, verbose => $opt_verbose @@ -150,7 +170,7 @@ =head1 OPTIONS =item B<-d> or B<-p> -When this option is used, an histogram of distribution of the features will be printed in pdf files. (d means distribution, p means plot). +Bolean - When this option is used, an histogram of distribution of the features will be printed in pdf files in a folder with distribution_plots suffix. (d means distribution, p means plot). =item B<-v> or B<--verbose> @@ -164,9 +184,13 @@ =head1 OPTIONS Integer - Percentile to compute. Default is 90. +=item B<-r> or B<--raw> + +Bolean - When this option is used, the raw data (same as used to create histogram of distribution of the features) are printed in a dedicated folder with raw_data suffix. + =item B<--yaml> -Bolean - When this option is activated , a second output will be printed either in STDOUT if no output provided or in (a .yaml suffix is added to the --output value provided) +Bolean - When this option is activated, a second output will be printed either in STDOUT if no output provided or in (a .yaml suffix is added to the --output value provided). =item B<-c> or B<--config> diff --git a/docs/tools/agat_sp_statistics.md b/docs/tools/agat_sp_statistics.md index 6571f4b8..4750c2f7 100644 --- a/docs/tools/agat_sp_statistics.md +++ b/docs/tools/agat_sp_statistics.md @@ -29,7 +29,7 @@ agat_sp_statistics.pl --help - **-d** or **-p** - When this option is used, an histogram of distribution of the features will be printed in pdf files. (d means distribution, p means plot). + Bolean - When this option is used, an histogram of distribution of the features will be printed in pdf files in a folder with distribution_plots suffix. (d means distribution, p means plot). - **-v** or **--verbose** @@ -43,9 +43,13 @@ agat_sp_statistics.pl --help Integer - Percentile to compute. Default is 90. +- **-r** or **--raw** + + Bolean - When this option is used, the raw data (same as used to create histogram of distribution of the features) are printed in a dedicated folder with raw_data suffix. + - **--yaml** - Bolean - When this option is activated , a second output will be printed either in STDOUT if no output provided or in (a .yaml suffix is added to the --output value provided) + Bolean - When this option is activated, a second output will be printed either in STDOUT if no output provided or in (a .yaml suffix is added to the --output value provided). - **-c** or **--config** diff --git a/lib/AGAT/OmniscientStat.pm b/lib/AGAT/OmniscientStat.pm index 947228c8..7dad2858 100644 --- a/lib/AGAT/OmniscientStat.pm +++ b/lib/AGAT/OmniscientStat.pm @@ -52,7 +52,7 @@ sub print_omniscient_statistics{ if(ref($args) ne 'HASH'){ print "Hash Arguments expected for print_omniscient_statistics. Please check the call.\n";exit; } # Declare all variables and fill them - my ($omniscient, $genome_size, $output, $yaml, $verbose, $distri, $isoform, $percentile); + my ($omniscient, $genome_size, $output, $yaml, $raw, $verbose, $distri, $isoform, $percentile); # omniscient if( defined($args->{input})) {$omniscient = $args->{input};} @@ -93,6 +93,10 @@ sub print_omniscient_statistics{ if( defined($args->{yaml}) ) { $yaml = $args->{yaml}; } + # Raw data output filename + if( defined($args->{raw}) ) { + $raw = $args->{raw}; + } # add verbosity if( ! defined($args->{verbose}) ) {$verbose = 0;} else{ $verbose = $args->{verbose}; } @@ -165,6 +169,9 @@ sub print_omniscient_statistics{ if($distri){ _print_distribution($distri, "with_isoforms", $distri_hash); } + if($raw){ + _print_raw($raw, "with_isoforms", $distri_hash); + } # fill for YAML $hash_yaml{ $by_type }{"isoform"}="no"; fill_yaml_hash(\%hash_yaml, $by_type, "without_isoforms", $stat); @@ -176,11 +183,14 @@ sub print_omniscient_statistics{ print $output file_text_line({ string => " $by_type", char => "-" }); print_sentences($output, $stat); if($distri){ - _print_distribution($distri, "with_isoform", $distri_hash); + _print_distribution($distri, "with_isoforms", $distri_hash); + } + if($raw){ + _print_raw($raw, "with_isoforms", $distri_hash); } # fill for YAML $hash_yaml{ $by_type }{"isoform"}="yes"; - fill_yaml_hash(\%hash_yaml, $by_type, "with_isoform", $stat); + fill_yaml_hash(\%hash_yaml, $by_type, "with_isoforms", $stat); # Print text file print $output "$by_type have isoforms! Here are the statistics without isoforms shortest isoforms excluded):\n\n"; @@ -190,6 +200,9 @@ sub print_omniscient_statistics{ if($distri){; _print_distribution($distri, "without_isoforms", $distri2_hash); } + if($raw){ + _print_raw($raw, "without_isoforms", $distri_hash); + } # fill for YAML fill_yaml_hash(\%hash_yaml, $by_type, "without_isoform", $stat2); @@ -200,7 +213,10 @@ sub print_omniscient_statistics{ print $output file_text_line({ string => " $by_type", char => "-" }); print_sentences($output, $stat); if($distri){ - _print_distribution($distri, "with_isoform", $distri_hash); + _print_distribution($distri, "with_isoforms", $distri_hash); + } + if($raw){ + _print_raw($raw, "with_isoforms", $distri_hash); } # fill for YAML $hash_yaml{ $by_type }{"isoform"}="NA"; @@ -271,6 +287,44 @@ sub recreate_sentence{ return $sentence; } +# @Purpose: Purpose print raw data from feature statistics +# @input: 3 => String (folder), string (sub folder with or without iso) and hash (distribution) +# @output: 0 +sub _print_raw{ + my ($folder, $subfolder, $distri)=@_; + + foreach my $type (keys %{$distri} ) { + + foreach my $level (keys %{$distri->{$type}} ) { + foreach my $tag ( keys %{$distri->{$type}{$level}} ) { + if( exists_keys ($distri,($type, $level, $tag, 'whole') ) ){ + + if(! -d $folder){ + mkdir $folder; + } + + if(! -d $folder."/".$subfolder){ + mkdir $folder."/".$subfolder; + } + + my $output_tsv = $folder."/".$subfolder."/".$type."Class_".$tag.".tsv"; + + # Open the file for writing + open my $fh, '>', $output_tsv or die "Cannot write in file: $!"; + print $fh "Name\tSize(bp)\n"; # Print header + # Print the list to the file + foreach my $tuple (@{$distri->{$type}{$level}{$tag}{'whole'}}) { + print $fh join("\t", @$tuple), "\n"; # join tuple element with tab and print + } + + # Close the file + close $fh; + } + } + } + } +} + # @Purpose: Purpose print distribution from feature statistics # @input: 2 => String (folder), string (sub folder with or without iso) and hash (distribution) # @output: 0 @@ -293,9 +347,12 @@ sub _print_distribution{ my $outputPDF = $folder."/".$subfolder."/".$type."Class_".$tag.".pdf"; + # Create a new list containing only the second element of each tuple + my @first_elements = map { $_->[1] } @{$distri->{$type}{$level}{$tag}{'whole'}}; + #CREATE THE R COMMAND - my $nbValues = @{$distri->{$type}{$level}{$tag}{'whole'}}; - my $R_command = rcc_plot_from_list($distri->{$type}{$level}{$tag}{'whole'}, "", "histogram", "$tag"." size (nt)", "Number of $tag", "Distribution of $tag sizes\nMade with $nbValues $tag", $outputPDF); + my $nbValues = @first_elements; + my $R_command = rcc_plot_from_list(\@first_elements, "", "histogram", "$tag"." size (nt)", "Number of $tag", "Distribution of $tag sizes\nMade with $nbValues $tag", $outputPDF); #EXECUTE THE R COMMAND execute_R_command($R_command); } @@ -393,7 +450,7 @@ sub get_omniscient_statistics_for_topfeature{ push @{$all_info{$tag_l1}{'level1'}{$tag_l1}{'size_list'}}, $sizeFeature; #create distribution list - push @{$all_info{$tag_l1}{'level1'}{$tag_l1}{'distribution'}}, $sizeFeature; + push @{$all_info{$tag_l1}{'level1'}{$tag_l1}{'distribution'}}, [$feature_l1->_tag_value('ID'), $sizeFeature]; # grab longest if ((! $all_info{$tag_l1}{'level1'}{$tag_l1}{'longest'}) or ($all_info{$tag_l1}{'level1'}{$tag_l1}{'longest'} < $sizeFeature)){ @@ -449,7 +506,7 @@ sub get_omniscient_statistics_from_l2{ push @{$all_info{$tag_l2}{'level1'}{$tag_l1}{'size_list'}}, $sizeFeature; #create distribution list - push @{$all_info{$tag_l2}{'level1'}{$tag_l1}{'distribution'}}, $sizeFeature; + push @{$all_info{$tag_l2}{'level1'}{$tag_l1}{'distribution'}}, [$feature_l1->_tag_value('ID'), $sizeFeature]; # grab longest if ((! $all_info{$tag_l2}{'level1'}{$tag_l1}{'longest'}) or ($all_info{$tag_l2}{'level1'}{$tag_l1}{'longest'} < $sizeFeature)){ @@ -477,7 +534,7 @@ sub get_omniscient_statistics_from_l2{ push @{$all_info{$tag_l2}{'level2'}{$tag_l2}{'size_list'}}, $sizeFeature; #create distribution list - push @{$all_info{$tag_l2}{'level2'}{$tag_l2}{'distribution'}}, $sizeFeature; + push @{$all_info{$tag_l2}{'level2'}{$tag_l2}{'distribution'}}, [$feature_l2->_tag_value('ID'), $sizeFeature]; # grab longest if ((! $all_info{$tag_l2}{'level2'}{$tag_l2}{'longest'}) or ($all_info{$tag_l2}{'level2'}{$tag_l2}{'longest'} < $sizeFeature)){ @@ -506,7 +563,7 @@ sub get_omniscient_statistics_from_l2{ push @{$all_info{$tag_l2}{'level2'}{'intron'}{'size_list'}}, $intronSize; #create distribution list - push @{$all_info{$tag_l2}{'level2'}{'intron'}{'distribution'}}, $sizeFeature; + push @{$all_info{$tag_l2}{'level2'}{'intron'}{'distribution'}}, [$feature_l2->_tag_value('ID'), $sizeFeature]; # grab longest if ((! $all_info{$tag_l2}{'level2'}{'intron'}{'longest'}) or ($all_info{$tag_l2}{'level2'}{'intron'}{'longest'} < $intronSize)){ @@ -521,149 +578,149 @@ sub get_omniscient_statistics_from_l2{ } } - # +----------------------------------------------------+ - # | FEATURE LEVEL3 | - # f+----------------------------------------------------+ + # +----------------------------------------------------+ + # | FEATURE LEVEL3 | + # +----------------------------------------------------+ my $utr3 = undef; my $utr5 = undef; my $id_l2=lc($feature_l2->_tag_value('ID')); - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ + foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - if( exists_keys ($hash_omniscient, ('level3', $tag_l3, $id_l2) ) ){ + if( exists_keys ($hash_omniscient, ('level3', $tag_l3, $id_l2) ) ){ my $sizeMultiFeat=0; my $counterL3=-1; my $indexLast = $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; - foreach my $feature_l3 ( @sortedList ){ + foreach my $feature_l3 ( @sortedList ){ - #count number feature of tag_l3 type - $counterL3++; + #count number feature of tag_l3 type + $counterL3++; - #------------------------------------------------- - # Manage Introns - #------------------------------------------------- - # from the second intron to the last (from index 1 to last index of the table sortedList) - # We go inside this loop only if we have more than 1 feature. - if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end - 1; + #------------------------------------------------- + # Manage Introns + #------------------------------------------------- + # from the second intron to the last (from index 1 to last index of the table sortedList) + # We go inside this loop only if we have more than 1 feature. + if($counterL3 > 0 and $counterL3 <= $indexLast){ + my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end - 1; - #compute feature size - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'size_feat'} += $intronSize; - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'size_list'}}, $intronSize; + #compute feature size + $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'size_feat'} += $intronSize; + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'size_list'}}, $intronSize; - #create distribution list - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'distribution'}}, $sizeFeature; + #create distribution list + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'distribution'}}, [$feature_l3->_tag_value('ID'), $sizeFeature]; - # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'} < $intronSize)){ + # grab longest + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'} < $intronSize)){ $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'}=$intronSize; } # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'} > $intronSize)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}=$intronSize; - } + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'} > $intronSize)){ + $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}=$intronSize; + } - #Count number - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'nb_feat'} += 1; - } + #Count number + $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'nb_feat'} += 1; + } - #compute cumulative feature size - my $sizeFeature=($feature_l3->end-$feature_l3->start)+1; - $all_info{$tag_l2}{'level3'}{$tag_l3}{'size_feat'} += $sizeFeature; - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'size_list'}}, $sizeFeature; + #compute cumulative feature size + my $sizeFeature=($feature_l3->end-$feature_l3->start)+1; + $all_info{$tag_l2}{'level3'}{$tag_l3}{'size_feat'} += $sizeFeature; + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'size_list'}}, $sizeFeature; - #------------------------------------------------- - # MANAGE SPREAD FEATURES (multi exon features) - #------------------------------------------------- - if(($tag_l3 =~ /cds/) or ($tag_l3 =~ /utr/)){ - $sizeMultiFeat += $sizeFeature; - $all_info{$tag_l2}{'level3'}{$tag_l3}{'exon'}{'nb_feat'}++; + #------------------------------------------------- + # MANAGE SPREAD FEATURES (multi exon features) + #------------------------------------------------- + if(($tag_l3 =~ /cds/) or ($tag_l3 =~ /utr/)){ + $sizeMultiFeat += $sizeFeature; + $all_info{$tag_l2}{'level3'}{$tag_l3}{'exon'}{'nb_feat'}++; - #### MANAGE piece of multi exon features (spread features) + #### MANAGE piece of multi exon features (spread features) - #create distribution list of multifeature piece - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'distribution'}}, $sizeFeature; + #create distribution list of multifeature piece + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'distribution'}}, [$feature_l3->_tag_value('ID'), $sizeFeature]; # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'} < $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}=$sizeFeature; + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'} < $sizeFeature)){ + $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}=$sizeFeature; + } + # grab shorter + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'} > $sizeFeature)){ + $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}=$sizeFeature; } - # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}=$sizeFeature; - } - } - #------------------------------------------------- - # MANAGE single FEATURES (multi exon features) - #------------------------------------------------- - else{ - #count number of feature - $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; - - #create distribution list of multifeature piece - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeFeature)){ + } + #------------------------------------------------- + # MANAGE single FEATURES (multi exon features) + #------------------------------------------------- + else{ + #count number of feature + $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; + + #create distribution list of multifeature piece + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, [$feature_l3->_tag_value('ID'), $sizeFeature]; + + # grab longest + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeFeature)){ $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}=$sizeFeature; } # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeFeature; - } - } - #################### - #mange utr per mRNA - if ($tag_l3 =~ /three_prime_utr/){ - $utr3=1; - } - if ($tag_l3 =~ /five_prime_utr/){ - $utr5=1; - } - }# END FOREACH L3 + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeFeature)){ + $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeFeature; + } + } + #################### + #mange utr per mRNA + if ($tag_l3 =~ /three_prime_utr/){ + $utr3=1; + } + if ($tag_l3 =~ /five_prime_utr/){ + $utr5=1; + } + }# END FOREACH L3 - #---------------------------------------- - # NOW TAKE CARE OF MULTIFEATURE AND L2 - #in that case the feature was split in several peaces that have been glue together - if (($tag_l3 =~ /utr/) or ($tag_l3 =~ /cds/)){ - #count number of feature - $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; + #---------------------------------------- + # NOW TAKE CARE OF MULTIFEATURE AND L2 + #in that case the feature was split in several peaces that have been glue together + if (($tag_l3 =~ /utr/) or ($tag_l3 =~ /cds/)){ + #count number of feature + $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; - #create distribution list - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, $sizeMultiFeat; + #create distribution list - get the id of the last piece + push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, [$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'distribution'}[$#{$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'distribution'}}][0], $sizeMultiFeat]; - # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeMultiFeat)){ + # grab longest + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeMultiFeat)){ $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}=$sizeMultiFeat; } # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeMultiFeat)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeMultiFeat; - } - } - - if ($tag_l3 =~ /exon/){ - if ($indexLast == 0) { - # body... - $extra_info{'single'}{$tag_l2}{'level2'}{$tag_l2}++; - } - else{ - $All_l2_single=undef; - } - } + if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeMultiFeat)){ + $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeMultiFeat; + } + } + + if ($tag_l3 =~ /exon/){ + if ($indexLast == 0) { + # body... + $extra_info{'single'}{$tag_l2}{'level2'}{$tag_l2}++; + } + else{ + $All_l2_single=undef; + } + } } }# END all feature level 3 - # 1) Manage UTR both side - if ($utr3 and $utr5){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_both_side'}++; - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; - } # 2) Manage UTR at least one side - elsif ($utr3 or $utr5){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; + # 1) Manage UTR both side + if ($utr3 and $utr5){ + $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_both_side'}++; + $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; + } # 2) Manage UTR at least one side + elsif ($utr3 or $utr5){ + $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; } }# END all feature level 2 diff --git a/t/scripts_output.t b/t/scripts_output.t index 8da43aa6..487bb26b 100644 --- a/t/scripts_output.t +++ b/t/scripts_output.t @@ -691,10 +691,11 @@ rmtree $outtmp; $script = $script_prefix."bin/agat_sp_statistics.pl"; $result = "$output_folder/agat_sp_statistics_1.txt"; -system(" $script --gff $input_folder/1.gff -o $outtmp 2>&1 1>/dev/null"); +system(" $script --gff $input_folder/1.gff -o $outtmp -r 2>&1 1>/dev/null"); #run test ok( system("diff $result $outtmp") == 0, "output $script"); unlink $outtmp; +rmtree $outtmp."_raw_data"; # --------check agat_sp_webApollo_compliant.pl-------------