-
Notifications
You must be signed in to change notification settings - Fork 0
/
gff2gtf
executable file
·104 lines (86 loc) · 2.1 KB
/
gff2gtf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#! /usr/bin/env perl
use strict;
use feature 'postderef';
use warnings FATAL => 'all';
no warnings "experimental::postderef";
#use Carp::Always;
# use FindBin;
# use lib "$FindBin::Bin";
# use Xyzzy;
use constant { TRUE => 1, FALSE => 0 };
# Attach the :utf8 I/O layer to both standard streams so that text is
# decoded on input and encoded on output. Printing a character above 0xFF
# to a handle without an encoding layer raises Perl's "Wide character"
# warning, and this script makes all warnings fatal.
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
# ------------------------------------------------------------------------
use File::Basename;
use Getopt::Std;
# Package variable that Getopt::Std's getopts() sets when -h is given.
our $opt_h;

# Name the script was invoked as, with any leading directory removed.
my $progname = basename($0);

# Help text printed by usage().
my $usage_str = join("",
    "Usage: $progname [options]\n",
    "-h - print help\n",
);
# Print the usage message to STDERR and terminate the program.
#
# Arguments:
#   $status - optional process exit status; 0 when omitted.
#
# Never returns.
sub usage {
    my ($status) = @_;
    print STDERR $usage_str;
    # exit() takes its operand in scalar context, so handing it @_ directly
    # would exit with the number of arguments instead of the requested
    # status. Unpack the status explicitly and default it to success.
    exit($status // 0);
}
# Command-line handling: -h is the only recognised option and no
# positional arguments are accepted (input comes from STDIN).
my $stat = getopts('h');
usage(1) unless $stat;    # getopts() rejected an option
usage()  if $opt_h;       # help requested
usage(1) if @ARGV;        # leftover positional arguments
# ------------------------------------------------------------------------
# Sentinel stored as the value of an attribute that was given without
# "=value". It contains ";" so it cannot collide with a parsed value,
# because ";" is the separator the parser splits on.
use constant { NO_VALUE => ";no-value;" };

# Parse the attribute column of a GFF record.
#
# Arguments:
#   $raw_attributes - string of attributes separated by ";" (optionally
#                     followed by spaces), each either "key=value" or a
#                     bare "key". May be undef or empty.
#
# Returns:
#   A hash reference mapping each key to its value. Bare keys map to
#   NO_VALUE. When a key occurs more than once the last occurrence wins.
#   An undef or empty input yields an empty hash.
sub parse_gff_attributes {
    my ($raw_attributes) = @_;
    my $attributes = {};

    # A record with a missing attribute column hands us undef; splitting
    # that would raise an "uninitialized value" warning.
    return $attributes unless defined $raw_attributes;

    foreach my $key_val (split(/; */, $raw_attributes)) {
        # A leading ";" or a doubled ";;" produces an empty segment; it
        # carries no attribute, so do not record an empty-named key.
        next if $key_val eq "";

        my ($key, $val);
        if ($key_val =~ /^([^=]+)=(.*)/) {
            # Split on the first "=" only; later "=" stay in the value.
            ($key, $val) = ($1, $2);
        }
        else {
            ($key, $val) = ($key_val, NO_VALUE);
        }
        $attributes->{$key} = $val;
    }
    return $attributes;
}
# Serialise an attribute hash into the attribute column of the output.
#
# Arguments:
#   $attributes - hash reference of key => value, as produced by
#                 parse_gff_attributes(). A value equal to NO_VALUE marks
#                 a bare key.
#
# Returns:
#   A string of "key value" items (or bare "key") joined with ";", with
#   keys in sorted order so the output is deterministic.
#
# A value is wrapped in double quotes when it is empty or contains a
# space, ";", a quote character or a backslash. Embedded '"' and '\' are
# backslash-escaped so the quoted string stays balanced.
#
# NOTE(review): the GTF 2.2 description asks for every textual value to be
# quoted and for attributes to be terminated with "; ". This routine keeps
# the script's existing lighter format for ordinary values; confirm what
# downstream consumers expect before tightening it.
sub unparse_gtf_attributes {
    my ($attributes) = @_;
    my @items;
    foreach my $key (sort(keys %$attributes)) {
        my $val = $attributes->{$key};
        if ($val eq NO_VALUE) {
            push @items, $key;
            next;
        }
        if ($val eq "" or $val =~ /[ ;"'\\]/) {
            # Escape before wrapping, otherwise an embedded quote would
            # terminate the quoted string early.
            $val =~ s/(["\\])/\\$1/g;
            $val = '"' . $val . '"';
        }
        push @items, $key . " " . $val;
    }
    return join(";", @items);
}
# ------------------------------------------------------------------------
# Main conversion loop: read GFF records from STDIN and write each one to
# STDOUT with its attribute column rewritten by unparse_gtf_attributes().
#
# - Comment/directive lines (starting with "#") and blank lines are skipped.
# - Reading stops at a "##FASTA" directive or a ">" header, after which a
#   GFF3 file contains sequence data rather than feature records.
# - Every record must carry a gene_id; when it is absent the ID attribute
#   is copied into gene_id. A record with neither is a fatal error.
#
# The die messages have no trailing newline on purpose: Perl then appends
# the script location and the current "<STDIN> line N".
while (my $line = <STDIN>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings

    last if $line =~ /^(?:##FASTA|>)/;
    next if $line =~ /^#/;
    next if $line !~ /\S/;

    my @columns = split(/\t/, $line);
    if (scalar(@columns) < 9) {
        die "malformed GFF record: expected 9 tab-separated columns, found "
            . scalar(@columns);
    }
    my ($accession, $source, $feature, $start, $end,
        $score, $strand, $frame, $raw_attributes) = @columns;

    my $attributes = parse_gff_attributes($raw_attributes);

    # Fall back to ID when the record has no explicit gene_id.
    if (!defined($attributes->{gene_id})) {
        if (!defined($attributes->{ID})) {
            die "GFF record has neither a gene_id nor an ID attribute";
        }
        $attributes->{gene_id} = $attributes->{ID};
    }

    print join("\t", $accession, $source, $feature, $start, $end,
               $score, $strand, $frame,
               unparse_gtf_attributes($attributes)), "\n";
}