-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path8.3.pl
72 lines (67 loc) · 1.61 KB
/
8.3.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
use strict;
use warnings;
use BeginPerlBioinfo; # see Chapter 6 about this module
use Kumar1;
# Main program: read a FASTA-style file, pull out the DNA sequence,
# translate it to a peptide, and print the peptide wrapped to 25 columns.

# Input file that holds the DNA sequence
my $file = "sample.dna";

# Load the file contents. readFileToArray is not defined in this file
# (presumably it comes from Kumar1 -- confirm); its result is dereferenced
# as an array reference.
my $array_ref = readFileToArray($file);
my @file_data = @{$array_ref};

# Reduce the raw lines to the bare sequence string
my $dna = extract_sequence_from_fasta_data(@file_data);

# Turn the DNA into its protein translation
my $protein = dna2peptide($dna);

# Emit the protein, 25 characters per output line
print_sequence($protein, 25);

exit;
# Build a single sequence string from the lines of a FASTA-style file.
#
# Arguments:
#   @fasta_file_data - the lines of the file, one element per line
# Returns:
#   the concatenation of every line that is not blank, not a comment
#   (optional leading whitespace followed by '#'), and not a header
#   (starting with '>'), with all whitespace removed.
sub extract_sequence_from_fasta_data {
    my (@fasta_file_data) = @_;

    # Keep only the lines that carry sequence data: drop blank lines,
    # comment lines, and fasta header lines.
    my @sequence_lines = grep {
        !( /^\s*$/ || /^\s*#/ || /^>/ )
    } @fasta_file_data;

    # Glue the surviving lines together ...
    my $sequence = join '', @sequence_lines;

    # ... and strip the non-sequence data (in this case, whitespace,
    # which includes the line endings).
    $sequence =~ s/\s//g;

    return $sequence;
}
# Print a sequence wrapped into fixed-width lines.
#
# Arguments:
#   $sequence - the string to print
#   $length   - maximum number of characters per output line; must be a
#               positive number
# Output:
#   each chunk of up to $length characters is written with print (i.e. to
#   the currently selected output handle) followed by a newline. An empty
#   sequence produces no output.
# Dies:
#   if $length is undefined or not greater than zero, because the loop
#   below advances by $length and would otherwise never terminate.
sub print_sequence {
    my ($sequence, $length) = @_;

    die "print_sequence: line length must be a positive number"
        unless defined $length && $length > 0;

    # Print sequence in lines of $length. The print has to live inside the
    # loop body: $pos is scoped to the loop, and one chunk is emitted per
    # iteration.
    for (my $pos = 0; $pos < length($sequence); $pos += $length) {
        print substr($sequence, $pos, $length), "\n";
    }

    return;
}
# Translate a DNA string into a peptide string, codon by codon.
#
# Arguments:
#   $dna - the DNA sequence as a string
# Returns:
#   the concatenation of codon2aa() applied to each successive
#   three-character slice of $dna, starting at offset 0. Trailing
#   characters that do not fill a complete codon are ignored; an empty
#   input yields an empty string.
#
# codon2aa is not defined in this file; presumably it is supplied by
# BeginPerlBioinfo (see Chapter 6 about this module) -- confirm.
# strict, warnings and BeginPerlBioinfo are already loaded at the top of
# the file, so they are not repeated inside the subroutine.
sub dna2peptide {
    my ($dna) = @_;

    # Initialize variables
    my $protein = '';

    # Translate each three-base codon to an amino acid, and append to a
    # protein. The "- 2" in the bound stops the loop before a trailing
    # partial codon of one or two bases.
    for (my $i = 0; $i < (length($dna) - 2); $i += 3) {
        $protein .= codon2aa(substr($dna, $i, 3));
    }

    return $protein;
}