-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsequence_profile.go
141 lines (123 loc) · 4.27 KB
/
sequence_profile.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package fragbag
import (
"fmt"
"github.com/TuftsBCB/seq"
)
var _ = SequenceLibrary(&sequenceProfile{})
// sequenceProfile represents a Fragbag sequence fragment library.
// Fragbag fragment libraries are fixed both in the number of fragments and in
// the size of each fragment.
type sequenceProfile struct {
Ident string
Fragments []sequenceProfileFrag
FragSize int
}
// Fragment corresponds to a single sequence fragment in a fragment library.
// It holds the fragment number identifier and embeds a sequence profile.
type sequenceProfileFrag struct {
FragNumber int
*seq.Profile
}
// NewSequenceProfile initializes a new Fragbag sequence library with the
// given name and fragments. All sequence profiles given must have the same
// number of columns.
//
// Fragments for this library are represented as regular sequence profiles.
// Namely, each column plainly represents the composition of each amino acid.
func NewSequenceProfile(
name string,
fragments []*seq.Profile,
) (SequenceLibrary, error) {
lib := new(sequenceProfile)
lib.Ident = name
for _, frag := range fragments {
if err := lib.add(frag); err != nil {
return nil, err
}
}
return lib, nil
}
func (lib *sequenceProfile) SubLibrary() Library {
return nil
}
// Add adds a sequence fragment to the library, where a sequence fragment
// corresponds to a profile of log-odds scores for each amino acid.
// The first call to Add may contain any number of columns in the profile.
// All subsequent adds must contain the same number of columns as the first.
func (lib *sequenceProfile) add(prof *seq.Profile) error {
if lib.Fragments == nil || len(lib.Fragments) == 0 {
frag := sequenceProfileFrag{0, prof}
lib.Fragments = append(lib.Fragments, frag)
lib.FragSize = prof.Len()
return nil
}
frag := sequenceProfileFrag{len(lib.Fragments), prof}
if lib.FragSize != prof.Len() {
return fmt.Errorf("Fragment %d has length %d; expected length %d.",
frag.FragNumber, prof.Len(), lib.FragSize)
}
lib.Fragments = append(lib.Fragments, frag)
return nil
}
// Save saves the full fragment library to the writer provied.
func (lib *sequenceProfile) Tag() string {
return libTagSequenceProfile
}
// Size returns the number of fragments in the library.
func (lib *sequenceProfile) Size() int {
return len(lib.Fragments)
}
// FragmentSize returns the size of every fragment in the library.
func (lib *sequenceProfile) FragmentSize() int {
return lib.FragSize
}
// String returns a string with the name of the library, the number of
// fragments in the library and the size of each fragment.
func (lib *sequenceProfile) String() string {
return fmt.Sprintf("%s (%d, %d)",
lib.Ident, len(lib.Fragments), lib.FragSize)
}
func (lib *sequenceProfile) Name() string {
return lib.Ident
}
// Best returns the number of the fragment that best corresponds
// to the string of amino acids provided.
// The length of `sequence` must be equivalent to the fragment size.
//
// If no "good" fragments can be found, then `-1` is returned. This
// behavior will almost certainly change in the future.
func (lib *sequenceProfile) BestSequenceFragment(s seq.Sequence) int {
// Since fragments are guaranteed not to have gaps by construction,
// we can do a straight-forward summation of the negative log-odds
// probabilities corresponding to the residues in `s`.
var testAlign seq.Prob
bestAlign, bestFragNum := seq.MinProb, -1
for i := range lib.Fragments {
testAlign = lib.AlignmentProb(i, s)
if bestAlign.Less(testAlign) {
bestAlign, bestFragNum = testAlign, i
}
}
return bestFragNum
}
func (lib *sequenceProfile) FragmentString(fragNum int) string {
return fmt.Sprintf("> %d\n%s", fragNum, lib.Fragments[fragNum].Profile)
}
func (lib *sequenceProfile) Fragment(fragNum int) interface{} {
return lib.Fragments[fragNum].Profile
}
// AlignmentProb computes the probability of the sequence `s` aligning
// with the profile in `frag`. The sequence must have length equivalent
// to the fragment size.
func (lib *sequenceProfile) AlignmentProb(fragi int, s seq.Sequence) seq.Prob {
frag := lib.Fragments[fragi]
if s.Len() != frag.Len() {
panic(fmt.Sprintf("Sequence length %d != fragment size %d",
s.Len(), frag.Len()))
}
prob := seq.Prob(0.0)
for c := 0; c < s.Len(); c++ {
prob += frag.Emissions[c].Lookup(s.Residues[c])
}
return prob
}