forked from wurmlab/afra
-
Notifications
You must be signed in to change notification settings - Fork 0
/
IntervalStore.pm
228 lines (185 loc) · 6.79 KB
/
IntervalStore.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
package IntervalStore;
use strict;
use warnings;
use Carp;
use Storable ();
use ArrayRepr;
use LazyNCList;
=head1 NAME
IntervalStore - manages a set of intervals (genomic features)
=head1 SYNOPSIS
my $js = JsonStore->new($pathTempl, $compress);
my $is = IntervalStore->new({
store => $js,
classes => [
{
attributes => ["Start", "End", "Strand"],
},
],
urlTemplate => "lf-{Chunk}.jsonz",
);
my $chunkBytes = 80_000;
$is->startLoad($chunkBytes);
$is->addSorted([10, 100, -1])
$is->addSorted([50, 80, 1])
$is->addSorted([90, 150, -1])
$is->finishLoad();
$is->overlap(60, 85)
=> ([10, 100, -1], [50, 80, 1])
=head1 METHODS
=head2 new
Title : new
Usage : IntervalStore->new(
store => $js,
classes => {attributes => ["Start", "End", "Strand"]},
)
Function: create an IntervalStore
Returns : an IntervalStore object
Args : The IntervalStore constuctor accepts the named parameters:
store: object with put(path, data) method, will be used to output
feature data
classes: describes the feature arrays; will be used to construct
an ArrayRepr
urlTemplate (optional): template for URLs where chunks of feature
data will be stored. This is relative to
the directory with the "trackData.json" file
lazyClass (optional): index in classes->{attributes} array for
the class indicating a lazy feature
nclist (optional): the root of the nclist
count (optional): the number of intervals in this IntervalStore
minStart (optional): the earliest interval start point
maxEnd (optional): the latest interval end point
If this IntervalStore hasn't been loaded yet, the optional
parameters aren't necessary. But to access a previously-loaded
IntervalStore, the optional parameters *are* needed.
=cut
sub new {
my ($class, $args) = @_;
my $self = {
store => $args->{store},
classes => Storable::dclone( $args->{classes} ),
lazyClass => $args->{lazyClass},
urlTemplate => $args->{urlTemplate} || ("lf-{Chunk}"
. $args->{store}->ext),
attrs => ArrayRepr->new($args->{classes}),
nclist => $args->{nclist},
minStart => $args->{minStart},
maxEnd => $args->{maxEnd},
loadedChunks => {}
};
if (defined($args->{nclist})) {
# we're already loaded
$self->{lazyNCList} =
LazyNCList->importExisting($self->{attrs},
$args->{lazyClass},
$args->{count},
$args->{minStart},
$args->{maxEnd},
sub { $self->_loadChunk( @_ ); },
$args->{nclist} );
}
bless $self, $class;
return $self;
}
sub _loadChunk {
my ($self, $chunkId) = @_;
my $chunk = $self->{loadedChunks}->{$chunkId};
if (defined($chunk)) {
return $chunk;
} else {
(my $path = $self->{urlTemplate}) =~ s/\{Chunk\}/$chunkId/g;
$chunk = $self->{store}->get($path);
# TODO limit the number of chunks that we keep in memory
$self->{loadedChunks}->{$chunkId} = $chunk;
return $chunk;
}
}
=head2 startLoad( $measure, $chunkBytes )
=cut
sub startLoad {
my ($self, $measure, $chunkBytes) = @_;
if (defined($self->{nclist})) {
confess "loading into an already-loaded IntervalStore";
} else {
# add a new class for "fake" features
push @{$self->{classes}}, {
'attributes' => ['Start', 'End', 'Chunk'],
'isArrayAttr' => {'Sublist' => 1}
};
$self->{lazyClass} = $#{$self->{classes}};
my $makeLazy = sub {
my ($start, $end, $chunkId) = @_;
return [$self->{lazyClass}, $start, $end, $chunkId];
};
my $output = sub {
my ($toStore, $chunkId) = @_;
(my $path = $self->{urlTemplate}) =~ s/\{Chunk\}/$chunkId/g;
$self->{store}->put($path, $toStore);
};
$self->{attrs} = ArrayRepr->new($self->{classes});
$self->{lazyNCList} =
LazyNCList->new($self->{attrs},
$self->{lazyClass},
$makeLazy,
sub { $self->_loadChunk( @_); },
$measure,
$output,
$chunkBytes);
}
}
=head2 addSorted( \@feature )
=cut
sub addSorted {
my ($self, $feat) = @_;
$self->{lazyNCList}->addSorted($feat);
}
=head2 finishLoad()
=cut
sub finishLoad {
my ($self) = @_;
$self->{lazyNCList}->finish();
$self->{nclist} = $self->lazyNCList->topLevelList();
}
=head2 overlapCallback( $from, $to, \&func )
Calls the given function once for each of the intervals that overlap
the given interval if C<<$from <= $to>>, iterates left-to-right, otherwise
iterates right-to-left.
=cut
sub overlapCallback {
my ($self, $start, $end, $cb) = @_;
$self->lazyNCList->overlapCallback($start, $end, $cb);
}
sub lazyNCList { shift->{lazyNCList} }
sub count { shift->{lazyNCList}->count }
sub hasIntervals { shift->count > 0 }
sub store { shift->{store} }
sub classes { shift->{classes} }
=head2 descriptor
Title : descriptor
Usage : IntervalStore->descriptor()
Returns : a hash containing the data needed to re-construct this
IntervalStore, including the root of the NCList plus some
metadata and configuration.
The return value can be passed to the constructor later.
=cut
sub descriptor {
my ($self) = @_;
return {
classes => $self->{classes},
lazyClass => $self->{lazyClass},
nclist => $self->{nclist},
urlTemplate => $self->{urlTemplate},
count => $self->count,
minStart => $self->lazyNCList->minStart,
maxEnd => $self->lazyNCList->maxEnd
};
}
1;
=head1 AUTHOR
Mitchell Skinner E<lt>[email protected]<gt>
Copyright (c) 2007-2011 The Evolutionary Software Foundation
This package and its accompanying libraries are free software; you can
redistribute it and/or modify it under the terms of the LGPL (either
version 2.1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text.
=cut