RADtags

#!/usr/bin/env perl

# RADtags
# Identify RAD tags in reads from RAD samples
# Expects sorted reads as input

# History:
# 03/08/10 Incorporated into RADtools
# 18/08/10 Run through perlcritic, tidied up options, added POD,
#          added main() for testing
# 20/08/10 Multiplexing samples in parallel; write output to files
# 25/08/10 Catch absence of Parallel::ForkManager
# 25/08/10 Version 1.0
# 08/09/10 1.0.1 Minor bug fixes
# 29/10/10 1.0.2 Tidy
# 30/10/10 1.1   Load FASTQ format as well as reads format
#                Strip restriction site off the start of reads
# 11/11/10 1.1.1 Bug fixes for test suite - added verbose option
# 16/02/11 1.2   New RADmarkers output; no changes to RADtags
# 15/05/11 1.2.1 Replace -e option with -c; process largest file first
#                Document -v option
# 22/11/11 1.2.3 Add -t option to trim site overhang from reads

#############################################################################
###
### PREAMBLE
###
#############################################################################

# Core pragmas and modules
use strict;
use warnings;
use English qw( -no_match_vars );
use Getopt::Long qw(:config bundling no_auto_abbrev auto_version );
use Pod::Usage;
use POSIX qw{ceil};
use Data::Dumper;

use RADtools qw (load_fastq_pair sort_reads_file QUAL_OFFSET);

# Non-core modules
# Parallel::ForkManager is loaded at run time, inside eval, so that a missing
# installation is reported with a helpful message instead of a compile error.
eval {
    require Parallel::ForkManager;
    Parallel::ForkManager->import();
};
die
"RADtags requires the CPAN module Parallel::ForkManager. Please install this package and add it to your Perl library path.\n"
  if $@;

# Would like to use Carp, but an outstanding bug causes cryptic errors
# when using caller(), so using die until this is fixed
# http://www.nntp.perl.org/group/perl.perl5.porters/2010/03/msg157461.html

# The version has to be a quoted string: a bare 1.2.3 is parsed by Perl as a
# v-string (the three characters with ordinals 1, 2 and 3), which --version
# would print as unprintable control characters.
local $main::VERSION    = '1.2.3';  # Used by Getopt::Long to provide --version
local $OUTPUT_AUTOFLUSH = 1;        # So reporting on progress works

main(@ARGV) unless caller;          # So test suite can call script

#############################################################################
### Name:       MAIN
### Function:   Parse command line options, find the read files for each
###             sample, convert FASTQ input to sorted reads files if
###             requested, and cluster each sample in its own process
### Parameters: Command line arguments (defaults to @ARGV if none are given)
### Returns:    nothing (one .tags file is written per sample)
#############################################################################

sub main {
    my @args = @_;

    # GetOptions only ever parses @ARGV, so arguments handed to main() by a
    # caller (eg the test suite) are routed through a localised @ARGV.
    # With no arguments the global @ARGV is used unchanged.
    local @ARGV = @args ? @args : @ARGV;

    # Set up default options

    my $help    = 0;
    my $usage   = 0;
    my $man     = 0;
    my $verbose = 0;

    my $file_extension    = '.reads';
    my $directory         = '.';
    my $cluster_distance  = 5;
    my $quality_threshold = 20;
    my $read_threshold    = 2;
    my $max_processes     = 1;
    my $site              = '-';
    my $illumina          = 0;
    my $fastq             = 0;
    my $trim_site         = 0;

    GetOptions(

        'help|h'    => \$help,
        'usage|u'   => \$usage,
        'man'       => \$man,
        'verbose|v' => \$verbose,

        'file_extension|f=s'   => \$file_extension,
        'directory|d=s'        => \$directory,
        'cluster_distance|c=i' => \$cluster_distance,
        'site|s=s'             => \$site,
        'illumina|i'           => \$illumina,
        'fastq|z'              => \$fastq,
        'trim_site|t'          => \$trim_site,

        'quality_threshold|q=i' => \$quality_threshold,
        'read_threshold|r=i'    => \$read_threshold,
        'max_processes|m=i'     => \$max_processes,
    ) or pod2usage( -exitval => 3, -verbose => 0 );

    pod2usage( -exitval => 4, -verbose => 0 ) if $usage;
    pod2usage( -exitval => 5, -verbose => 1 ) if $help;
    pod2usage( -exitval => 6, -verbose => 2 ) if $man;

    # FASTQ input cannot be sorted without knowing the restriction site
    pod2usage( -exitval => 8, -verbose => 0 ) if ( $fastq && $site eq '-' );

    die "Please specify a positive value for cluster distance (-c)\n"
      if ( $cluster_distance <= 0 );

    die("Site (-s) should only contain AGCT\n")
      if ( ( $site ne '-' ) && ( $site ne '' ) && ( $site !~ /^[ACGT]+$/ ) );

    # -z changes the default extension, but an explicit -f still wins
    if ( $fastq && $file_extension eq '.reads' ) {
        $file_extension = '.fastq';
    }

    opendir( my $readdir, $directory )
      or die "Can't open directory $directory!\n";
    if ($verbose) { print "Processing directory $directory ...\n"; }

    # The extension is matched literally (\Q...\E) at the end of the name, so
    # that the '.' in '.reads' does not match arbitrary characters.
    # Hidden files are never treated as read files.
    my @read_filenames =
      grep { !/\A[.]/ && /\Q$file_extension\E\z/ } readdir $readdir;
    closedir $readdir;

    if ( @read_filenames == 0 ) {
        die
"No read files found with extension '$file_extension'. Please specify -f. You may need to specify the read file directory using -d.\n";
    }

    # For FASTQ input, pair up stub_1.ext with stub_2.ext. If any pairs are
    # found, only the paired samples are processed (keyed by the _1 file).
    my %pair_filenames;
    if ($fastq) {
        my %is_read_file = map { ( $_ => 1 ) } @read_filenames;
        my @paired_files;
        foreach my $read_filename (@read_filenames) {
            next if $read_filename !~ /\A(.+)_1\Q$file_extension\E\z/;
            my $read2_filename = "${1}_2$file_extension";
            next if !$is_read_file{$read2_filename};

            push @paired_files, $read_filename;
            $pair_filenames{$read_filename} = $read2_filename;
        }
        if ( @paired_files > 0 ) { @read_filenames = @paired_files; }
    }

    my $pm = Parallel::ForkManager->new($max_processes);

    # Process the largest file first. Sizes must be looked up relative to
    # the read directory, not the current working directory.
    my %size_of;
    foreach my $name (@read_filenames) {
        $size_of{$name} = ( -s "$directory/$name" ) || 0;
    }
    my @largest_first =
      sort { $size_of{$b} <=> $size_of{$a} } @read_filenames;

    foreach my $read_filename (@largest_first) {

        $pm->start and next;
        if ($verbose) { print "Processing $read_filename...\n"; }

        # Per-sample copies, so that converting one FASTQ sample does not
        # change the filename or extension seen by later samples when
        # ForkManager runs without forking
        my $sample_filename  = $read_filename;
        my $sample_extension = $file_extension;

        if ($fastq) {

            # Set up quality thresholds and characters
            my $qual_low_char  = '!';
            my $qual_high_char = 'I';
            if ($illumina) {
                $qual_low_char  = ';';
                $qual_high_char = 'h';
            }

            # Detect input files
            my $sample_name = $read_filename;

            open my $read1_in, '<', "$directory/$read_filename"
              or die "Can't open input file $read_filename!\n";

            my $read2_in;
            if ( defined $pair_filenames{$read_filename} ) {
                $sample_name =~ s/_1\Q$file_extension\E\z//;
                open $read2_in, '<',
                  "$directory/$pair_filenames{$read_filename}"
                  or die
                  "Can't open input file $pair_filenames{$read_filename}!\n";
            }
            else {
                $sample_name =~ s/\Q$file_extension\E\z//;
            }

            # Convert FASTQ records to one-line-per-read reads format
            my $reads_path = "$directory/$sample_name.reads";
            open my $reads_file, '>', $reads_path
              or die "Can't open reads file for FASTQ conversion!\n";

            while (
                my $pair_ref = load_fastq_pair(
                    {
                        read1_handle   => $read1_in,
                        read2_handle   => $read2_in,
                        qual_low_char  => $qual_low_char,
                        qual_high_char => $qual_high_char,
                        illumina       => $illumina,
                    }
                )
              )
            {
                last if ( $pair_ref->{valid} == -1 );

                print $reads_file "$pair_ref->{r1seq} $pair_ref->{r1qual}";

                if (   ( defined $pair_ref->{r2seq} )
                    && ( $pair_ref->{r2seq} ne '' ) )
                {
                    print $reads_file " $pair_ref->{r2seq} $pair_ref->{r2qual}";
                }
                print $reads_file "\n";
            }

            # Buffered write errors only surface when the handle is closed
            close $reads_file
              or die "Can't close reads file $reads_path: $OS_ERROR\n";
            close $read1_in;
            if ($read2_in) { close $read2_in; }

            # Sort reads files

            my $sort_start = length($site) > 1 ? length($site) + 1 : 1;
            sort_reads_file(
                {
                    directory     => $directory,
                    read_filename => $reads_path,
                    sort_start    => $sort_start,
                    read1_field   => 1
                }
            );

            # Cluster the newly written reads file rather than the FASTQ
            $sample_filename  = $reads_path;
            $sample_extension = '.reads';
        }

        cluster_one_sample(
            {
                read_filename     => $sample_filename,
                file_extension    => $sample_extension,
                directory         => $directory,
                site              => $site,
                cluster_distance  => $cluster_distance,
                quality_threshold => $quality_threshold,
                read_threshold    => $read_threshold,
                trim_site         => $trim_site,
            }
        );
        $pm->finish;
    }
    $pm->wait_all_children;
    if ($verbose) { print "Done\n"; }
    return;
}

#############################################################################
###
### SUBROUTINES
###
#############################################################################

#############################################################################
### Name:       CLUSTER ONE SAMPLE
### Function:   Process reads from one sample into tags
### Parameters: A hash reference with the following keys:
###             read_filename - file of reads to process, with or without
###                             a leading "directory/"
###             file_extension - usually .reads or .fastq
###             directory - directory holding the reads file; the tags file
###                         is written to the same directory
###             site - restriction site, passed on to get_read
###             cluster_distance - number of mismatches allowed within cluster
###             quality_threshold - reject reads with qualities below this
###             read_threshold - reject tags with read counts less than this
###             trim_site - if true, the site is trimmed from each read
### Returns:    nothing (outputs to directory/samplename.tags)
#############################################################################

sub cluster_one_sample {

    my ($arg_ref) = @_;

    my $read_filename     = $arg_ref->{read_filename};
    my $file_extension    = $arg_ref->{file_extension};
    my $directory         = $arg_ref->{directory};
    my $site              = $arg_ref->{site};
    my $cluster_distance  = $arg_ref->{cluster_distance};
    my $quality_threshold = $arg_ref->{quality_threshold};
    my $read_threshold    = $arg_ref->{read_threshold};
    my $trim_site         = $arg_ref->{trim_site};

    # Derive the sample name by removing the extension from the end of the
    # filename and the directory prefix. Both are matched literally
    # (\Q...\E): unquoted, the '.' in '.reads' matches any character, which
    # turned 'threads.reads' into 't.reads'.
    my $sample_name = $read_filename;
    $sample_name =~ s/\Q$file_extension\E\z//;
    $sample_name =~ s/\Q$directory\E\///;

    open my $out_file, '>', "$directory/$sample_name.tags"
      or die "Can't open $sample_name\.tags: $OS_ERROR!";

    open my $in_file, '<', "$directory/$sample_name$file_extension"
      or die "Can't open $sample_name$file_extension: $OS_ERROR!";

    # An empty reads file produces an empty tags file
    my $first_read = get_read( $in_file, $site, $trim_site );
    return if !defined $first_read;

    my $read_length = length $first_read->{r1s};

    # get_unique returns nothing if no acceptable unique could be built
    # before the reads ran out, in which case there is nothing to cluster
    my $unique_hash =
      get_unique( $in_file, $site, $first_read, $cluster_distance,
        $trim_site );
    return if !defined $unique_hash;

    my $first_unique = $unique_hash->{uniq};
    $first_read = $unique_hash->{read};
    my @cluster = ($first_unique);

    # $first_read always holds the first read of the next unique; it is
    # undefined once the input is exhausted
    while ( defined $first_read ) {

        $unique_hash =
          get_unique( $in_file, $site, $first_read, $cluster_distance,
            $trim_site );

        # Exit if no good hash could be returned
        # (this happens if there are no more reads and the last reads
        #  are poor quality)
        last if ( !defined $unique_hash );

        my $new_unique = $unique_hash->{uniq};
        $first_read = $unique_hash->{read};

        # Calculate distance between the cluster's reference unique and
        # the new unique
        my $unique_distance =
          compare_uniques( $first_unique, $new_unique, $read_length );

        if ( $unique_distance > $cluster_distance ) {

            # Too far away: output the finished cluster and start a new one
            find_cluster_variants( \@cluster, $read_length, $quality_threshold,
                $read_threshold, $out_file );
            $first_unique = $new_unique;
            @cluster      = ($first_unique);
        }
        else {

            # Same cluster; compare later uniques against the unique with
            # the most reads
            push @cluster, $new_unique;
            if ( $new_unique->{count} > $first_unique->{count} ) {
                $first_unique = $new_unique;
            }
        }
    }

    # Process last cluster
    find_cluster_variants( \@cluster, $read_length, $quality_threshold,
        $read_threshold, $out_file );

    close $out_file
      or die "Couldn't close $sample_name\.tags: $OS_ERROR!\n";

    close $in_file;

    return;
}

#############################################################################
### Name:       COMPARE UNIQUES
### Function:   Calculate distance between two unique sequences, weighting
###             each mismatch by the product of the two uniques' base
###             probabilities at that position
### Parameters: Two uniques (hash refs with seq and p entries) and the
###             length of reads
### Returns:    Distance between uniques, scaled to the read length; the
###             read length itself if no position has a non-zero weight
#############################################################################

sub compare_uniques {
    my ( $unique_1, $unique_2, $read_length ) = @_;

    my $informative_positions = 0;
    my $mismatch_weight       = 0;

    for my $pos ( 0 .. $read_length - 1 ) {
        my $joint_prob = $unique_1->{p}[$pos] * $unique_2->{p}[$pos];
        $informative_positions++ if $joint_prob > 0;

        # Matching bases contribute nothing to the distance
        next
          if substr( $unique_1->{seq}, $pos, 1 ) eq
              substr( $unique_2->{seq}, $pos, 1 );

        $mismatch_weight += $joint_prob;
    }

    # With no informative positions the uniques are as far apart as possible
    return $read_length if $informative_positions == 0;

    # Round the weighted mismatches to a whole number, then scale up from
    # the informative positions to the full read length
    my $whole_mismatches = sprintf '%2.0f', $mismatch_weight;
    return ( $whole_mismatches / $informative_positions ) * $read_length;
}

#############################################################################
### Name:       GET READ
### Function:   Process read line into read record
### Parameters: Read 1 filehandle, restriction site,
###             trim_site flag (if true and the site is not '-', as many
###             characters as the site is long are removed from the start
###             of read 1 and its qualities)
### Returns:    Hash of read sequences and qualities (r1s, r1q, r2s, r2q;
###             read 2 fields are empty strings for single end reads), or
###             nothing at end of input or if the site is at least as long
###             as the read
#############################################################################

sub get_read {
    my ( $in_file, $site, $trim_site ) = @_;

    # Fetch the next line that contains data. Testing definedness rather
    # than truth stops a final line of "0" being mistaken for end of file,
    # and skipping blank lines stops them becoming reads with no sequence.
    my $seq_line;
    while ( defined( $seq_line = <$in_file> ) ) {
        chomp $seq_line;
        last if $seq_line =~ /\S/;
    }
    return if !defined $seq_line;

    my ( $read1seq, $read1qual, $read2seq, $read2qual ) = split / /,
      $seq_line;

    # Missing fields become empty strings so later string operations are safe
    if ( !defined $read1qual ) { $read1qual = q{}; }
    if ( !defined $read2seq )  { $read2seq  = q{}; }
    if ( !defined $read2qual ) { $read2qual = q{}; }

    # Replace every '#' quality character with '!'
    $read1qual =~ s/#/!/g;
    $read2qual =~ s/#/!/g;

    # Remove restriction site from the start of read 1. The start of the
    # read is not compared to the site; only the site's length is used.
    if ( ($trim_site) && ( $site ne '-' ) ) {
        if ( length($site) >= length($read1seq) ) {
            print "Site $site is longer than read $read1seq; aborting\n";
            return;
        }
        $read1seq  = substr $read1seq,  length($site);
        $read1qual = substr $read1qual, length($site);
    }

    return {
        r1s => $read1seq,
        r1q => $read1qual,
        r2s => $read2seq,
        r2q => $read2qual
    };
}

#############################################################################
### Name:       GET UNIQUE
### Function:   Load in identical reads as one unique and check for quality
### Parameters: Read 1 filehandle, restriction site,
###             first read of the unique (as returned by get_read),
###             threshold distance for comparison,
###             trim_site flag (passed on to get_read)
### Returns:    Hash with the finished unique (uniq) and the first read of
###             the next unique (read, undefined if input is exhausted), or
###             nothing if no acceptable unique was found before the reads
###             ran out
#############################################################################

sub get_unique {
    my ( $in_file, $site, $first_read, $cluster_distance, $trim_site ) = @_;

    # Without a starting read there is nothing to build a unique from
    return if !defined $first_read;

    my $new_read;
    my $unique;

    # Keep building uniques until one passes the quality check
    while (1) {
        $unique = {
            seq   => $first_read->{r1s},
            count => 1,
        };
        push @{ $unique->{q} }, $first_read->{r1q};
        push @{ $unique->{prs}{ $first_read->{r2s} } }, $first_read->{r2q};

        # Input is sorted, so identical reads are adjacent; the first read
        # with a different sequence belongs to the next unique
        while ( $new_read = get_read( $in_file, $site, $trim_site ) ) {
            last if ( $new_read->{r1s} ne $unique->{seq} );

            $unique->{count}++;
            push @{ $unique->{q} }, $new_read->{r1q};
            push @{ $unique->{prs}{ $new_read->{r2s} } }, $new_read->{r2q};
        }

        # Convert qualities (quality strings are replaced by one score per
        # position) and check that unique is acceptable: it needs more
        # than twice the cluster distance in positions scoring above zero
        calculate_median_qualities( $unique->{q} );
        my $high_qual_bases = grep { $_ > 0 } @{ $unique->{q} };
        last if ( $high_qual_bases > ( $cluster_distance * 2 ) );

        # Unique rejected; give up if there are no more reads, otherwise
        # try again starting from the read that ended this unique
        return if ( !defined $new_read );
        $first_read = $new_read;
    }

    # Convert each quality score into the probability that the base is right
    foreach my $q ( @{ $unique->{q} } ) {
        push @{ $unique->{p} }, 1 - ( 10**( $q / -10 ) );
    }

    return { uniq => $unique, read => $new_read };
}

#############################################################################
### Name:       FIND CLUSTER VARIANTS
### Function:   for a set of uniques assigned to a particular cluster,
###             correct errors and call variants,
###             and output 'alleles' for this cluster
### Parameters: cluster_ref  - array of uniques to process
###             read_length, quality_threshold, read_threshold
###             out_file - filehandle that tags are printed to
### Returns:    Nothing (prints tags to the out_file filehandle)
#############################################################################

sub find_cluster_variants {
    my ( $cluster_ref, $read_length, $quality_threshold, $read_threshold,
        $out_file )
      = @_;

    # Per position, per base: first a count-weighted sum of qualities,
    # later replaced by an average (or zero)
    my @tag_bases;
    my $total_unique_count = 0;

    # Per position, per base: number of reads with a quality above zero
    my @unique_counts_by_pos;

    # Find and score all bases at each position
    foreach my $unique ( @{$cluster_ref} ) {

        my @bases = split //, $unique->{seq};
        foreach my $i ( 0 .. $#bases ) {
            $tag_bases[$i]{ $bases[$i] } += $unique->{count} * $unique->{q}[$i];

            # Count this unique towards the total count for this base
            # if there is a quality score for this unique at this position
            # (ie score is not B)
            if ( $unique->{q}[$i] > 0 ) {
                $unique_counts_by_pos[$i]{ $bases[$i] } += $unique->{count};
            }
        }

        $total_unique_count += $unique->{count};
    }

    # Take averages of base qualities for each base at each position
    foreach my $base (qw{A C G T N}) {

        foreach my $pos ( 0 .. $#tag_bases ) {

            # Only take an average where there are bases with called
            # qualities at this position (rather than 0 for B qual)
            # and there is more than one unique for this base
            # (so not singleton high qualities, which may be errors)

            if (   ( defined $unique_counts_by_pos[$pos]{$base} )
                && ( $unique_counts_by_pos[$pos]{$base} > 1 ) )
            {
                $tag_bases[$pos]{$base} =
                  int( $tag_bases[$pos]{$base} /
                      $unique_counts_by_pos[$pos]{$base} );
            }
            else { $tag_bases[$pos]{$base} = 0; }
        }
    }

    # Separate uniques into allele clusters
    # %alleles:    seed sequence => { seqs => { member sequence => unique } }
    # %seq_lookup: member sequence => { seqs => { seed sequence => count } }
    my %alleles;
    my %seq_lookup;
    my @uniques_sorted_by_count =
      reverse sort { $a->{count} <=> $b->{count} } @{$cluster_ref};

    # Uniques are visited from highest to lowest read count. A unique that
    # has already been assigned to an allele is not used as a seed itself.
    foreach my $unique1 (@uniques_sorted_by_count) {
        next if ( defined $seq_lookup{ $unique1->{seq} } );

        my $unique1_length = length( $unique1->{seq} );
      UNIQUE2:
        foreach my $unique2 (@uniques_sorted_by_count) {

            # unique2 is kept out of this allele if, at any position, the
            # two bases differ and both have an averaged quality at or
            # above the quality threshold
            foreach my $pos ( 0 .. $unique1_length - 1 ) {

                my $base1 = substr $unique1->{seq}, $pos, 1;

                if ( $tag_bases[$pos]{$base1} >= $quality_threshold ) {
                    my $base2 = substr $unique2->{seq}, $pos, 1;
                    next UNIQUE2
                      if ( ( $tag_bases[$pos]{$base2} >= $quality_threshold )
                        && ( $base1 ne $base2 ) );
                }
            }

            # Store unique2 details in unique1 cluster
            $alleles{ $unique1->{seq} }{seqs}{ $unique2->{seq} } = $unique2;

            # Note unique2 as part of the unique1 cluster
            $seq_lookup{ $unique2->{seq} }{seqs}{ $unique1->{seq} } =
              $unique1->{count};
        }
    }

    return if ( keys %alleles == 0 );

    # Set once at least one allele has been printed, so that the blank line
    # ending the record is only written for clusters with output
    my $passed = 0;

    # Summarise and output alleles
    foreach my $seq ( sort keys %alleles ) {

        # Total reads for this allele, count-weighted quality sums per
        # position, and paired end sequence => list of paired end qualities
        my $count = 0;
        my @seq_quals;
        my %pairs;

        foreach my $seq2 ( sort keys %{ $alleles{$seq}{seqs} } ) {

            # Only add seq2 to this cluster if it unambiguously matches
            # (ie it was assigned to exactly one seed sequence)
            if ( keys %{ $seq_lookup{$seq2}{seqs} } == 1 ) {

                # Add count
                my $seq2_count = $alleles{$seq}{seqs}{$seq2}{count};
                $count += $seq2_count;

                # Add qual and base
                my @seq2_quals = @{ $alleles{$seq}{seqs}{$seq2}{q} };

                my @seq2_bases = split //, $seq2;
                foreach my $pos ( 0 .. $#seq2_quals ) {
                    $seq_quals[$pos] += $seq2_quals[$pos] * $seq2_count;
                }
                foreach my $pair ( keys %{ $alleles{$seq}{seqs}{$seq2}{prs} } )
                {
                    push @{ $pairs{$pair} },
                      @{ $alleles{$seq}{seqs}{$seq2}{prs}{$pair} };
                }
            }
        }

        # By default, throw away singleton tags (tags with only one read)
        next
          if ( $count < $read_threshold );

        # Throw away pairs containing Ns
        map {
            if (/N/) { delete $pairs{$_}; }
        } keys %pairs;

        # Merge similar paired ends. Pairs are walked in sorted order;
        # $i is the pair being kept and $i + $j the pair compared to it.
        # The string XOR is a null character wherever the two sequences
        # agree, and tr counts the characters that are not null.
        # NOTE(review): \255 in tr is an octal escape (character 173), so
        # only XOR results up to 173 are counted - confirm this is intended
        my @sorted_pairs = sort keys %pairs;
        my $i            = 0;
        my $j            = 1;
        while ( ( $i + $j ) <= $#sorted_pairs ) {
            my $hamming =
              ( $sorted_pairs[$i] ^ $sorted_pairs[ $i + $j ] ) =~
              tr/\001-\255//;

            # Pairs differing at fewer than a quarter of the read length
            # are folded into the kept pair; otherwise the compared pair
            # becomes the new kept pair
            if ( $hamming < ( $read_length / 4 ) ) {

                push @{ $pairs{ $sorted_pairs[$i] } },
                  @{ $pairs{ $sorted_pairs[ $i + $j ] } };
                delete $pairs{ $sorted_pairs[ $i + $j ] };
                $j++;
            }
            else { $i = $i + $j; $j = 1; }
        }

        # Throw away tags without a high quality pair, ie a good fragment
        next if ( scalar keys %pairs == 0 );

        # Output tag with qualities, counts and high quality paired ends
        # Format: "sequence qualities read_count fragment_count", followed
        # by one tab-indented "paired_end read_count" line per non-empty
        # paired end. Qualities are the mean score per position, offset by
        # QUAL_OFFSET and printed as characters.
        $passed = 1;
        print $out_file "$seq " or die "Can't output sequence\n";
        foreach my $qual (@seq_quals) {
            print $out_file chr( $qual / $count + QUAL_OFFSET )
              or die "Can't output quality\n";
        }
        print $out_file " $count " or die "Can't output read count\n";
        print $out_file scalar keys %pairs
          or die "Can't output fragment count\n";
        print $out_file "\n" or die "Can't output newline\n";
        map {
            if ( $_ ne q{} )
            {
                print $out_file "\t$_ " . scalar @{ $pairs{$_} } . "\n"
                  or die "Can't output paired ends\n";
            }
          }
          sort keys %pairs;
    }

    # A blank line ends the record for this cluster
    if ($passed) {
        print $out_file "\n" or die "Can't output end of tag record\n";
    }
    return;
}

#############################################################################
### Name:       CALCULATE MEDIAN QUALITIES
### Function:   collapses the quality strings of a set of reads with
###             identical sequences into a single list of quality scores,
###             one per position, by taking the median score at each
###             position (the lower of the two middle scores when the
###             number of reads is even)
### Parameters: qual_list_ref - list of quality strings; on return it holds
###             the median quality score for each position instead
### Returns:    Nothing (fills qual_list_ref)
#############################################################################

sub calculate_median_qualities {
    my ($qual_list_ref) = @_;

    # Gather the scores seen at each position across all quality strings
    my @scores_at;
    for my $quality_string ( @{$qual_list_ref} ) {
        for my $pos ( 0 .. length($quality_string) - 1 ) {
            my $quality_char = substr $quality_string, $pos, 1;
            push @{ $scores_at[$pos] }, ord($quality_char) - QUAL_OFFSET;
        }
    }

    # Overwrite the caller's list with the middle score for each position
    @{$qual_list_ref} = map {
        my @ordered = sort { $a <=> $b } @{$_};
        $ordered[ int( $#ordered / 2 ) ];
    } @scores_at;

    return;
}

__END__

#############################################################################
###
### DOCUMENTATION
###
#############################################################################

=head1 NAME

RADtags - Identifies RAD tags in Illumina reads from RAD samples

=head1 VERSION

This documentation refers to RADtools version 1.2.3.

=head1 SYNOPSIS

=over 8

=item RADtags [options]

=item RADtags -z -s <SITE> [options]

=item RADtags --help

=back

=head1 OPTIONS

=over 8

=item B<-h, --help>

Print a brief help message and exit

=item B<-u, --usage>

Print concise usage and exit

=item B<--man>

Print the manual page and exit

=item B<--version>

Print version number and exit

=item B<-v, --verbose>

Verbose output; show names of files being processed

=item B<-f, --file_extension>

Suffix of filenames containing RAD reads (default '.reads'). To load a particular file, use -f filename.

=item B<-d, --directory>

Name of directory containing the read files (ie prefix of pools filename if using RADtools conventions). Tags files are written to the same directory. Default is working directory (.).

=item B<-c, --cluster_distance>

The maximum number of mismatches allowed for a tag to be included in a cluster (default 5)

=item B<-z, --fastq>

Load FASTQ data. -s must also be specified. By default, loads files ending with '.fastq', unless -f is specified. If loading paired end data, files should be named 'samplename_1.fastq' and 'samplename_2.fastq' for each sample; otherwise, the files will be loaded as separate samples.

=item B<-s, --site>

Sequence of the restriction site overhang (A, C, G and T only). RADpools retains the restriction site overhang on the front of every read, as this sequence can be useful for aligning reads to a reference genome. This option is required for loading FASTQ files so that FASTQ files can be sorted correctly. By default, the restriction site is retained even after sorting; set -t to remove it.

=item B<-t, --trim_site>

Trims the restriction site specified by -s from the start of each read.

=item B<-i, --illumina>

Load data in fastq-illumina format, rather than fastq-sanger format. Only applies to FASTQ input, not reads input. If specified, qualities will be converted to fastq-sanger format.

=item B<-m, --max_processes>

Maximum number of processes to run (default 1). As one process is required for each sample, increasing --max_processes is highly recommended if more cores are available.

=item B<-q, --quality_threshold>

Only consider bases with qualities above this threshold when comparing unique sequences for similarity. Bases below this threshold will be ignored. Default 20.

=item B<-r, --read_threshold>

Reject any tags with counts lower than this threshold. Default 2, ie throw away all tags with just one read associated with them, which are very likely to be due to sequencing error.

=back

=head1 DESCRIPTION

B<RADtags> accepts as input read files output by RADpools. It generates a set of candidate RAD tags for the sample, correcting errors in low quality reads and calling variants. Tags are associated with a read count and a fragment count, which is the number of unique single/paired end combinations seen. Qualities are called for tags. Paired ends are output for each tag, with read counts for each paired end fragment.

=head1 AUTHOR

John Davey <john.davey@ed.ac.uk>

=head1 LICENCE AND COPYRIGHT

Copyright 2010,2011 John Davey, University of Edinburgh john.davey@ed.ac.uk

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

=cut