-
Notifications
You must be signed in to change notification settings - Fork 14
/
rnaseq_analysis_on_input_file.sh
55 lines (39 loc) · 1.58 KB
/
rnaseq_analysis_on_input_file.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#! /bin/bash
# This script runs through STAR and htseq-count on a fastq file specified on the command prompt
# USAGE sh rnaseq_analysis_on_input_file.sh <name of fastq file>
# Run this file from the rnaseq_project directory
cd ~/unix_workshop/rnaseq_project/
# Make directory
mkdir -p results/STAR
#accept information from positional parameter into a variable
fq=$1
#location of genome reference and gtf as variables
genome=/groups/hbctraining/unix_workshop_other/reference_STAR/
gtf=~/unix_workshop/rnaseq_project/data/reference_STAR/chr1-hg19_genes.gtf
#Load modules
module load seq/STAR/2.4.0j
module load seq/samtools/1.3
module load seq/htseq/0.6.1
#create output directories
mkdir -p ~/unix_workshop/rnaseq_project/results/STAR
mkdir -p ~/unix_workshop/rnaseq_project/results/counts
# grab base of filename for future naming
base=$(basename $fq .subset.fq.qualtrim25.minlen35.fq)
echo "basename is $base"
# set up output filenames and locations
align_out=~/unix_workshop/rnaseq_project/results/STAR/${base}_
counts_input_bam=~/unix_workshop/rnaseq_project/results/STAR/${base}_Aligned.sortedByCoord.out.bam
counts=~/unix_workshop/rnaseq_project/results/counts/${base}.counts
echo "starting STAR run"
# Run STAR on Mov10_oe_1
STAR --runThreadN 6 --genomeDir $genome \
--readFilesIn $fq \
--outFileNamePrefix $align_out \
--outSAMtype BAM SortedByCoordinate \
--outSAMunmapped Within \
--outSAMattributes NH HI NM MD AS
echo "completed STAR run"
# Create BAM index
samtools index $counts_input_bam
# Count mapped reads
htseq-count --stranded reverse --format bam $counts_input_bam $gtf > $counts