16 Demo 4 - Processing of RNA-seq data
Aims
- Manually process RNA-seq reads with
STAR - Generate stranded RNA-seq tracks with
bamCoverage - Estimate transcript abundance from RNA-seq samples with
featureCounts
Datasets
RNA-seq data was published in Nuño-Cabanes et al., Scientific Data 2020
Control RNA-seq @ 30 C:
SRR9929263SRR9929264SRR9929273SRR9929282
Heat shock RNA-seq @ 39 C, 20 min:
SRR2045248&SRR2045249SRR9929271SRR9929265SRR9929280SRR9929274
16.1 Indexing genome for STAR index
16.2 Mapping RNA-seq on R64-1-1
sh
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929263/SRR9929263_1.fastq.gz -o data/RNAseq_WT_rep1_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929263/SRR9929263_2.fastq.gz -o data/RNAseq_WT_rep1_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929264/SRR9929264_1.fastq.gz -o data/RNAseq_WT_rep2_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929264/SRR9929264_2.fastq.gz -o data/RNAseq_WT_rep2_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929273/SRR9929273_1.fastq.gz -o data/RNAseq_WT_rep3_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929273/SRR9929273_2.fastq.gz -o data/RNAseq_WT_rep3_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/002/SRR9929282/SRR9929282_1.fastq.gz -o data/RNAseq_WT_rep4_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/002/SRR9929282/SRR9929282_2.fastq.gz -o data/RNAseq_WT_rep4_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/001/SRR9929271/SRR9929271_1.fastq.gz -o data/RNAseq_HS_rep1_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/001/SRR9929271/SRR9929271_2.fastq.gz -o data/RNAseq_HS_rep1_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/005/SRR9929265/SRR9929265_1.fastq.gz -o data/RNAseq_HS_rep2_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/005/SRR9929265/SRR9929265_2.fastq.gz -o data/RNAseq_HS_rep2_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/000/SRR9929280/SRR9929280_1.fastq.gz -o data/RNAseq_HS_rep3_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/000/SRR9929280/SRR9929280_2.fastq.gz -o data/RNAseq_HS_rep3_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929274/SRR9929274_1.fastq.gz -o data/RNAseq_HS_rep4_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929274/SRR9929274_2.fastq.gz -o data/RNAseq_HS_rep4_R2.fq.gz
for REP in WT_rep1 WT_rep2 WT_rep3 WT_rep4 HS_rep1 HS_rep2 HS_rep3 HS_rep4
do
STAR \
--genomeDir data/genome \
--readFilesCommand zcat \
--runThreadN 12 \
--readFilesIn data/RNAseq_"${REP}"_R1.fq.gz data/RNAseq_"${REP}"_R2.fq.gz \
--outFileNamePrefix data/mapping/RNAseq_"${REP}"_ \
--outSAMtype BAM Unsorted \
--outSAMunmapped None \
--outSAMattributes Standard
samtools fixmate -@ 16 --output-fmt bam -m data/mapping/RNAseq_"${REP}"_Aligned.out.bam - \
| samtools sort -@ 16 --output-fmt bam - \
| samtools markdup -@ 16 --output-fmt bam -r - - \
| samtools view -@ 16 --output-fmt bam -f 0x001 -f 0x002 -F 0x004 -F 0x008 -q 20 -1 -b - \
| samtools sort -@ 16 --output-fmt bam -l 9 -o data/mapping/RNAseq_"${REP}"_R64-1-1.bam
samtools index -@ 16 data/mapping/RNAseq_"${REP}"_R64-1-1.bam
bamCoverage \
--bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
--outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.unstranded.CPM.bw \
--binSize 1 \
--numberOfProcessors 16 \
--normalizeUsing CPM \
--skipNonCoveredRegions \
--extendReads \
--ignoreDuplicates
bamCoverage \
--bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
--outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.fwd.CPM.bw \
--binSize 1 \
--numberOfProcessors 16 \
--normalizeUsing CPM \
--skipNonCoveredRegions \
--extendReads \
--ignoreDuplicates \
--filterRNAstrand forward
bamCoverage \
--bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
--outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.rev.CPM.bw \
--binSize 1 \
--numberOfProcessors 16 \
--normalizeUsing CPM \
--skipNonCoveredRegions \
--extendReads \
--ignoreDuplicates \
--filterRNAstrand reverse
done16.3 Counting reads over gene annotations
16.3.1 Counting reads over peaks
sh
mkdir data/counts
featureCounts \
-g gene_name \
-s 2 \
-p --countReadPairs \
-T 12 \
-a data/genome/R64-1-1.gtf \
-o data/counts/RNAseq_counts.tsv \
data/mapping/RNAseq_WT_rep1_R64-1-1.bam \
data/mapping/RNAseq_WT_rep2_R64-1-1.bam \
data/mapping/RNAseq_WT_rep3_R64-1-1.bam \
data/mapping/RNAseq_WT_rep4_R64-1-1.bam