16  Demo 4 - Processing of RNA-seq data

Aims
  • Manually process RNA-seq reads with STAR
  • Generate stranded RNA-seq tracks with bamCoverage
  • Estimate transcript abundance from RNA-seq samples with featureCounts
Datasets

RNA-seq data was published in Nuño-Cabanes et al., Scientific Data 2020

  • Control RNA-seq @ 30 C:

    • SRR9929263
    • SRR9929264
    • SRR9929273
    • SRR9929282
  • Heat shock RNA-seq @ 39 C, 20 min: SRR2045248 & SRR2045249

    • SRR9929271
    • SRR9929265
    • SRR9929280
    • SRR9929274

16.1 Indexing genome for STAR index

sh
STAR \
    --runThreadN 12 \
    --runMode genomeGenerate \
    --genomeDir data/genome/ \
    --genomeFastaFiles data/genome/R64-1-1.fa \
    --sjdbGTFfile data/genome/R64-1-1.gtf \
    --sjdbOverhang 99

16.2 Mapping RNA-seq on R64-1-1

sh
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929263/SRR9929263_1.fastq.gz -o data/RNAseq_WT_rep1_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929263/SRR9929263_2.fastq.gz -o data/RNAseq_WT_rep1_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929264/SRR9929264_1.fastq.gz -o data/RNAseq_WT_rep2_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929264/SRR9929264_2.fastq.gz -o data/RNAseq_WT_rep2_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929273/SRR9929273_1.fastq.gz -o data/RNAseq_WT_rep3_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/003/SRR9929273/SRR9929273_2.fastq.gz -o data/RNAseq_WT_rep3_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/002/SRR9929282/SRR9929282_1.fastq.gz -o data/RNAseq_WT_rep4_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/002/SRR9929282/SRR9929282_2.fastq.gz -o data/RNAseq_WT_rep4_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/001/SRR9929271/SRR9929271_1.fastq.gz -o data/RNAseq_HS_rep1_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/001/SRR9929271/SRR9929271_2.fastq.gz -o data/RNAseq_HS_rep1_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/005/SRR9929265/SRR9929265_1.fastq.gz -o data/RNAseq_HS_rep2_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/005/SRR9929265/SRR9929265_2.fastq.gz -o data/RNAseq_HS_rep2_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/000/SRR9929280/SRR9929280_1.fastq.gz -o data/RNAseq_HS_rep3_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/000/SRR9929280/SRR9929280_2.fastq.gz -o data/RNAseq_HS_rep3_R2.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929274/SRR9929274_1.fastq.gz -o data/RNAseq_HS_rep4_R1.fq.gz
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR992/004/SRR9929274/SRR9929274_2.fastq.gz -o data/RNAseq_HS_rep4_R2.fq.gz

for REP in WT_rep1 WT_rep2 WT_rep3 WT_rep4 HS_rep1 HS_rep2 HS_rep3 HS_rep4
do 

    STAR \
        --genomeDir data/genome \
        --readFilesCommand zcat \
        --runThreadN 12 \
        --readFilesIn data/RNAseq_"${REP}"_R1.fq.gz data/RNAseq_"${REP}"_R2.fq.gz \
        --outFileNamePrefix data/mapping/RNAseq_"${REP}"_ \
        --outSAMtype BAM Unsorted \
        --outSAMunmapped None \
        --outSAMattributes Standard 

    samtools fixmate -@ 16 --output-fmt bam -m data/mapping/RNAseq_"${REP}"_Aligned.out.bam - \
        | samtools sort -@ 16 --output-fmt bam - \
        | samtools markdup -@ 16 --output-fmt bam -r - - \
        | samtools view -@ 16 --output-fmt bam -f 0x001 -f 0x002 -F 0x004 -F 0x008 -q 20 -1 -b - \
        | samtools sort -@ 16 --output-fmt bam -l 9 -o data/mapping/RNAseq_"${REP}"_R64-1-1.bam

    samtools index -@ 16 data/mapping/RNAseq_"${REP}"_R64-1-1.bam

    bamCoverage \
        --bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
        --outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.unstranded.CPM.bw \
        --binSize 1 \
        --numberOfProcessors 16 \
        --normalizeUsing CPM \
        --skipNonCoveredRegions \
        --extendReads \
        --ignoreDuplicates

    bamCoverage \
        --bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
        --outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.fwd.CPM.bw \
        --binSize 1 \
        --numberOfProcessors 16 \
        --normalizeUsing CPM \
        --skipNonCoveredRegions \
        --extendReads \
        --ignoreDuplicates \
        --filterRNAstrand forward

    bamCoverage \
        --bam data/mapping/RNAseq_"${REP}"_R64-1-1.bam \
        --outFileName data/tracks/RNAseq_"${REP}"_R64-1-1.rev.CPM.bw \
        --binSize 1 \
        --numberOfProcessors 16 \
        --normalizeUsing CPM \
        --skipNonCoveredRegions \
        --extendReads \
        --ignoreDuplicates \
        --filterRNAstrand reverse

done

16.3 Counting reads over gene annotations

16.3.1 Counting reads over peaks

sh
mkdir data/counts 
featureCounts \
    -g gene_name \
    -s 2 \
    -p --countReadPairs \
    -T 12 \
    -a data/genome/R64-1-1.gtf \
    -o data/counts/RNAseq_counts.tsv \
    data/mapping/RNAseq_WT_rep1_R64-1-1.bam \
    data/mapping/RNAseq_WT_rep2_R64-1-1.bam \
    data/mapping/RNAseq_WT_rep3_R64-1-1.bam \
    data/mapping/RNAseq_WT_rep4_R64-1-1.bam