Prerequisites
The course is intended for those who have basic familiarity with Unix and the R scripting language.
We will also assume that you are familiar with mapping and analyzing bulk RNA-seq data as well as with the commonly available computational tools.
- If a refresher is needed for Unix command line (hopefully not), please go over this tutorial and its companion cheatsheet.
- Getting down to basics: an introduction to the fundamentals of R (courtesy of Mark Ravinet).
- Gentle introduction to R/Bioconductor: here
- For a full in-depth guide to the Bioconductor ecosystem: read the comprehensive R/Bioconductor book by Kasper D. Hansen, available under the CC BY-NC-SA 4.0 license [PDF]
Local configuration
- Ideally (though not strictly required), a configured SSH client (it should already be installed on Linux/Mac machines; PuTTY can be set up for Windows).
- Ideally (though not strictly required), an SFTP client (Forklift is excellent for Mac, although not free beyond the trial version; Cyberduck can be used for Windows; FileZilla can be used for Mac, Windows and Linux).
- Computer with high-speed internet access (no specific configuration required - everything will be performed on a remote AWS machine).
- Zoom videoconferencing software
Remote configuration
The AWS machine is running Ubuntu and has been set up as follows:
## --- Clean up previous R installs and system libraries
# The package patterns are quoted so that the shell passes them to apt
# verbatim instead of expanding them against files in the current directory.
sudo apt purge 'r-base*' r-recommended 'r-cran-*'
sudo apt autoremove
# Refresh the package index, then upgrade the installed packages.
sudo apt update
sudo apt upgrade
## --- Libraries
sudo apt update
# NOTE(review): libicu60 is a versioned package name - confirm it exists on the
# Ubuntu release used for the course.
sudo apt install libc6 libicu60 -y
# Compilers, build tools and development headers, installed in a single apt
# call. Every line except the last must end with a backslash, otherwise the
# remaining names are run as a separate shell command instead of being installed.
# NOTE(review): libfftw3-dev is assumed to be the package meant by "fftw3"
# (FFTW3 development files) - confirm.
sudo apt install -y \
  gcc g++ perl python3 python3-pip \
  automake make cmake less vim nano fort77 \
  wget git curl bzip2 gfortran unzip ftp \
  libpng-dev libjpeg-dev \
  texlive-latex-base default-jre build-essential \
  libbz2-dev liblzma-dev libtool \
  libxml2 libxml2-dev zlib1g-dev \
  libdb-dev libglu1-mesa-dev \
  libncurses5-dev libghc-zlib-dev libncurses-dev \
  libpcre3-dev \
  libblas-dev libzmq3-dev libreadline-dev libssl-dev \
  libcurl4-openssl-dev libx11-dev libxt-dev \
  x11-common libcairo2-dev \
  libreadline6-dev libgsl0-dev \
  libeigen3-dev libboost-all-dev \
  libgtk2.0-dev xvfb xauth xfonts-base \
  apt-transport-https libhdf5-serial-dev \
  libudunits2-dev libgdal-dev libgeos-dev libproj-dev \
  libv8-dev \
  libmagick++-dev \
  libharfbuzz-dev libfribidi-dev \
  libfftw3-dev
## --- R base install
# Import the CRAN signing key first, so that the repository index can be
# verified as soon as the repository is registered.
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
# The suite name is "<codename>-cran40/": there must be no space between the
# release codename and the "-cran40/" suffix.
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
sudo apt update
sudo apt install r-base r-recommended r-base-core
The following packages have been installed (along with their many dependencies, of course!):
## --- Install important R packages for single-cell RNA-seq projects
# Each R expression is wrapped in single quotes for the shell, so the R code
# itself must only use double quotes for its strings.
## pak
sudo Rscript -e 'install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))'
## CRAN packages
sudo Rscript -e 'pak::pkg_install(c("tidyverse", "devtools", "umap", "corrplot", "gam", "ggbeeswarm", "ggthemes", "Matrix", "zeallot", "fossil", "rgl", "BiocManager", "Seurat", "rliger", "Signac", "multtest", "metap", "servr"))'
## Bioconductor Packages
sudo Rscript -e 'pak::pkg_install(c("airway", "SingleCellExperiment", "scran", "scater", "batchelor", "DropletUtils", "AUCell", "plyranges", "ggraph", "clustree", "celldex", "SingleR", "slingshot", "tradeSeq", "velociraptor", "BUSpaRse", "org.Mm.eg.db", "org.Hs.eg.db", "TENxPBMCData", "scRNAseq", "scDblFinder", "chromVAR", "EnsDb.Hsapiens.v75", "LoomExperiment", "biovizBase", "rebook", "bluster"))'
## Github Packages
# RcppPlanc is installed from the welch-lab r-universe repositories (binary
# build path for Ubuntu noble / R 4.4, then sources), with CRAN as a further
# repository.
sudo Rscript -e 'install.packages("RcppPlanc", repos = c(linux = "https://welch-lab.r-universe.dev/bin/linux/noble/4.4/", sources = "https://welch-lab.r-universe.dev", cran = "https://cloud.r-project.org"))'
## For ubuntu < 24.04, install with makevars:
# withr::with_makevars( c(CFLAGS = "-w", CXXFLAGS = "-w", CPPFLAGS = "-w", LDFLAGS = ""), { .... }, assignment = "+=")
## --- Install other software (fastQC, samtools, cellranger and cellranger indexes, ffq)
# fastqc samtools
sudo apt install fastqc samtools python3.8
# cellranger
cd /opt/
# NOTE(review): the download URL carries Expires/Key-Pair-Id/Signature query
# parameters, so it is presumably a time-limited signed link - if the download
# is refused, obtain a fresh link from the 10x Genomics download page.
sudo wget -O cellranger-8.0.1.tar.gz "https://cf.10xgenomics.com/releases/cell-exp/cellranger-8.0.1.tar.gz?Expires=1730426816&Key-Pair-Id=APKAI7S6A5RYOXBWRPDA&Signature=RCkLqP02kkVLRceyojFWbQ4eXrjemlvkAfWxSxbkad0ZqAIjtxTlSsc5Tn3x7PLj-mZiwuTjqx4o~bcR9J5fSBHdtkUrSG~MUqLxquMxcCPom3eR2Or9vdqGvk4faMhv1-jrXS8GCOD82zU8ZdhqecdE5tJdXjLHz6flWo7bQt0jqHF3-roktu0uuXHtebw586Cg39xqzMIG8x9SkeTsRGsJAKB7~jL65YKCEPpAl~QndTghDIymv43gHO8MWiQ7QLbF-nsF16uhqUJR7sAsPZSjCkKPW0GZTIpUmie1nv2OnoenOw9O6-Wj4gIBzcWVHkt-qkmPwvnF3nXq~0xUUw__"
sudo tar -xzvf cellranger-8.0.1.tar.gz
sudo ln -s /opt/cellranger-8.0.1/cellranger /usr/local/bin/cellranger
# Mouse (mm10) reference for cellranger, unpacked in /opt and then moved into the Share folder
sudo wget https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-mm10-2020-A.tar.gz
sudo tar -xzvf refdata-gex-mm10-2020-A.tar.gz
# Create the destination first: if ~/Share did not exist, mv would rename the
# reference directory to ~/Share instead of placing it inside that folder.
mkdir -p ~/Share
sudo mv refdata-gex-mm10-2020-A ~/Share/
## Copy Share from local to AWS
# rsync --progress --recursive --verbose ~/Share/ physalia:
# Hand the Share folder to the ubuntu user and set mode 755 (owner: read/write/execute;
# group and others: read/execute only), so that userX cannot modify or erase the Share folder and its contents
sudo chown -R ubuntu:ubuntu ~/Share/
sudo chmod -R 755 ~/Share
# ffq: python3-distutils comes from apt, then ffq itself is installed with pip
sudo apt install python3-distutils
sudo pip install ffq
# seqkit: download the v2.4.0 linux/amd64 release archive into /opt, unpack it,
# and symlink the extracted binary into /usr/local/bin
cd /opt/
seqkit_archive=seqkit_linux_amd64.tar.gz
sudo wget -O "${seqkit_archive}" "https://github.com/shenwei356/seqkit/releases/download/v2.4.0/${seqkit_archive}"
sudo tar -xzvf "${seqkit_archive}"
sudo ln -s /opt/seqkit /usr/local/bin/seqkit
# bcl2fastq: Illumina ships an .rpm; alien is used to turn it into a .deb,
# which is then installed with dpkg
sudo apt install alien
cd /opt/
bcl2fastq_rpm=bcl2fastq2-v2.17.1.14-Linux-x86_64.rpm
sudo wget "http://support.illumina.com/content/dam/illumina-support/documents/downloads/software/bcl2fastq/${bcl2fastq_rpm}"
sudo alien "${bcl2fastq_rpm}"
sudo dpkg -i bcl2fastq2_0v2.17.1.14-2_amd64.deb
## Add basilisk external dir to system-wide Renviron
# The append has to be performed by a process running as root. With
# `sudo echo ... >> file` only echo is elevated, while the redirection is opened
# by the calling (unprivileged) shell. `sudo tee -a` does the append as root;
# its copy of the line on stdout is discarded.
echo "BASILISK_EXTERNAL_DIR=/opt/basilisk_envs" | sudo tee -a /etc/R/Renviron > /dev/null