r
library(rtracklayer)
library(tidyverse)
bedfiles <- list.files('Share/peaks/modENCODE/', pattern = '*bed')
peaks <- map(bedfiles, ~ import(file.path('Share/peaks/modENCODE/', .x)))
names(peaks) <- str_replace(bedfiles, '.bed', '')Check out the modENCODE/modERN TF ChIP-seq database.
Inspect the available datasets, by filtering e.g. using the following criteria:
TF ChIP-seq
C. elegans
ce11
modENCODE
Peak files for 12 ChIP-seq datasets have been already downloaded from this database.
bed files available for the peaks.GRanges object.r
library(rtracklayer)
library(tidyverse)
bedfiles <- list.files('Share/peaks/modENCODE/', pattern = '*bed')
peaks <- map(bedfiles, ~ import(file.path('Share/peaks/modENCODE/', .x)))
names(peaks) <- str_replace(bedfiles, '.bed', '')ce11
Genomic features can be easily annotated from a set of gene features. ChIPseeker facilitates the annotation of ChIP-seq peaks using gene annotations directly provided in R by TxDb gene annotation packages, e.g. TxDb.Celegans.UCSC.ce11.ensGene
TxDb.Celegans.UCSC.ce11.ensGene package in RChIPseeker
r
library(ChIPseeker)
library(TxDb.Celegans.UCSC.ce11.ensGene)ChIPseeker to annotate ChIP-seq peaks for a single ChIP-seq experiment.r
ceh48 <- peaks[[1]]
annots_ceh48 <- annotatePeak(ceh48, TxDb = TxDb.Celegans.UCSC.ce11.ensGene)
getAnnoStat(annots_ceh48)peaks list and return an aggregate data.frame, use the imap_dfr.r
annots <- imap_dfr(peaks, ~ annotatePeak(.x, TxDb = TxDb.Celegans.UCSC.ce11.ensGene) |>
getAnnoStat() |>
mutate(TF = .y)
)
annotsR
library(readxl)
download.file('https://genome.cshlp.org/content/suppl/2020/11/16/gr.265934.120.DC1/Supplemental_Table_S2.xlsx', 'WBcel235_REs.xlsx')
REs <- read_xlsx('WBcel235_REs.xlsx', skip = 2, col_names = TRUE)
REs <- makeGRangesFromDataFrame(
REs, seqnames.field = 'chrom_ce11',
start.field = 'start_ce11', end.field = 'end_ce11',
keep.extra.columns = TRUE
)
REsxnd-1 peaks.xnd-1 peaks.xnd-1 peaks.R
table(REs %over% peaks[['xnd-1']])
table(REs %over% peaks[['xnd-1']], REs$Annotation)
table(REs %over% peaks[['xnd-1']], REs$Annotation == 'Germline')
table(REs %over% peaks[['xnd-1']], REs$Annotation == 'Germline') |> fisher.test()Germline, Neurons, Muscle, Hypod. and Intest.), check whether the tissue-specific REs are enriched in xnd-1 peak.tissue and return an aggregated data.frame using map_dfr.fisher.test() into a tibble with the glance function from broom package.map_dfr, iterating first over each TF then over each tissue.R
TFs <- names(peaks)
tissues <- c("Germline", "Neurons", "Muscle", "Hypod.", "Intest.")
df <- map_dfr(TFs, function(TF) {
map_dfr(tissues, function(tissue) {
fisher.test(
REs %over% peaks[[TF]],
REs$Annotation == tissue
) |>
broom::glance() |>
mutate(tissue = tissue, TF = TF) |>
dplyr::select(tissue, TF, estimate, p.value)
})
})
df