library(ggplot2)
library(cowplot)
library(purrr)
library(HiCExperiment)
## Consider using the `HiContacts` package to perform advanced genomic operations
## on `HiCExperiment` objects.
##
## Read "Orchestrating Hi-C analysis with Bioconductor" online book to learn more:
## https://js2264.github.io/OHCA/
##
## Attaching package: 'HiCExperiment'
## The following object is masked from 'package:ggplot2':
##
## resolution
library(HiContactsData)
## Loading required package: ExperimentHub
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:HiCExperiment':
##
## as.data.frame
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, aperm,
## append, as.data.frame, basename, cbind, colnames, dirname,
## do.call, duplicated, eval, evalq, get, grep, grepl, intersect,
## is.unsorted, lapply, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply,
## setdiff, table, tapply, union, unique, unsplit, which.max,
## which.min
## Loading required package: AnnotationHub
## Loading required package: BiocFileCache
## Loading required package: dbplyr
library(fourDNData)
Workflow 2: Chromosome compartment cohesion upon mitosis entry
This chapter illustrates how to:
- Annotate compartments for a list of HiC experiments
- Generate saddle plots for a list of HiC experiments
- Quantify changes in interactions between compartments between different timepoints
We leverage five chicken datasets in this notebook, published in Gibcus et al. (2018). They are all available from the 4DN data portal using the fourDNData
package.
-
4DNES9LEZXN7
: chicken cell culture blocked in G2 -
4DNESNWWIFZU
: chicken cell culture released from G2 block (5min) -
4DNESGDXKM2I
: chicken cell culture released from G2 block (10min) -
4DNESIR416OW
: chicken cell culture released from G2 block (15min) -
4DNESS8PTK6F
: chicken cell culture released from G2 block (30min)
Importing data
The 4DN consortium provides access to the datasets published in Gibcus et al. (2018). in R
, they can be obtained thanks to the fourDNData
gateway package.
The first time the following chunk of code is executed, it will cache a large amount of data (mostly consisting of contact matrices stored in .mcool
files).
library(HiCExperiment)
library(fourDNData)
library(BiocParallel)
samples <- list(
'4DNES9LEZXN7' = 'G2 block',
'4DNESNWWIFZU' = 'prophase (5m)',
'4DNESGDXKM2I' = 'prophase (10m)',
'4DNESIR416OW' = 'prometaphase (15m)',
'4DNESS8PTK6F' = 'prometaphase (30m)'
)
bpparam <- MulticoreParam(workers = 5, progressbar = FALSE)
hics <- bplapply(names(samples), fourDNHiCExperiment, BPPARAM = bpparam)
names(hics) <- samples
hics[["G2 block"]]
## `HiCExperiment` object with 150,494,008 contacts over 4,109 regions
## -------
## fileName: "/root/.cache/R/fourDNData/15f21f56ea78_4DNFIT479GDR.mcool"
## focus: "whole genome"
## resolutions(13): 1000 2000 ... 5000000 10000000
## active resolution: 250000
## interactions: 7262748
## scores(2): count balanced
## topologicalFeatures: compartments(891) borders(3465)
## pairsFile: N/A
## metadata(3): 4DN_info eigens diamond_insulation
Plotting whole chromosome matrices
We can visualize the five different Hi-C maps on the entire chromosome 3
with HiContacts
by iterating over each of the HiCExperiment
objects.
library(purrr)
library(HiContacts)
## Registered S3 methods overwritten by 'readr':
## method from
## as.data.frame.spec_tbl_df vroom
## as_tibble.spec_tbl_df vroom
## format.col_spec vroom
## print.col_spec vroom
## print.collector vroom
## print.date_names vroom
## print.locale vroom
## str.col_spec vroom
library(ggplot2)
pl <- imap(hics, ~ .x['chr3'] |>
zoom(100000) |>
plotMatrix(use.scores = 'balanced', limits = c(-4, -1), caption = FALSE) +
ggtitle(.y)
)
library(cowplot)
plot_grid(plotlist = pl, nrow = 1)
This highlights the progressive remodeling of chromatin into condensed chromosomes, starting as soon as 5’ after release from G2 phase.
Zooming on a chromosome section
Zooming on a chromosome section, we can plot the Hi-C autocorrelation matrix for each timepoint. These matrices are generally used to highlight the overall correlation of interaction profiles between different segments of a chromosome section (see Chapter 5 for more details).
## --- Format compartment positions of chr. 4 segment
.chr <- 'chr4'
.start <- 59000000L
.stop <- 75000000L
library(GenomicRanges)
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:HiCExperiment':
##
## metadata<-
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## I, expand.grid, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
coords <- GRanges(paste0(.chr, ':', .start, '-', .stop))
compts_df <- topologicalFeatures(hics[["G2 block"]], "compartments") |>
subsetByOverlaps(coords, type = 'within') |>
as.data.frame()
compts_gg <- geom_rect(
data = compts_df,
mapping = aes(xmin = start, xmax = end, ymin = -500000, ymax = 0, alpha = compartment),
col = 'black', inherit.aes = FALSE
)
## --- Subset contact matrices to chr. 4 segment and computing autocorrelation scores
g2 <- hics[["G2 block"]] |>
zoom(100000) |>
subsetByOverlaps(coords) |>
autocorrelate()
##
pro5 <- hics[["prophase (5m)"]] |>
zoom(100000) |>
subsetByOverlaps(coords) |>
autocorrelate()
pro30 <- hics[["prometaphase (30m)"]] |>
zoom(100000) |>
subsetByOverlaps(coords) |>
autocorrelate()
## --- Plot autocorrelation matrices
plot_grid(
plotMatrix(
subsetByOverlaps(g2, coords),
use.scores = 'autocorrelated',
scale = 'linear',
limits = c(-1, 1),
cmap = bwrColors(),
maxDistance = 10000000,
caption = FALSE
) + ggtitle('G2') + compts_gg,
plotMatrix(
subsetByOverlaps(pro5, coords),
use.scores = 'autocorrelated',
scale = 'linear',
limits = c(-1, 1),
cmap = bwrColors(),
maxDistance = 10000000,
caption = FALSE
) + ggtitle('Prophase 5min') + compts_gg,
plotMatrix(
subsetByOverlaps(pro30, coords),
use.scores = 'autocorrelated',
scale = 'linear',
limits = c(-1, 1),
cmap = bwrColors(),
maxDistance = 10000000,
caption = FALSE
) + ggtitle('Prometaphase 30min') + compts_gg,
nrow = 1
)
## Warning: Using alpha for a discrete variable is not advised.
## Using alpha for a discrete variable is not advised.
## Using alpha for a discrete variable is not advised.
These correlation matrices suggest that there are two different regimes of chromatin compartment remodeling in this chromosome section:
- Correlation scores between genomic bins within the compartment A remain positive 5’ after G2 release (albeit reduced compared to G2 block) and eventually become null 30’ after G2 release.
- Correlation scores between genomic bins within the compartment B are overall null as soon as 5’ after G2 release.
Generating saddle plots
Saddle plots are typically used to measure the observed
vs. expected
interaction scores within or between genomic loci belonging to A and B compartments. Here, they can be used to check whether the two regimes of chromatin compartment remodeling are observed genome-wide.
Non-overlapping genomic windows are grouped by nbins
quantiles (typically between 10 and 50 bins) according to their A/B compartment eigenvector value, from lowest eigenvector values (i.e. strongest B compartments) to highest eigenvector values (i.e. strongest A compartments). The average observed
vs. expected
interaction scores are computed for pairwise eigenvector quantiles and plotted in a 2D heatmap.
pl <- imap(hics, ~ plotSaddle(.x, nbins = 38, BPPARAM = bpparam) + ggtitle(.y))
plot_grid(plotlist = pl, nrow = 1)
These plots confirm the previous observation made on chr. 4
and reveal that intra-B compartment interactions are generally lost 5’ after G2 release, while intra-A interactions take up to 15’ after G2 release to disappear.
The plotSaddle()
function requires an eigenvector corresponding to A/B compartments. In this example, this eigenvector is recovered from the 4DN data portal. If not already available, this eigenvector can be computed from the contact matrix using the getCompartments()
function.
Quantifying interactions within and between compartments
We can leverage the replicate-merged contact matrices to quantify the interaction frequencies within A or B compartments or between A and B compartments, at different timepoints.
We can use the A/B compartment annotations obtained at the G2 block
timepoint and extract O/E
(observed vs expected) scores for interactions within A or B compartments or between A and B compartments, at different timepoints.
## --- Extract the A/B compartments identified in G2 block
compts <- topologicalFeatures(hics[["G2 block"]], "compartments")
compts$ID <- paste0(compts$compartment, seq_along(compts))
## --- Iterate over timepoints to extract `detrended` (O/E) scores and
## compartment annotations
library(tibble)
library(plyranges)
##
## Attaching package: 'plyranges'
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:stats':
##
## filter
df <- imap(hics[c(1, 2, 5)], ~ {
ints <- cis(.x) |> ## Filter out trans interactions
detrend() |> ## Compute O/E scores
interactions() ## Recover interactions
ints$comp_first <- join_overlap_left(anchors(ints, "first"), compts)$ID
ints$comp_second <- join_overlap_left(anchors(ints, "second"), compts)$ID
tibble(
sample = .y,
bin1 = ints$comp_first,
bin2 = ints$comp_second,
dist = InteractionSet::pairdist(ints),
OE = ints$detrended
) |>
filter(dist > 5e6) |>
mutate(type = dplyr::case_when(
grepl('A', bin1) & grepl('A', bin2) ~ 'AA',
grepl('B', bin1) & grepl('B', bin2) ~ 'BB',
grepl('A', bin1) & grepl('B', bin2) ~ 'AB',
grepl('B', bin1) & grepl('A', bin2) ~ 'BA'
)) |>
filter(bin1 != bin2)
}) |> list_rbind() |> mutate(
sample = factor(sample, names(hics)[c(1, 2, 5)])
)
We can now plot the changes in O/E scores for intra-A, intra-B, A-B or B-A interactions, splitting boxplots by timepoint.
ggplot(df, aes(x = type, y = OE, group = type, fill = type)) +
geom_boxplot(outlier.shape = NA) +
facet_grid(~sample) +
theme_bw() +
ylim(c(-2, 2))
## Warning: Removed 66307 rows containing non-finite values (`stat_boxplot()`).
This visualization suggests that interactions between genomic loci belonging to the B compartment are lost more rapidly than those between genomic loci belonging to the A compartment, when cells are released from G2 to enter mitosis.
Session info
sessioninfo::session_info(include_base = TRUE)
## ─ Session info ────────────────────────────────────────────────────────────
## setting value
## version R Under development (unstable) (2024-01-17 r85813)
## os Ubuntu 22.04.3 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate C
## ctype en_US.UTF-8
## tz Etc/UTC
## date 2024-01-22
## pandoc 3.1.1 @ /usr/local/bin/ (via rmarkdown)
##
## ─ Packages ────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.4.0)
## AnnotationDbi 1.65.2 2023-11-03 [2] Bioconductor
## AnnotationHub * 3.11.1 2023-12-11 [2] Bioconductor 3.19 (R 4.4.0)
## backports 1.4.1 2021-12-13 [2] CRAN (R 4.4.0)
## base * 4.4.0 2024-01-18 [3] local
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 4.4.0)
## beeswarm 0.4.0 2021-06-01 [2] CRAN (R 4.4.0)
## Biobase 2.63.0 2023-10-24 [2] Bioconductor
## BiocFileCache * 2.11.1 2023-10-26 [2] Bioconductor
## BiocGenerics * 0.49.1 2023-11-01 [2] Bioconductor
## BiocIO 1.13.0 2023-10-24 [2] Bioconductor
## BiocManager 1.30.22 2023-08-08 [2] CRAN (R 4.4.0)
## BiocParallel * 1.37.0 2023-10-24 [2] Bioconductor
## BiocVersion 3.19.1 2023-10-26 [2] Bioconductor
## Biostrings 2.71.1 2023-10-25 [2] Bioconductor
## bit 4.0.5 2022-11-15 [2] CRAN (R 4.4.0)
## bit64 4.0.5 2020-08-30 [2] CRAN (R 4.4.0)
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.4.0)
## blob 1.2.4 2023-03-17 [2] CRAN (R 4.4.0)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.4.0)
## Cairo 1.6-2 2023-11-28 [2] CRAN (R 4.4.0)
## checkmate 2.3.1 2023-12-04 [2] CRAN (R 4.4.0)
## cli 3.6.2 2023-12-11 [2] CRAN (R 4.4.0)
## cluster 2.1.6 2023-12-01 [3] CRAN (R 4.4.0)
## codetools 0.2-19 2023-02-01 [3] CRAN (R 4.4.0)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.4.0)
## compiler 4.4.0 2024-01-18 [3] local
## cowplot * 1.1.2 2023-12-15 [2] CRAN (R 4.4.0)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.4.0)
## curl 5.2.0 2023-12-08 [2] CRAN (R 4.4.0)
## data.table 1.14.10 2023-12-08 [2] CRAN (R 4.4.0)
## datasets * 4.4.0 2024-01-18 [3] local
## DBI 1.2.1 2024-01-12 [2] CRAN (R 4.4.0)
## dbplyr * 2.4.0 2023-10-26 [2] CRAN (R 4.4.0)
## DelayedArray 0.29.0 2023-10-24 [2] Bioconductor
## digest 0.6.34 2024-01-11 [2] CRAN (R 4.4.0)
## doParallel 1.0.17 2022-02-07 [2] CRAN (R 4.4.0)
## dplyr 1.1.4 2023-11-17 [2] CRAN (R 4.4.0)
## dynamicTreeCut 1.63-1 2016-03-11 [2] CRAN (R 4.4.0)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.4.0)
## ExperimentHub * 2.11.1 2023-12-11 [2] Bioconductor 3.19 (R 4.4.0)
## fansi 1.0.6 2023-12-08 [2] CRAN (R 4.4.0)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.4.0)
## fastcluster 1.2.6 2024-01-12 [2] CRAN (R 4.4.0)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.4.0)
## filelock 1.0.3 2023-12-11 [2] CRAN (R 4.4.0)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.4.0)
## foreign 0.8-86 2023-11-28 [3] CRAN (R 4.4.0)
## Formula 1.2-5 2023-02-24 [2] CRAN (R 4.4.0)
## fourDNData * 1.3.0 2023-10-31 [2] Bioconductor
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.4.0)
## GenomeInfoDb * 1.39.5 2024-01-01 [2] Bioconductor 3.19 (R 4.4.0)
## GenomeInfoDbData 1.2.11 2024-01-22 [2] Bioconductor
## GenomicAlignments 1.39.2 2024-01-16 [2] Bioconductor 3.19 (R 4.4.0)
## GenomicRanges * 1.55.1 2023-10-29 [2] Bioconductor
## ggbeeswarm 0.7.2 2023-04-29 [2] CRAN (R 4.4.0)
## ggplot2 * 3.4.4 2023-10-12 [2] CRAN (R 4.4.0)
## ggrastr 1.0.2 2023-06-01 [2] CRAN (R 4.4.0)
## glue 1.7.0 2024-01-09 [2] CRAN (R 4.4.0)
## GO.db 3.18.0 2024-01-22 [2] Bioconductor
## graphics * 4.4.0 2024-01-18 [3] local
## grDevices * 4.4.0 2024-01-18 [3] local
## grid 4.4.0 2024-01-18 [3] local
## gridExtra 2.3 2017-09-09 [2] CRAN (R 4.4.0)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.4.0)
## HiCExperiment * 1.3.0 2023-10-24 [2] Bioconductor
## HiContacts * 1.5.0 2023-10-24 [2] Bioconductor
## HiContactsData * 1.5.3 2024-01-22 [2] Github (js2264/HiContactsData@d5bebe7)
## Hmisc 5.1-1 2023-09-12 [2] CRAN (R 4.4.0)
## hms 1.1.3 2023-03-21 [2] CRAN (R 4.4.0)
## htmlTable 2.4.2 2023-10-29 [2] CRAN (R 4.4.0)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.4.0)
## htmlwidgets 1.6.4 2023-12-06 [2] CRAN (R 4.4.0)
## httr 1.4.7 2023-08-15 [2] CRAN (R 4.4.0)
## impute 1.77.0 2023-10-24 [2] Bioconductor
## InteractionSet 1.31.0 2023-10-24 [2] Bioconductor
## IRanges * 2.37.1 2024-01-19 [2] Bioconductor 3.19 (R 4.4.0)
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.4.0)
## jsonlite 1.8.8 2023-12-04 [2] CRAN (R 4.4.0)
## KEGGREST 1.43.0 2023-10-24 [2] Bioconductor
## knitr 1.45 2023-10-30 [2] CRAN (R 4.4.0)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.4.0)
## lattice 0.22-5 2023-10-24 [3] CRAN (R 4.4.0)
## lifecycle 1.0.4 2023-11-07 [2] CRAN (R 4.4.0)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.4.0)
## Matrix 1.6-5 2024-01-11 [3] CRAN (R 4.4.0)
## MatrixGenerics 1.15.0 2023-10-24 [2] Bioconductor
## matrixStats 1.2.0 2023-12-11 [2] CRAN (R 4.4.0)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.4.0)
## methods * 4.4.0 2024-01-18 [3] local
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.4.0)
## nnet 7.3-19 2023-05-03 [3] CRAN (R 4.4.0)
## parallel 4.4.0 2024-01-18 [3] local
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.4.0)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.4.0)
## plyranges * 1.23.0 2023-10-24 [2] Bioconductor
## png 0.1-8 2022-11-29 [2] CRAN (R 4.4.0)
## preprocessCore 1.65.0 2023-10-24 [2] Bioconductor
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.4.0)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.4.0)
## rappdirs 0.3.3 2021-01-31 [2] CRAN (R 4.4.0)
## Rcpp 1.0.12 2024-01-09 [2] CRAN (R 4.4.0)
## RCurl 1.98-1.14 2024-01-09 [2] CRAN (R 4.4.0)
## readr 2.1.5 2024-01-10 [2] CRAN (R 4.4.0)
## restfulr 0.0.15 2022-06-16 [2] CRAN (R 4.4.0)
## rhdf5 2.47.2 2024-01-15 [2] Bioconductor 3.19 (R 4.4.0)
## rhdf5filters 1.15.1 2023-11-06 [2] Bioconductor
## Rhdf5lib 1.25.1 2023-12-11 [2] Bioconductor 3.19 (R 4.4.0)
## rjson 0.2.21 2022-01-09 [2] CRAN (R 4.4.0)
## rlang 1.1.3 2024-01-10 [2] CRAN (R 4.4.0)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.4.0)
## rpart 4.1.23 2023-12-05 [3] CRAN (R 4.4.0)
## Rsamtools 2.19.3 2024-01-17 [2] Bioconductor 3.19 (R 4.4.0)
## RSpectra 0.16-1 2022-04-24 [2] CRAN (R 4.4.0)
## RSQLite 2.3.5 2024-01-21 [2] CRAN (R 4.4.0)
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.4.0)
## rtracklayer 1.63.0 2024-01-22 [2] Github (lawremi/rtracklayer@86407bb)
## S4Arrays 1.3.2 2024-01-14 [2] Bioconductor 3.19 (R 4.4.0)
## S4Vectors * 0.41.3 2024-01-01 [2] Bioconductor 3.19 (R 4.4.0)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.4.0)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.4.0)
## SparseArray 1.3.3 2024-01-14 [2] Bioconductor 3.19 (R 4.4.0)
## splines 4.4.0 2024-01-18 [3] local
## stats * 4.4.0 2024-01-18 [3] local
## stats4 * 4.4.0 2024-01-18 [3] local
## strawr 0.0.91 2023-03-29 [2] CRAN (R 4.4.0)
## stringi 1.8.3 2023-12-11 [2] CRAN (R 4.4.0)
## stringr 1.5.1 2023-11-14 [2] CRAN (R 4.4.0)
## SummarizedExperiment 1.33.2 2024-01-07 [2] Bioconductor 3.19 (R 4.4.0)
## survival 3.5-7 2023-08-14 [3] CRAN (R 4.4.0)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.4.0)
## tidyr 1.3.0 2023-01-24 [2] CRAN (R 4.4.0)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.4.0)
## tools 4.4.0 2024-01-18 [3] local
## tzdb 0.4.0 2023-05-12 [2] CRAN (R 4.4.0)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.4.0)
## utils * 4.4.0 2024-01-18 [3] local
## vctrs 0.6.5 2023-12-01 [2] CRAN (R 4.4.0)
## vipor 0.4.7 2023-12-18 [2] CRAN (R 4.4.0)
## vroom 1.6.5 2023-12-05 [2] CRAN (R 4.4.0)
## WGCNA 1.72-5 2023-12-07 [2] CRAN (R 4.4.0)
## withr 3.0.0 2024-01-16 [2] CRAN (R 4.4.0)
## xfun 0.41 2023-11-01 [2] CRAN (R 4.4.0)
## XML 3.99-0.16.1 2024-01-22 [2] CRAN (R 4.4.0)
## XVector 0.43.1 2024-01-10 [2] Bioconductor 3.19 (R 4.4.0)
## yaml 2.3.8 2023-12-11 [2] CRAN (R 4.4.0)
## zlibbioc 1.49.0 2023-10-24 [2] Bioconductor
##
## [1] /tmp/Rtmpq5g2WV/Rinstb37571687
## [2] /usr/local/lib/R/site-library
## [3] /usr/local/lib/R/library
##
## ───────────────────────────────────────────────────────────────────────────