library(HiContacts)
library(purrr)
library(ggplot2)
# Contact maps for the two samples, imported with `resolution = 4000`.
hics <- list(
  "G1" = import("/home/rsg/repos/OHCA-data/S288c_G1.mcool", resolution = 4000),
  "G2M" = import("/home/rsg/repos/OHCA-data/S288c_G2M.mcool", resolution = 4000)
)

# One balanced-score matrix plot per sample, titled with the sample name.
imap(hics, \(hic, sample_name) {
  matrix_plot <- plotMatrix(
    hic,
    use.scores = "balanced",
    limits = c(-4, -1),
    caption = FALSE
  )
  matrix_plot + ggtitle(sample_name)
})
Workflow 3: Inter-centromere interactions in yeast
This chapter illustrates how to plot the aggregate signal over pairs of genomic ranges, in this case pairs of yeast centromeres.
We leverage two yeast datasets in this notebook.
- One from a WT yeast strain in G1 phase
- One from a WT yeast strain in G2/M phase
Importing Hi-C data and plotting contact matrices
We can visually appreciate that inter-chromosomal interactions, notably between centromeres, are less prominent in G2/M.
Checking P(s) and cis/trans interactions ratio
library(dplyr)
# Pairs files for the two samples.
pairs <- list(
  "G1" = PairsFile("/home/rsg/repos/OHCA-data/S288c_G1.pairs"),
  "G2M" = PairsFile("/home/rsg/repos/OHCA-data/S288c_G2M.pairs")
)

# Per-chromosome distance law for each sample, row-bound into one table
# with a `sample` column holding the list name.
ps <- imap_dfr(pairs, \(pairs_file, sample_name) {
  per_chr_law <- distanceLaw(pairs_file, by_chr = TRUE)
  mutate(per_chr_law, sample = sample_name)
})

# P(s) curves: one line per (sample, chromosome), coloured by sample.
plotPs(
  ps,
  aes(
    x = binned_distance,
    y = norm_p,
    group = interaction(sample, chr),
    color = sample
  )
) +
  scale_color_manual(values = c("black", "red"))

# Slope of the P(s) curves, with the same grouping and colours.
plotPsSlope(
  ps,
  aes(
    x = binned_distance,
    y = slope,
    group = interaction(sample, chr),
    color = sample
  )
) +
  scale_color_manual(values = c("black", "red"))
This confirms that contacts in cells synchronized in G2/M are enriched for interactions spanning 10-30 kb.
# cis/trans ratios per chromosome for each sample, tagged with the sample name.
ratios <- imap_dfr(hics, \(hic, sample_name) {
  mutate(cisTransRatio(hic), sample = sample_name)
})

# Bar plot of `trans_pct` per chromosome, one facet per sample.
ratios |>
  ggplot(aes(x = chr, y = trans_pct, fill = sample)) +
  geom_col() +
  labs(
    x = "Chromosomes",
    y = "% of trans interactions"
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(~sample)
We can also highlight that trans (inter-chromosomal) interactions are proportionally less frequent in G2/M-synchronized cells.
Centromere virtual 4C profiles
# Virtual 4C profile using the 2nd entry of `centros_yeast` (the chrII
# centromere, per the plot labels below) as viewpoint, keeping only the
# part of the profile that falls on chromosome IV.
data(centros_yeast)
v4c_centro <- imap_dfr(hics, \(hic, sample_name) {
  # resize() anchors at the range start unless `fix` is given; use
  # `fix = "center"` so the 8 kb viewpoint is centred on the centromere,
  # as done for the pairwise centromere profiles later in this workflow.
  viewpoint <- resize(centros_yeast[2], width = 8000, fix = "center")
  virtual4C(hic, viewpoint) |>
    as_tibble() |>
    mutate(sample = sample_name) |>
    filter(seqnames == "IV")
})

# One line per sample along chrIV.
ggplot(v4c_centro, aes(x = start, y = score, colour = sample)) +
  geom_line() +
  theme_bw() +
  labs(
    x = "chrIV position",
    y = "Contacts with chrII centromere",
    title = "Interaction profile of chrII centromere"
  )
Aggregated 2D signal over all pairs of centromeres
We can start by computing all possible pairs of centromeres.
# Build every ordered (i, j) combination of centromeres as a nested list of
# `Pairs`, flatten it with two rounds of `c()`, and convert the result to
# GInteractions. `seq_along()` replaces `1:length()` so an empty input does
# not iterate over c(1, 0).
centros_pairs <- lapply(seq_along(centros_yeast), function(i) {
  lapply(seq_along(centros_yeast), function(j) {
    S4Vectors::Pairs(centros_yeast[i], centros_yeast[j])
  })
}) |>
  do.call(c, args = _) |>
  do.call(c, args = _) |>
  InteractionSet::makeGInteractionsFromGRangesPairs()

# Drop the pairs whose two anchors are the same centromere.
centros_pairs <- centros_pairs[
  anchors(centros_pairs, "first") != anchors(centros_pairs, "second")
]
centros_pairs
## GInteractions object with 240 interactions and 0 metadata columns:
## seqnames1 ranges1 seqnames2 ranges2
## <Rle> <IRanges> <Rle> <IRanges>
## [1] I 151583-151641 --- II 238361-238419
## [2] I 151583-151641 --- III 114322-114380
## [3] I 151583-151641 --- IV 449879-449937
## [4] I 151583-151641 --- V 152522-152580
## [5] I 151583-151641 --- VI 147981-148039
## ... ... ... ... ... ...
## [236] XVI 556255-556313 --- XI 440229-440287
## [237] XVI 556255-556313 --- XII 151366-151424
## [238] XVI 556255-556313 --- XIII 268222-268280
## [239] XVI 556255-556313 --- XIV 628588-628646
## [240] XVI 556255-556313 --- XV 326897-326955
## -------
## regions: 16 ranges and 0 metadata columns
## seqinfo: 17 sequences (1 circular) from R64-1-1 genome
Then we can aggregate the Hi-C signal over each pair of centromeres.
# For each sample, aggregate the Hi-C signal over all centromere pairs and
# plot the aggregated balanced scores, titled with the sample name.
aggr_maps <- purrr::imap(hics, \(hic, sample_name) {
  # `Inf` is the value the original literal `1e999` overflowed to; written
  # explicitly here. NOTE(review): presumably this lifts any cap on the
  # distance between targets - confirm against the HiContacts docs.
  aggr <- aggregate(hic, centros_pairs, maxDistance = Inf)
  plotMatrix(
    aggr,
    use.scores = "balanced",
    limits = c(-5, -1),
    cmap = HiContacts::rainbowColors(),
    caption = FALSE
  ) +
    ggtitle(sample_name)
})
## Going through preflight checklist...
## Parsing the entire contact matrice as a sparse matrix...
## Modeling distance decay...
## Filtering for contacts within provided targets...
## Going through preflight checklist...
## Parsing the entire contact matrice as a sparse matrix...
## Modeling distance decay...
## Filtering for contacts within provided targets...
# Lay out the per-sample aggregate plots side by side in a single row.
cowplot::plot_grid(
  plotlist = aggr_maps,
  nrow = 1
)
Aggregated 1D interaction profile of centromeres
One can generalize the previous virtual 4C plot by extracting the interaction profile between all possible pairs of centromeres in each dataset.
# Interaction profile for every unordered pair of centromeres (i < j) in
# every sample: an 8 kb window centred on centromere i is used as the first
# anchor and an 80 kb window centred on centromere j as the second one.
# `center` is the offset of each interaction's `center2` from the midpoint
# of centromere j.
#
# `head(seq_along(x), -1)` gives 1..(n - 1) and is empty when n <= 1,
# whereas `1:(n - 1)` would iterate over c(1, 0) for a single centromere.
df <- map_dfr(head(seq_along(centros_yeast), -1), function(i) {
  centro1 <- resize(centros_yeast[i], width = 8000, fix = "center")
  # i < length(centros_yeast) here, so this sequence is always increasing.
  map_dfr(seq(i + 1L, length(centros_yeast)), function(j) {
    centro2 <- resize(centros_yeast[j], width = 80000, fix = "center")
    # Midpoint of centromere j; identical for all samples, so computed once.
    centro2_mid <- start(resize(centro2, width = 1, fix = "center"))
    gi <- GInteractions(centro1, centro2)
    imap_dfr(hics, \(hic, sample_name) {
      hic[gi] |>
        interactions() |>
        as_tibble() |>
        mutate(
          sample = sample_name,
          center = center2 - centro2_mid
        ) |>
        select(sample, seqnames1, seqnames2, center, balanced)
    })
  })
})
# Pairwise centromere profiles: one faint black line per chromosome pair,
# overlaid with a red smoothed trend, one facet per sample.
p <- df |>
  ggplot(aes(x = center/1e3, y = balanced)) +
  geom_line(
    aes(group = interaction(seqnames1, seqnames2)),
    alpha = 0.03,
    col = "black"
  ) +
  geom_smooth(col = "red", fill = "red") +
  theme_bw() +
  theme(legend.position = "none") +
  labs(
    x = "Distance from centromere (kb)",
    y = "Normalized interaction frequency",
    title = "Centromere pairwise interaction profiles"
  ) +
  facet_grid(~sample)