vignettes/HiContacts.Rmd
HiContacts.Rmd
citation('HiContacts')
#> To cite package 'HiContacts' in publications use:
#>
#> Serizay J, Matthey-Doret C, Bignaud A, Baudry L, Koszul R (2024).
#> "Orchestrating chromosome conformation capture analysis with
#> Bioconductor." _Nature Communications_, *15*, 1-9.
#> doi:10.1038/s41467-024-44761-x
#> <https://doi.org/10.1038/s41467-024-44761-x>.
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Article{,
#> author = {Jacques Serizay and Cyril Matthey-Doret and Amaury Bignaud and Lyam Baudry and Romain Koszul},
#> title = {Orchestrating chromosome conformation capture analysis with Bioconductor},
#> journal = {Nature Communications},
#> year = {2024},
#> volume = {15},
#> pages = {1--9},
#> doi = {10.1038/s41467-024-44761-x},
#> }
.(m)/cool
files as
HiCExperiment
objects
The HiCExperiment
package provides classes and methods
to import an .(m)cool file in R. The HiContactsData
package
gives access to a range of toy datasets stored by Bioconductor in the
ExperimentHub
.
library(dplyr)
library(ggplot2)
library(HiCExperiment)
library(HiContacts)
library(HiContactsData)
library(rtracklayer)
#>
#> Attaching package: 'rtracklayer'
#> The following object is masked from 'package:AnnotationHub':
#>
#> hubUrl
library(InteractionSet)
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'matrixStats'
#> The following object is masked from 'package:dplyr':
#>
#> count
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
#> The following object is masked from 'package:ExperimentHub':
#>
#> cache
#> The following object is masked from 'package:AnnotationHub':
#>
#> cache
cool_file <- HiContactsData('yeast_wt', format = 'cool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(cool_file, format = 'cool')
hic
#> `HiCExperiment` object with 8,757,906 contacts over 12,079 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/11307a68a0ec_7751"
#> focus: "whole genome"
#> resolutions(1): 1000
#> active resolution: 1000
#> interactions: 2945692
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
The plotMatrix
function takes a
HiCExperiment
object and plots it as a heatmap.
Use the use.scores
argument to specify which type of
interaction scores to use in the contact maps (e.g. count
,
balanced
, …). By default, plotMatrix()
looks
for balanced scores. If they are not stored in the original
.(m)/cool
file, plotMatrix()
simply takes the
first scores available.
## Square matrix
plotMatrix(hic, use.scores = 'balanced', limits = c(-4, -1))
## Horizontal matrix
plotMatrix(
refocus(hic, 'II'),
use.scores = 'balanced', limits = c(-4, -1),
maxDistance = 200000
)
Loops can be plotted on top of Hi-C matrices by providing a
GInteractions
object to the loops
argument.
Note: Loops in .bedpe
format can be imported in
R using the import()
function, and converted into
GInteractions
with the
InteractionSet::makeGInteractionsFromGRangesPairs()
function.
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
loops <- system.file("extdata", 'S288C-loops.bedpe', package = 'HiCExperiment') |>
import() |>
makeGInteractionsFromGRangesPairs()
p <- import(mcool_file, format = 'mcool', focus = 'IV') |>
plotMatrix(loops = loops, limits = c(-4, -1), dpi = 120)
borders <- system.file("extdata", 'S288C-borders.bed', package = 'HiCExperiment') |>
import()
p <- import(mcool_file, format = 'mcool', focus = 'IV') |>
plotMatrix(loops = loops, borders = borders, limits = c(-4, -1), dpi = 120)
aggr_centros <- HiContacts::aggregate(
hic, targets = loops, BPPARAM = BiocParallel::SerialParam()
)
#> Going through preflight checklist...
#> Parsing the entire contact matrice as a sparse matrix...
#> Modeling distance decay...
#> Filtering for contacts within provided targets...
plotMatrix(
aggr_centros, use.scores = 'detrended', limits = c(-1, 1), scale = 'linear',
cmap = bgrColors()
)
mcool_file <- HiContactsData('mESCs', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', focus = 'chr2', resolution = 160000)
hic <- autocorrelate(hic)
#>
scores(hic)
#> List of length 5
#> names(5): count balanced expected detrended autocorrelated
summary(scores(hic, 'autocorrelated'))
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -0.499 -0.097 0.040 0.043 0.172 1.000 7739
plotMatrix(hic, use.scores = 'autocorrelated', limits = c(-1, 1), scale = 'linear')
hic <- import(mcool_file, format = 'mcool', focus = 'chr18:20000000-35000000', resolution = 40000)
detrended_hic <- detrend(hic)
patchwork::wrap_plots(
plotMatrix(detrended_hic, use.scores = 'expected', scale = 'log10', limits = c(-3, -1), dpi = 120),
plotMatrix(detrended_hic, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), dpi = 120)
)
mcool_file_1 <- HiContactsData('yeast_eco1', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
mcool_file_2 <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic_1 <- import(mcool_file_1, format = 'mcool', focus = 'II:1-300000', resolution = 2000)
hic_2 <- import(mcool_file_2, format = 'mcool', focus = 'II:1-300000', resolution = 2000)
merged_hic <- merge(hic_1, hic_2)
hic_1
#> `HiCExperiment` object with 301,285 contacts over 150 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/113048cd88ef_7754"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 9607
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
hic_2
#> `HiCExperiment` object with 146,812 contacts over 150 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/11307d784c99_7752"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 6933
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
merged_hic
#> `HiCExperiment` object with 229,926 contacts over 150 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/113048cd88ef_7754"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 9748
#> scores(2): count balanced
#> topologicalFeatures: ()
#> pairsFile: N/A
#> metadata(2): hce_list operation
hic_1 <- import(mcool_file_1, format = 'mcool', focus = 'II', resolution = 2000)
hic_2 <- import(mcool_file_2, format = 'mcool', focus = 'II', resolution = 2000)
div_hic <- divide(hic_1, by = hic_2)
div_hic
#> `HiCExperiment` object with 996,154 contacts over 407 regions
#> -------
#> fileName: N/A
#> focus: "II"
#> resolutions(1): 2000
#> active resolution: 2000
#> interactions: 60894
#> scores(6): count.x balanced.x count.by balanced.by balanced.fc balanced.l2fc
#> topologicalFeatures: ()
#> pairsFile: N/A
#> metadata(2): hce_list operation
p <- patchwork::wrap_plots(
plotMatrix(hic_1, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_2, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(div_hic, use.scores = 'balanced.fc', scale = 'log2', limits = c(-2, 2), cmap = bwrColors())
)
hic_1_despeckled <- despeckle(hic_1)
hic_1_despeckled5 <- despeckle(hic_1, focal.size = 5)
p <- patchwork::wrap_plots(
plotMatrix(hic_1, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_1_despeckled, use.scores = 'balanced.despeckled', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_1_despeckled5, use.scores = 'balanced.despeckled', scale = 'log10', limits = c(-4, -1))
)
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 16000)
# - Get compartments
hic <- getCompartments(hic, chromosomes = c('XV', 'XVI'))
#> Going through preflight checklist...
#> Parsing intra-chromosomal contacts for each chromosome...
#> Computing eigenvectors for each chromosome...
hic
#> `HiCExperiment` object with 8,757,906 contacts over 763 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/11307d784c99_7752"
#> focus: "whole genome"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 16000
#> interactions: 267709
#> scores(2): count balanced
#> topologicalFeatures: compartments(18) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(1): eigens
# - Export compartments as bigwig and bed files
export(IRanges::coverage(metadata(hic)$eigens, weight = 'eigen'), 'compartments.bw')
export(
topologicalFeatures(hic, 'compartments')[topologicalFeatures(hic, 'compartments')$compartment == 'A'],
'A-compartments.bed'
)
export(
topologicalFeatures(hic, 'compartments')[topologicalFeatures(hic, 'compartments')$compartment == 'B'],
'B-compartments.bed'
)
# - Generate saddle plot
plotSaddle(hic)
# - Compute insulation score
hic <- refocus(hic, 'II:1-300000') |>
zoom(resolution = 1000) |>
getDiamondInsulation(window_size = 8000) |>
getBorders()
#> Going through preflight checklist...
#> Scan each window and compute diamond insulation score...
#> Annotating diamond score prominence for each window...
hic
#> `HiCExperiment` object with 146,812 contacts over 300 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/11307d784c99_7752"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 1000
#> interactions: 18286
#> scores(2): count balanced
#> topologicalFeatures: compartments(18) borders(17) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(2): eigens insulation
# - Export insulation as bigwig track and borders as bed file
export(IRanges::coverage(metadata(hic)$insulation, weight = 'insulation'), 'insulation.bw')
export(topologicalFeatures(hic, 'borders'), 'borders.bed')
mcool_file <- HiContactsData('mESCs', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 1000)
cisTransRatio(hic)
#> # A tibble: 16 × 6
#> # Groups: chr [16]
#> chr cis trans n_total cis_pct trans_pct
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 I 186326 96738 283064 0.658 0.342
#> 2 II 942728 273966 1216694 0.775 0.225
#> 3 III 303980 127087 431067 0.705 0.295
#> 4 IV 1858062 418218 2276280 0.816 0.184
#> 5 V 607090 220873 827963 0.733 0.267
#> 6 VI 280282 127771 408053 0.687 0.313
#> 7 VII 1228532 335909 1564441 0.785 0.215
#> 8 VIII 574086 205122 779208 0.737 0.263
#> 9 IX 474182 179280 653462 0.726 0.274
#> 10 X 834656 259240 1093896 0.763 0.237
#> 11 XI 775240 245899 1021139 0.759 0.241
#> 12 XII 1182742 278065 1460807 0.810 0.190
#> 13 XIII 1084810 296351 1381161 0.785 0.215
#> 14 XIV 852516 256639 1109155 0.769 0.231
#> 15 XV 1274070 351132 1625202 0.784 0.216
#> 16 XVI 1070700 313520 1384220 0.774 0.226
# Without a pairs file
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 1000)
ps <- distanceLaw(hic)
#> pairsFile not specified. The P(s) curve will be an approximation.
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p))
#> Warning: Removed 18 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# With a pairs file
pairsFile(hic) <- HiContactsData('yeast_wt', format = 'pairs.gz')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
ps <- distanceLaw(hic)
#> Importing pairs file /github/home/.cache/R/ExperimentHub/11301f395da_7753 in memory. This may take a while...
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p))
#> Warning: Removed 67 rows containing missing values or values outside the scale range
#> (`geom_line()`).
plotPsSlope(ps, ggplot2::aes(x = binned_distance, y = slope))
#> Warning: Removed 67 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# Comparing P(s) curves
c1 <- import(
HiContactsData('yeast_wt', format = 'mcool'),
format = 'mcool',
resolution = 1000,
pairsFile = HiContactsData('yeast_wt', format = 'pairs.gz')
)
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
c2 <- import(
HiContactsData('yeast_eco1', format = 'mcool'),
format = 'mcool',
resolution = 1000,
pairsFile = HiContactsData('yeast_eco1', format = 'pairs.gz')
)
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
ps_1 <- distanceLaw(c1) |> mutate(sample = 'WT')
#> Importing pairs file /github/home/.cache/R/ExperimentHub/11301f395da_7753 in memory. This may take a while...
ps_2 <- distanceLaw(c2) |> mutate(sample = 'Eco1-AID')
#> Importing pairs file /github/home/.cache/R/ExperimentHub/113077ca2ef5_7755 in memory. This may take a while...
ps <- rbind(ps_1, ps_2)
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p, group = sample, color = sample))
#> Warning: Removed 134 rows containing missing values or values outside the scale range
#> (`geom_line()`).
plotPsSlope(ps, ggplot2::aes(x = binned_distance, y = slope, group = sample, color = sample))
#> Warning: Removed 135 rows containing missing values or values outside the scale range
#> (`geom_line()`).
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] InteractionSet_1.33.0 SummarizedExperiment_1.35.0
#> [3] Biobase_2.65.0 MatrixGenerics_1.17.0
#> [5] matrixStats_1.3.0 rtracklayer_1.65.0
#> [7] HiContacts_1.5.0 HiContactsData_1.7.0
#> [9] ExperimentHub_2.13.0 AnnotationHub_3.13.0
#> [11] BiocFileCache_2.13.0 dbplyr_2.5.0
#> [13] HiCExperiment_1.5.0 GenomicRanges_1.57.0
#> [15] GenomeInfoDb_1.41.1 IRanges_2.39.0
#> [17] S4Vectors_0.43.0 BiocGenerics_0.51.0
#> [19] dplyr_1.1.4 ggplot2_3.5.1
#> [21] BiocStyle_2.33.0
#>
#> loaded via a namespace (and not attached):
#> [1] strawr_0.0.91 rstudioapi_0.16.0 jsonlite_1.8.8
#> [4] magrittr_2.0.3 ggbeeswarm_0.7.2 farver_2.1.2
#> [7] rmarkdown_2.27 fs_1.6.4 BiocIO_1.15.0
#> [10] zlibbioc_1.51.0 ragg_1.3.2 vctrs_0.6.5
#> [13] memoise_2.0.1 Cairo_1.6-2 Rsamtools_2.21.0
#> [16] RCurl_1.98-1.14 terra_1.7-78 base64enc_0.1-3
#> [19] htmltools_0.5.8.1 S4Arrays_1.5.1 dynamicTreeCut_1.63-1
#> [22] curl_5.2.1 Rhdf5lib_1.27.0 Formula_1.2-5
#> [25] SparseArray_1.5.7 rhdf5_2.49.0 sass_0.4.9
#> [28] bslib_0.7.0 htmlwidgets_1.6.4 desc_1.4.3
#> [31] impute_1.79.0 cachem_1.1.0 GenomicAlignments_1.41.0
#> [34] mime_0.12 lifecycle_1.0.4 iterators_1.0.14
#> [37] pkgconfig_2.0.3 Matrix_1.7-0 R6_2.5.1
#> [40] fastmap_1.2.0 GenomeInfoDbData_1.2.12 digest_0.6.35
#> [43] colorspace_2.1-0 patchwork_1.2.0 AnnotationDbi_1.67.0
#> [46] RSpectra_0.16-1 Hmisc_5.1-3 textshaping_0.4.0
#> [49] RSQLite_2.3.7 filelock_1.0.3 labeling_0.4.3
#> [52] fansi_1.0.6 httr_1.4.7 abind_1.4-5
#> [55] compiler_4.4.0 bit64_4.0.5 withr_3.0.0
#> [58] doParallel_1.0.17 backports_1.5.0 htmlTable_2.4.2
#> [61] BiocParallel_1.39.0 DBI_1.2.3 highr_0.11
#> [64] rappdirs_0.3.3 DelayedArray_0.31.1 rjson_0.2.21
#> [67] tools_4.4.0 foreign_0.8-86 vipor_0.4.7
#> [70] beeswarm_0.4.0 nnet_7.3-19 glue_1.7.0
#> [73] restfulr_0.0.15 rhdf5filters_1.17.0 grid_4.4.0
#> [76] checkmate_2.3.1 cluster_2.1.6 generics_0.1.3
#> [79] gtable_0.3.5 tzdb_0.4.0 preprocessCore_1.67.0
#> [82] tidyr_1.3.1 data.table_1.15.4 hms_1.1.3
#> [85] WGCNA_1.72-5 utf8_1.2.4 XVector_0.45.0
#> [88] BiocVersion_3.20.0 foreach_1.5.2 pillar_1.9.0
#> [91] stringr_1.5.1 vroom_1.6.5 splines_4.4.0
#> [94] lattice_0.22-6 survival_3.6-4 bit_4.0.5
#> [97] tidyselect_1.2.1 GO.db_3.19.1 Biostrings_2.73.0
#> [100] knitr_1.47 gridExtra_2.3 bookdown_0.39
#> [103] xfun_0.44 stringi_1.8.4 UCSC.utils_1.1.0
#> [106] yaml_2.3.8 evaluate_0.23 codetools_0.2-20
#> [109] tibble_3.2.1 BiocManager_1.30.23 cli_3.6.2
#> [112] rpart_4.1.23 systemfonts_1.1.0 munsell_0.5.1
#> [115] jquerylib_0.1.4 Rcpp_1.0.12 png_0.1-8
#> [118] fastcluster_1.2.6 XML_3.99-0.16.1 ggrastr_1.0.2
#> [121] parallel_4.4.0 pkgdown_2.0.9 readr_2.1.5
#> [124] blob_1.2.4 bitops_1.0-7 scales_1.3.0
#> [127] purrr_1.0.2 crayon_1.5.2 rlang_1.1.3
#> [130] KEGGREST_1.45.0