The `HiCExperiment` class describes Hi-C contact files imported in R, either through the `HiCExperiment()` constructor function or using the `import` method implemented by the HiCExperiment package.
Usage
HiCExperiment(
file,
resolution = NULL,
focus = NULL,
metadata = list(),
topologicalFeatures = S4Vectors::SimpleList(compartments = GenomicRanges::GRanges(),
borders = GenomicRanges::GRanges(), loops =
InteractionSet::GInteractions(GenomicRanges::GRanges(), GenomicRanges::GRanges()),
viewpoints = GenomicRanges::GRanges()),
pairsFile = NULL,
bed = NULL
)
makeHiCExperimentFromGInteractions(gi)
# S4 method for class 'HiCExperiment'
resolutions(x)
# S4 method for class 'HiCExperiment'
resolution(x)
# S4 method for class 'HiCExperiment'
focus(x)
# S4 method for class 'HiCExperiment,character'
focus(x) <- value
# S4 method for class 'HiCExperiment,numeric'
zoom(x, resolution)
# S4 method for class 'HiCExperiment,character'
refocus(x, focus)
# S4 method for class 'HiCExperiment,missing'
scores(x)
# S4 method for class 'HiCExperiment,character'
scores(x, name)
# S4 method for class 'HiCExperiment,numeric'
scores(x, name)
# S4 method for class 'HiCExperiment,character,numeric'
scores(x, name) <- value
# S4 method for class 'HiCExperiment,missing'
topologicalFeatures(x)
# S4 method for class 'HiCExperiment,character'
topologicalFeatures(x, name)
# S4 method for class 'HiCExperiment,numeric'
topologicalFeatures(x, name)
# S4 method for class 'HiCExperiment,character,GRangesOrGInteractions'
topologicalFeatures(x, name) <- value
# S4 method for class 'HiCExperiment'
pairsFile(x)
# S4 method for class 'HiCExperiment,character'
pairsFile(x) <- value
# S4 method for class 'HiCExperiment,list'
metadata(x) <- value
# S4 method for class 'HiCExperiment,numeric'
subsetByOverlaps(x, ranges)
# S4 method for class 'HiCExperiment,logical'
subsetByOverlaps(x, ranges)
# S4 method for class 'HiCExperiment,GRanges'
subsetByOverlaps(x, ranges, type = c("within", "any"))
# S4 method for class 'HiCExperiment,GInteractions'
subsetByOverlaps(x, ranges)
# S4 method for class 'HiCExperiment,Pairs'
subsetByOverlaps(x, ranges)
# S4 method for class 'HiCExperiment,numeric,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment,GRanges,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment,logical,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment,GInteractions,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment,Pairs,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment,character,ANY,ANY'
x[i]
# S4 method for class 'HiCExperiment'
fileName(object)
# S4 method for class 'HiCExperiment'
interactions(x, fillout.regions = FALSE)
# S4 method for class 'HiCExperiment,GInteractions'
interactions(x) <- value
# S4 method for class 'HiCExperiment'
length(x)
# S4 method for class 'HiCExperiment'
x$name <- value
# S4 method for class 'HiCExperiment'
x$name
# S4 method for class 'HiCExperiment'
seqinfo(x)
# S4 method for class 'HiCExperiment'
bins(x)
# S4 method for class 'HiCExperiment'
anchors(x)
# S4 method for class 'HiCExperiment'
regions(x)
# S4 method for class 'HiCExperiment'
cis(x)
# S4 method for class 'HiCExperiment'
trans(x)
Arguments
- file
CoolFile or plain path to a Hi-C contact file
- resolution
Resolution to use with the Hi-C contact file
- focus
Chromosome coordinates for which interaction counts are extracted from the Hi-C contact file, provided as a character string (e.g. "II:4001-5000"). If not provided, the entire Hi-C contact file will be imported.
- metadata
list of metadata
- topologicalFeatures
topologicalFeatures provided as a named SimpleList
- pairsFile
Path to an associated .pairs file (optional)
- bed
Path to regions file generated by HiC-Pro (optional)
- gi
GInteractions object
- x
A HiCExperiment object.
- value
Value to add to topologicalFeatures, scores, pairsFile or metadata slots.
- name
Name of the element to access in topologicalFeatures or scores SimpleLists.
- type
Either "within" or "any"; controls how interactions are subset by overlap with a provided GRanges.
- i, ranges
A GRanges, GInteractions or Pairs object, genomic coordinates given as a character string, or a numeric or logical vector, used to subset a HiCExperiment.
- object
A HiCExperiment object.
- fillout.regions
Whether to add missing regions to the GInteractions' regions.
Slots
fileName
Path of Hi-C contact file
focus
Chr. coordinates for which interaction counts are extracted from the Hi-C contact file.
resolutions
Resolutions available in the Hi-C contact file.
resolution
Current resolution
interactions
Genomic Interactions extracted from the Hi-C contact file
scores
Available interaction scores.
topologicalFeatures
Topological features associated with the dataset (e.g. loops (<GInteractions>), borders (<GRanges>), viewpoints (<GRanges>), etc.)
pairsFile
Path to the .pairs file associated with the Hi-C contact file
metadata
metadata associated with the Hi-C contact file.
Examples
#####################################################################
## Create a HiCExperiment object from a disk-stored contact matrix ##
#####################################################################
mcool_file <- HiContactsData::HiContactsData("yeast_wt", "mcool")
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
pairs_file <- HiContactsData::HiContactsData("yeast_wt", "pairs.gz")
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
contacts <- HiCExperiment(
file = mcool_file,
resolution = 8000L,
pairsFile = pairs_file
)
contacts
#> `HiCExperiment` object with 8,757,906 contacts over 1,517 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "whole genome"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 801962
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
#####################################################################
## ----- Manually create a HiCExperiment from GInteractions ------ ##
#####################################################################
gis <- interactions(contacts)[1:1000]
contacts2 <- makeHiCExperimentFromGInteractions(gis)
contacts2
#> `HiCExperiment` object with 6,670 contacts over 1,517 regions
#> -------
#> fileName: N/A
#> focus: N/A
#> resolutions(1): 8000
#> active resolution: 8000
#> interactions: 1000
#> scores(2): count balanced
#> topologicalFeatures: ()
#> pairsFile: N/A
#> metadata(0):
#####################################################################
## -------- Slots present in an HiCExperiment object ------------- ##
#####################################################################
fileName(contacts)
#> [1] "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
focus(contacts)
#> NULL
resolutions(contacts)
#> [1] 1000 2000 4000 8000 16000
resolution(contacts)
#> [1] 8000
interactions(contacts)
#> GInteractions object with 801962 interactions and 4 metadata columns:
#> seqnames1 ranges1 seqnames2 ranges2 | bin_id1
#> <Rle> <IRanges> <Rle> <IRanges> | <numeric>
#> [1] I 1-8000 --- I 1-8000 | 0
#> [2] I 1-8000 --- I 8001-16000 | 0
#> [3] I 1-8000 --- I 16001-24000 | 0
#> [4] I 1-8000 --- I 24001-32000 | 0
#> [5] I 1-8000 --- I 32001-40000 | 0
#> ... ... ... ... ... ... . ...
#> [801958] XVI 920001-928000 --- XVI 928001-936000 | 1513
#> [801959] XVI 920001-928000 --- XVI 936001-944000 | 1513
#> [801960] XVI 928001-936000 --- XVI 928001-936000 | 1514
#> [801961] XVI 928001-936000 --- XVI 936001-944000 | 1514
#> [801962] XVI 936001-944000 --- XVI 936001-944000 | 1515
#> bin_id2 count balanced
#> <numeric> <numeric> <numeric>
#> [1] 0 705 0.582493
#> [2] 1 1260 0.935951
#> [3] 2 557 0.294491
#> [4] 3 274 0.174475
#> [5] 4 291 0.138932
#> ... ... ... ...
#> [801958] 1514 893 0.515549
#> [801959] 1515 524 0.275326
#> [801960] 1514 1317 0.728102
#> [801961] 1515 1582 0.795995
#> [801962] 1515 1409 0.645227
#> -------
#> regions: 1517 ranges and 4 metadata columns
#> seqinfo: 16 sequences from an unspecified genome
scores(contacts)
#> List of length 2
#> names(2): count balanced
topologicalFeatures(contacts)
#> List of length 4
#> names(4): compartments borders loops viewpoints
pairsFile(contacts)
#> EH7703
#> "/github/home/.cache/R/ExperimentHub/190524de862e_7753"
#####################################################################
## ---------------------- Slot getters --------------------------- ##
#####################################################################
scores(contacts, 1) |> head()
#> [1] 705 1260 557 274 291 214
scores(contacts, 'balanced') |> head()
#> [1] 0.58249310 0.93595103 0.29449150 0.17447479 0.13893217 0.09970617
topologicalFeatures(contacts, 1)
#> GRanges object with 0 ranges and 0 metadata columns:
#> seqnames ranges strand
#> <Rle> <IRanges> <Rle>
#> -------
#> seqinfo: no sequences
#####################################################################
## ---------------------- Slot setters --------------------------- ##
#####################################################################
scores(contacts, 'random') <- runif(length(contacts))
topologicalFeatures(contacts, 'loops') <- InteractionSet::GInteractions(
GenomicRanges::GRanges('II:15324'),
GenomicRanges::GRanges('II:24310')
)
pairsFile(contacts) <- HiContactsData('yeast_wt', 'pairs.gz')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
#####################################################################
## ------------------ Subsetting functions ----------------------- ##
#####################################################################
contacts[1:100]
#> `HiCExperiment` object with 4,140 contacts over 100 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "whole genome"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 100
#> scores(3): count balanced random
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
contacts['II']
#> `HiCExperiment` object with 471,364 contacts over 102 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "II"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 4693
#> scores(3): count balanced random
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
contacts[c('II', 'III')]
#> `HiCExperiment` object with 632,446 contacts over 142 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "II, III"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 8502
#> scores(3): count balanced random
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
contacts['II|III']
#> `HiCExperiment` object with 9,092 contacts over 142 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "II:1-813184|III:1-316620"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 3000
#> scores(3): count balanced random
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
contacts['II:10001-30000|III:50001-90000']
#> `HiCExperiment` object with 11 contacts over 5 regions
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/190530f4def5_7752"
#> focus: "II:10001-30000|III:50001-90000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000
#> interactions: 4
#> scores(3): count balanced random
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0)
#> pairsFile: /github/home/.cache/R/ExperimentHub/190524de862e_7753
#> metadata(0):
#####################################################################
## --------------------- Utils functions ------------------------- ##
#####################################################################
## Adapted from other packages
seqinfo(contacts)
#> Seqinfo object with 16 sequences from an unspecified genome:
#> seqnames seqlengths isCircular genome
#> I 230218 <NA> <NA>
#> II 813184 <NA> <NA>
#> III 316620 <NA> <NA>
#> IV 1531933 <NA> <NA>
#> V 576874 <NA> <NA>
#> ... ... ... ...
#> XII 1078177 <NA> <NA>
#> XIII 924431 <NA> <NA>
#> XIV 784333 <NA> <NA>
#> XV 1091291 <NA> <NA>
#> XVI 948066 <NA> <NA>
bins(contacts)
#> GRanges object with 1517 ranges and 2 metadata columns:
#> seqnames ranges strand | bin_id weight
#> <Rle> <IRanges> <Rle> | <numeric> <numeric>
#> I_1_8000 I 1-8000 * | 0 0.0287442
#> I_8001_16000 I 8001-16000 * | 1 0.0258423
#> I_16001_24000 I 16001-24000 * | 2 0.0183936
#> I_24001_32000 I 24001-32000 * | 3 0.0221529
#> I_32001_40000 I 32001-40000 * | 4 0.0166096
#> ... ... ... ... . ... ...
#> XVI_912001_920000 XVI 912001-920000 * | 1512 0.0122756
#> XVI_920001_928000 XVI 920001-928000 * | 1513 0.0245536
#> XVI_928001_936000 XVI 928001-936000 * | 1514 0.0235127
#> XVI_936001_944000 XVI 936001-944000 * | 1515 0.0213994
#> XVI_944001_948066 XVI 944001-948066 * | 1516 NaN
#> -------
#> seqinfo: 16 sequences from an unspecified genome
anchors(contacts)
#> $first
#> GRanges object with 801962 ranges and 4 metadata columns:
#> seqnames ranges strand | bin_id weight chr center
#> <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>
#> [1] I 1-8000 * | 0 0.0287442 I 4000
#> [2] I 1-8000 * | 0 0.0287442 I 4000
#> [3] I 1-8000 * | 0 0.0287442 I 4000
#> [4] I 1-8000 * | 0 0.0287442 I 4000
#> [5] I 1-8000 * | 0 0.0287442 I 4000
#> ... ... ... ... . ... ... ... ...
#> [801958] XVI 920001-928000 * | 1513 0.0245536 XVI 924000
#> [801959] XVI 920001-928000 * | 1513 0.0245536 XVI 924000
#> [801960] XVI 928001-936000 * | 1514 0.0235127 XVI 932000
#> [801961] XVI 928001-936000 * | 1514 0.0235127 XVI 932000
#> [801962] XVI 936001-944000 * | 1515 0.0213994 XVI 940000
#> -------
#> seqinfo: 16 sequences from an unspecified genome
#>
#> $second
#> GRanges object with 801962 ranges and 4 metadata columns:
#> seqnames ranges strand | bin_id weight chr center
#> <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle> <integer>
#> [1] I 1-8000 * | 0 0.0287442 I 4000
#> [2] I 8001-16000 * | 1 0.0258423 I 12000
#> [3] I 16001-24000 * | 2 0.0183936 I 20000
#> [4] I 24001-32000 * | 3 0.0221529 I 28000
#> [5] I 32001-40000 * | 4 0.0166096 I 36000
#> ... ... ... ... . ... ... ... ...
#> [801958] XVI 928001-936000 * | 1514 0.0235127 XVI 932000
#> [801959] XVI 936001-944000 * | 1515 0.0213994 XVI 940000
#> [801960] XVI 928001-936000 * | 1514 0.0235127 XVI 932000
#> [801961] XVI 936001-944000 * | 1515 0.0213994 XVI 940000
#> [801962] XVI 936001-944000 * | 1515 0.0213994 XVI 940000
#> -------
#> seqinfo: 16 sequences from an unspecified genome
#>
regions(contacts)
#> GRanges object with 1517 ranges and 4 metadata columns:
#> seqnames ranges strand | bin_id weight chr
#> <Rle> <IRanges> <Rle> | <numeric> <numeric> <Rle>
#> I_1_8000 I 1-8000 * | 0 0.0287442 I
#> I_8001_16000 I 8001-16000 * | 1 0.0258423 I
#> I_16001_24000 I 16001-24000 * | 2 0.0183936 I
#> I_24001_32000 I 24001-32000 * | 3 0.0221529 I
#> I_32001_40000 I 32001-40000 * | 4 0.0166096 I
#> ... ... ... ... . ... ... ...
#> XVI_912001_920000 XVI 912001-920000 * | 1512 0.0122756 XVI
#> XVI_920001_928000 XVI 920001-928000 * | 1513 0.0245536 XVI
#> XVI_928001_936000 XVI 928001-936000 * | 1514 0.0235127 XVI
#> XVI_936001_944000 XVI 936001-944000 * | 1515 0.0213994 XVI
#> XVI_944001_948066 XVI 944001-948066 * | 1516 NaN XVI
#> center
#> <integer>
#> I_1_8000 4000
#> I_8001_16000 12000
#> I_16001_24000 20000
#> I_24001_32000 28000
#> I_32001_40000 36000
#> ... ...
#> XVI_912001_920000 916000
#> XVI_920001_928000 924000
#> XVI_928001_936000 932000
#> XVI_936001_944000 940000
#> XVI_944001_948066 946033
#> -------
#> seqinfo: 16 sequences from an unspecified genome
#####################################################################
## ------------- Coercing HiCExperiment objects ------------------ ##
#####################################################################
as(contacts, 'GInteractions')
#> GInteractions object with 801962 interactions and 5 metadata columns:
#> seqnames1 ranges1 seqnames2 ranges2 | bin_id1
#> <Rle> <IRanges> <Rle> <IRanges> | <numeric>
#> [1] I 1-8000 --- I 1-8000 | 0
#> [2] I 1-8000 --- I 8001-16000 | 0
#> [3] I 1-8000 --- I 16001-24000 | 0
#> [4] I 1-8000 --- I 24001-32000 | 0
#> [5] I 1-8000 --- I 32001-40000 | 0
#> ... ... ... ... ... ... . ...
#> [801958] XVI 920001-928000 --- XVI 928001-936000 | 1513
#> [801959] XVI 920001-928000 --- XVI 936001-944000 | 1513
#> [801960] XVI 928001-936000 --- XVI 928001-936000 | 1514
#> [801961] XVI 928001-936000 --- XVI 936001-944000 | 1514
#> [801962] XVI 936001-944000 --- XVI 936001-944000 | 1515
#> bin_id2 count balanced random
#> <numeric> <numeric> <numeric> <numeric>
#> [1] 0 705 0.582493 0.08075014
#> [2] 1 1260 0.935951 0.83433304
#> [3] 2 557 0.294491 0.60076089
#> [4] 3 274 0.174475 0.15720844
#> [5] 4 291 0.138932 0.00739944
#> ... ... ... ... ...
#> [801958] 1514 893 0.515549 0.0686281
#> [801959] 1515 524 0.275326 0.1704898
#> [801960] 1514 1317 0.728102 0.2895182
#> [801961] 1515 1582 0.795995 0.1125209
#> [801962] 1515 1409 0.645227 0.7087667
#> -------
#> regions: 1517 ranges and 4 metadata columns
#> seqinfo: 16 sequences from an unspecified genome
as(contacts, 'ContactMatrix')
#> class: ContactMatrix
#> dim: 1517 1517
#> type: dgCMatrix
#> rownames: NULL
#> colnames: NULL
#> metadata(0):
#> regions: 1517
as(contacts, 'matrix')[seq_len(10), seq_len(10)]
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.58249310 0.93595103 0.29449150 0.17447479 0.13893217 0.09970617
#> [2,] 0.93595103 0.58167668 0.47153090 0.22269611 0.16139073 0.12650163
#> [3,] 0.29449150 0.47153090 0.17119221 0.22492467 0.14572837 0.09093352
#> [4,] 0.17447479 0.22269611 0.22492467 0.12661416 0.31091881 0.19533840
#> [5,] 0.13893217 0.16139073 0.14572837 0.31091881 0.40002396 0.40626151
#> [6,] 0.09970617 0.12650163 0.09093352 0.19533840 0.40626151 0.28033628
#> [7,] 0.07443284 0.09628981 0.08017355 0.14042680 0.17496056 0.28222645
#> [8,] 0.05510079 0.07961469 0.05603711 0.07924403 0.12479940 0.17533293
#> [9,] 0.04456457 0.05371419 0.04230561 0.06416197 0.08546010 0.10963404
#> [10,] 0.01947702 0.02495274 0.01869518 0.02514303 0.03882842 0.05464144
#> [,7] [,8] [,9] [,10]
#> [1,] 0.07443284 0.05510079 0.04456457 0.01947702
#> [2,] 0.09628981 0.07961469 0.05371419 0.02495274
#> [3,] 0.08017355 0.05603711 0.04230561 0.01869518
#> [4,] 0.14042680 0.07924403 0.06416197 0.02514303
#> [5,] 0.17496056 0.12479940 0.08546010 0.03882842
#> [6,] 0.28222645 0.17533293 0.10963404 0.05464144
#> [7,] 0.25591092 0.37561957 0.17347572 0.09070876
#> [8,] 0.37561957 0.47046086 0.37791267 0.14931650
#> [9,] 0.17347572 0.37791267 0.35093060 0.28081603
#> [10,] 0.09070876 0.14931650 0.28081603 0.33402283
as(contacts, 'data.frame')[seq_len(10), seq_len(10)]
#> seqnames1 start1 end1 width1 strand1 bin_id1 weight1 center1 seqnames2
#> 1 I 1 8000 8000 * 0 0.02874424 4000 I
#> 2 I 1 8000 8000 * 0 0.02874424 4000 I
#> 3 I 1 8000 8000 * 0 0.02874424 4000 I
#> 4 I 1 8000 8000 * 0 0.02874424 4000 I
#> 5 I 1 8000 8000 * 0 0.02874424 4000 I
#> 6 I 1 8000 8000 * 0 0.02874424 4000 I
#> 7 I 1 8000 8000 * 0 0.02874424 4000 I
#> 8 I 1 8000 8000 * 0 0.02874424 4000 I
#> 9 I 1 8000 8000 * 0 0.02874424 4000 I
#> 10 I 1 8000 8000 * 0 0.02874424 4000 I
#> start2
#> 1 1
#> 2 8001
#> 3 16001
#> 4 24001
#> 5 32001
#> 6 40001
#> 7 48001
#> 8 56001
#> 9 64001
#> 10 72001