Title: | Deconvolution of Spatial Transcriptomics Data Based on Neural Networks |
---|---|
Description: | Deconvolution of spatial transcriptomics data based on neural networks and single-cell RNA-seq data. SpatialDDLS implements a workflow to create neural network models able to make accurate estimates of cell composition of spots from spatial transcriptomics data using deep learning and the meaningful information provided by single-cell RNA-seq data. See Torroja and Sanchez-Cabo (2019) <doi:10.3389/fgene.2019.00978> and Mañanes et al. (2024) <doi:10.1093/bioinformatics/btae072> to get an overview of the method and see some examples of its performance. |
Authors: | Diego Mañanes [aut, cre] |
Maintainer: | Diego Mañanes <[email protected]> |
License: | GPL-3 |
Version: | 1.0.3 |
Built: | 2025-02-16 05:04:47 UTC |
Source: | https://github.com/diegommcc/spatialddls |
Generate bar error plots by cell type (CellType
) or by number of
different cell types (nCellTypes
) on test mixed transcriptional
profiles.
barErrorPlot( object, error = "MSE", by = "CellType", dispersion = "se", filter.sc = TRUE, title = NULL, angle = NULL, theme = NULL )
barErrorPlot( object, error = "MSE", by = "CellType", dispersion = "se", filter.sc = TRUE, title = NULL, angle = NULL, theme = NULL )
object |
|
error |
|
by |
Variable used to show errors. Available options are:
|
dispersion |
Standard error ( |
filter.sc |
Boolean indicating whether single-cell profiles are filtered
out and only correlation of results associated with mixed transcriptional
profiles are shown ( |
title |
Title of the plot. |
angle |
Angle of ticks. |
theme |
ggplot2 theme. |
A ggplot object.
calculateEvalMetrics
corrExpPredPlot
distErrorPlot
blandAltmanLehPlot
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 100, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 10, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # bar error plots barErrorPlot( object = SDDLS, error = "MSE", by = "CellType" ) barErrorPlot( object = SDDLS, error = "MAE", by = "nCellTypes" )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 100, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 10, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # bar error plots barErrorPlot( object = SDDLS, error = "MSE", by = "CellType" ) barErrorPlot( object = SDDLS, error = "MAE", by = "nCellTypes" )
Bar plot of deconvoluted cell type proportions.
barPlotCellTypes( data, colors = NULL, set = NULL, prediction = "Regularized", color.line = NA, x.label = "Spots", rm.x.text = FALSE, title = "Results of deconvolution", legend.title = "Cell types", angle = 90, theme = NULL, index.st = NULL )
barPlotCellTypes( data, colors = NULL, set = NULL, prediction = "Regularized", color.line = NA, x.label = "Spots", rm.x.text = FALSE, title = "Results of deconvolution", legend.title = "Cell types", angle = 90, theme = NULL, index.st = NULL )
data |
|
colors |
Vector of colors to be used. |
set |
Type of simplification performed during deconvolution. It can
be |
prediction |
Set of predicted cell proportions to be plotted. It can be
|
color.line |
Color of the border bars. |
x.label |
Label of x-axis. |
rm.x.text |
Logical value indicating whether to remove x-axis ticks (name of samples). |
title |
Title of the plot. |
legend.title |
Title of the legend plot. |
angle |
Angle of text ticks. |
theme |
ggplot2 theme. |
index.st |
Name or index of the element wanted to be shown in the
|
A ggplot object with the provided cell proportions represented as a bar plot.
Generate Bland-Altman agreement plots between predicted and expected cell
type proportions from test data. The Bland-Altman agreement plots can be
shown all mixed or split by either cell type (CellType
) or the number
of cell types present in spots (nCellTypes
). See the facet.by
argument and examples for more information.
blandAltmanLehPlot( object, colors, color.by = "CellType", facet.by = NULL, log.2 = FALSE, filter.sc = TRUE, density = TRUE, color.density = "darkblue", size.point = 0.05, alpha.point = 1, ncol = NULL, nrow = NULL, title = NULL, theme = NULL, ... )
blandAltmanLehPlot( object, colors, color.by = "CellType", facet.by = NULL, log.2 = FALSE, filter.sc = TRUE, density = TRUE, color.density = "darkblue", size.point = 0.05, alpha.point = 1, ncol = NULL, nrow = NULL, title = NULL, theme = NULL, ... )
object |
|
colors |
Vector of colors to be used. |
color.by |
Variable used to color data. Options are |
facet.by |
Variable used to show the data in different panels. If
|
log.2 |
Whether to show the Bland-Altman agreement plot in log2 space
( |
filter.sc |
Boolean indicating whether single-cell profiles are filtered
out and only correlations of results associated with mixed spot profiles
are shown ( |
density |
Boolean indicating whether density lines should be shown
( |
color.density |
Color of density lines if the |
size.point |
Size of the points (0.1 by default). |
alpha.point |
Alpha of the points (0.1 by default). |
ncol |
Number of columns if |
nrow |
Number of rows if |
title |
Title of the plot. |
theme |
ggplot2 theme. |
... |
Additional argument for the |
A ggplot object.
calculateEvalMetrics
corrExpPredPlot
distErrorPlot
barErrorPlot
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # Bland-Altman plot by cell type blandAltmanLehPlot( object = SDDLS, facet.by = "CellType", color.by = "CellType" ) # Bland-Altman plot of all samples mixed blandAltmanLehPlot( object = SDDLS, facet.by = NULL, color.by = "CellType", alpha.point = 0.3, log2 = TRUE )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # Bland-Altman plot by cell type blandAltmanLehPlot( object = SDDLS, facet.by = "CellType", color.by = "CellType" ) # Bland-Altman plot of all samples mixed blandAltmanLehPlot( object = SDDLS, facet.by = NULL, color.by = "CellType", alpha.point = 0.3, log2 = TRUE )
Calculate evaluation metrics on test mixed transcriptional profiles. By
default, absolute error (AbsErr
), proportional absolute error
(ppAbsErr
), squared error (SqrErr
), and proportional squared
error (ppSqrErr
) are calculated for each test mixed profile. In
addition, each of these metrics is aggregated according to three criteria:
cell type (CellType
), probability bins in ranges of 0.1 (pBin
),
and number of different cell types present in the spot (nCellTypes
).
calculateEvalMetrics(object)
calculateEvalMetrics(object)
object |
|
A SpatialDDLS
object with is a
DeconvDLModel
object. The calculated metrics are
stored in the test.deconv.metrics
slot of the
DeconvDLModel
object.
distErrorPlot
corrExpPredPlot
blandAltmanLehPlot
barErrorPlot
cell.names
slot in a
PropCellTypes
objectGet and set cell.names
slot in a
PropCellTypes
object
cell.names(object) cell.names(object) <- value
cell.names(object) cell.names(object) <- value
object |
|
value |
Matrix containing names of the mixed transcriptional profiles to be simulated as rows and cells to be used to simulate them as columns. |
cell.types
slot in a
DeconvDLModel
objectGet and set cell.types
slot in a
DeconvDLModel
object
cell.types(object) cell.types(object) <- value
cell.types(object) cell.types(object) <- value
object |
|
value |
Vector with cell types considered by the deep neural network model. |
Generate correlation plots between predicted and expected cell type
proportions of test data. Correlation plots can be shown all mixed or either
split by cell type (CellType
) or the number of different cell types
present in the spots (nCellTypes
).
corrExpPredPlot( object, colors, facet.by = NULL, color.by = "CellType", corr = "both", filter.sc = TRUE, pos.x.label = 0.01, pos.y.label = 0.95, sep.labels = 0.15, size.point = 0.1, alpha.point = 1, ncol = NULL, nrow = NULL, title = NULL, theme = NULL, ... )
corrExpPredPlot( object, colors, facet.by = NULL, color.by = "CellType", corr = "both", filter.sc = TRUE, pos.x.label = 0.01, pos.y.label = 0.95, sep.labels = 0.15, size.point = 0.1, alpha.point = 1, ncol = NULL, nrow = NULL, title = NULL, theme = NULL, ... )
object |
|
colors |
Vector of colors to be used. |
facet.by |
Show data in different panels. Options are |
color.by |
Variable used to color data. Options are |
corr |
Correlation value shown as an annotation on the plot. Available
metrics are Pearson's correlation coefficient ( |
filter.sc |
Boolean indicating whether single-cell profiles are filtered
out and only mixed transcriptional profile errors are shown ( |
pos.x.label |
X-axis position of correlation annotations (0.95 by default). |
pos.y.label |
Y-axis position of correlation annotations (0.1 by default). |
sep.labels |
Space separating annotations if |
size.point |
Size of points (0.1 by default). |
alpha.point |
Alpha of points (0.1 by default). |
ncol |
Number of columns if |
nrow |
Number of rows if |
title |
Title of the plot. |
theme |
ggplot2 theme. |
... |
Additional arguments for the facet_wrap function
of ggplot2 if |
A ggplot object.
calculateEvalMetrics
distErrorPlot
blandAltmanLehPlot
barErrorPlot
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # correlations by cell type corrExpPredPlot( object = SDDLS, facet.by = "CellType", color.by = "CellType", corr = "both" ) # correlations of all samples mixed corrExpPredPlot( object = SDDLS, facet.by = NULL, color.by = "CellType", corr = "ccc", pos.x.label = 0.2, alpha.point = 0.3 )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # correlations by cell type corrExpPredPlot( object = SDDLS, facet.by = "CellType", color.by = "CellType", corr = "both" ) # correlations of all samples mixed corrExpPredPlot( object = SDDLS, facet.by = NULL, color.by = "CellType", corr = "ccc", pos.x.label = 0.2, alpha.point = 0.3 )
SpatialDDLS
objectCreate a SpatialDDLS
object by providing single-cell
RNA-seq data. Additionally, spatial transcriptomics data contained in
SpatialDDLS
objects can also be provided. It is
recommended to provide both types of data to only use genes shared between
both experiments.
createSpatialDDLSobject( sc.data, sc.cell.ID.column, sc.cell.type.column, sc.gene.ID.column, st.data, st.spot.ID.column, st.gene.ID.column, filter.mt.genes = "^mt-", sc.filt.genes.cluster = TRUE, sc.min.mean.counts = 1, sc.n.genes.per.cluster = 300, top.n.genes = 2000, sc.log.FC = TRUE, sc.min.counts = 1, sc.min.cells = 1, st.min.counts = 1, st.min.spots = 1, st.n.slides = 3, shared.genes = TRUE, sc.name.dataset.h5 = NULL, sc.file.backend = NULL, sc.name.dataset.backend = NULL, sc.compression.level = NULL, sc.chunk.dims = NULL, sc.block.processing = FALSE, verbose = TRUE, project = "SpatialDDLS-Proj" )
createSpatialDDLSobject( sc.data, sc.cell.ID.column, sc.cell.type.column, sc.gene.ID.column, st.data, st.spot.ID.column, st.gene.ID.column, filter.mt.genes = "^mt-", sc.filt.genes.cluster = TRUE, sc.min.mean.counts = 1, sc.n.genes.per.cluster = 300, top.n.genes = 2000, sc.log.FC = TRUE, sc.min.counts = 1, sc.min.cells = 1, st.min.counts = 1, st.min.spots = 1, st.n.slides = 3, shared.genes = TRUE, sc.name.dataset.h5 = NULL, sc.file.backend = NULL, sc.name.dataset.backend = NULL, sc.compression.level = NULL, sc.chunk.dims = NULL, sc.block.processing = FALSE, verbose = TRUE, project = "SpatialDDLS-Proj" )
sc.data |
Single-cell RNA-seq profiles to be used as reference. If data
are provided from files, |
sc.cell.ID.column |
Name or number of the column in cells metadata corresponding to cell names in expression matrix (single-cell RNA-seq data). |
sc.cell.type.column |
Name or column number corresponding to cell types in cells metadata. |
sc.gene.ID.column |
Name or number of the column in genes metadata corresponding to the names used for features/genes (single-cell RNA-seq data). |
st.data |
Spatial transcriptomics datasets to be deconvoluted. It can be
a single |
st.spot.ID.column |
Name or number of the column in spots metadata corresponding to spot names in expression matrix (spatial transcriptomics data). |
st.gene.ID.column |
Name or number of the column in the genes metadata corresponding to the names used for features/genes (spatial transcriptomics data). |
filter.mt.genes |
Regular expression matching mitochondrial genes to
be ruled out ( |
sc.filt.genes.cluster |
Whether to filter single-cell RNA-seq genes
according to a minimum threshold of non-zero average counts per cell type
( |
sc.min.mean.counts |
Minimum non-zero average counts per cluster to filter genes. 1 by default. |
sc.n.genes.per.cluster |
Top n genes with the highest logFC per cluster (300 by default). See Details section for more details. |
top.n.genes |
Maximum number of genes used for downstream steps (2000
by default). In case the number of genes after filtering is greater than
|
sc.log.FC |
Whether to filter genes with a logFC less than 0.5 when
|
sc.min.counts |
Minimum gene counts to filter (1 by default; single-cell RNA-seq data). |
sc.min.cells |
Minimum of cells with more than |
st.min.counts |
Minimum gene counts to filter (1 by default; spatial transcriptomics data). |
st.min.spots |
Minimum of cells with more than |
st.n.slides |
Minimum number of slides
( |
shared.genes |
If set to |
sc.name.dataset.h5 |
Name of the data set if HDF5 file is provided for single-cell RNA-seq data. |
sc.file.backend |
Valid file path where to store the loaded for
single-cell RNA-seq data as HDF5 file. If provided, data are stored in a
HDF5 file as back-end using the DelayedArray and HDF5Array
packages instead of being loaded into RAM. This is suitable for situations
where you have large amounts of data that cannot be stored in memory. Note
that operations on these data will be performed by blocks (i.e subsets of
determined size), which may result in longer execution times. |
sc.name.dataset.backend |
Name of the HDF5 file dataset to be used. Note
that it cannot exist. If |
sc.compression.level |
The compression level used if
|
sc.chunk.dims |
Specifies dimensions that HDF5 chunk will have. If
|
sc.block.processing |
Boolean indicating whether single-cell RNA-seq
data should be treated as blocks (only if data are provided as HDF5 file).
|
verbose |
Show informative messages during the execution ( |
project |
Name of the project for |
Filtering genes
In order to reduce the number of dimensions used for subsequent steps,
createSpatialDDLSobject
implements different strategies aimed at
removing useless genes for deconvolution:
Filtering at the
cell level: genes less expressed than a determined cutoff in N cells are
removed. See sc.min.cells
/st.min.cells
and
sc.min.counts
/st.min.cells
parameters.
Filtering at the
cluster level (only for scRNA-seq data): if
sc.filt.genes.cluster == TRUE
, createSpatialDDLSobject
sets a
cutoff of non-zero average counts per
cluster (sc.min.mean.counts
parameter) and take only the
sc.n.genes.per.cluster
genes with the highest logFC per cluster.
LogFCs are calculated using normalized logCPM of each cluster with respect to
the average in the whole dataset). Finally, if
the number of remaining genes is greater than top.n.genes
, genes are
ranked based on variance and the top.n.genes
most variable genes are
used for downstream analyses.
Single-cell RNA-seq data
Single-cell RNA-seq data can be provided from files (formats allowed: tsv,
tsv.gz, mtx (sparse matrix) and hdf5) or a
SingleCellExperiment
object. Data will be
stored in the single.cell.real
slot, and must consist of three pieces
of information:
Single-cell counts: genes as rows and cells as columns.
Cells metadata: annotations (columns) for each cell (rows).
Genes metadata: annotations (columns) for each gene (rows).
If data
are provided from files, single.cell.real
argument must be a vector of
three elements ordered so that the first file corresponds to the count
matrix, the second to the cells metadata, and the last to the genes metadata.
On the other hand, if data are provided as a
SingleCellExperiment
object, it must
contain single-cell counts in assay
, cells metadata in colData
,
and genes metadata in rowData
. Data must be provided without any
transformation (e.g. log-transformation), raw counts are preferred.
Spatial transcriptomics data
It must be a SpatialExperiment
object (or a
list of them if more than one slide is going to be deconvoluted) containing
the same information as the single-cell RNA-seq data: the count matrix, spots
metadata, and genes metadata. Please, make sure the gene identifiers used the
spatial and single-cell transcriptomics data are consistent.
A SpatialDDLS
object with the single-cell
RNA-seq data provided loaded into the single.cell.real
slot as a
SingleCellExperiment
object. If spatial
transcriptomics data are provided, they will be loaded into the
spatial.experiments
slot.
estimateZinbwaveParams
genMixedCellProp
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) counts <- matrix( rpois(30, lambda = 5), ncol = 6, dimnames = list(paste0("Gene", 1:5), paste0("Spot", 1:6)) ) coordinates <- matrix( c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), ncol = 2 ) ste <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", 1:5)), colData = data.frame(Cell_ID = paste0("Spot", 1:6)), spatialCoords = coordinates ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", st.data = ste, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) counts <- matrix( rpois(30, lambda = 5), ncol = 6, dimnames = list(paste0("Gene", 1:5), paste0("Spot", 1:6)) ) coordinates <- matrix( c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), ncol = 2 ) ste <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", 1:5)), colData = data.frame(Cell_ID = paste0("Spot", 1:6)), spatialCoords = coordinates ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", st.data = ste, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE )
deconv.spots
slot in a
SpatialExperiment
objectGet and set deconv.spots
slot in a
SpatialExperiment
object
deconv.spots(object, index.st = NULL) deconv.spots(object, index.st = NULL) <- value
deconv.spots(object, index.st = NULL) deconv.spots(object, index.st = NULL) <- value
object |
|
index.st |
Name or index of predicted cell proportions (same as for the
|
value |
List of predicted cell type proportions for the experiments
stored in the |
The DeconvDLModel
object stores all the information
related to deep neural network models. It consists of the trained model, the
training history, and the predictions on test data. After running
calculateEvalMetrics
, it is possible to find the performance
evaluation of the model on test data (see ?calculateEvalMetrics
for details).
The steps related to Deep Learning are carried out using the keras and
tensorflow packages, which use the R6 classes system. If you want to
save the DeconvDLModel
object as an RDS file,
SpatialDDLS provides a saveRDS
generic function that transforms
the R6 object containing the trained model into a native valid R object.
Specifically, the model is converted into a list with the architecture of the
network and the weights learned during training, which is the minimum
information needed to use the model as a predictor. If you want to keep the
optimizer state, see ?saveTrainedModelAsH5
. If you want to
store either the DeconvDLModel
or the
SpatialDDLS
objects on disk as RDA files, see
?preparingToSave
.
model
Trained deep neural network. This slot can contain an R6
keras.engine.sequential.Sequential
object or a list with two
elements: the architecture of the model and the resulting weights after
training.
training.history
List with the evolution of the selected metrics during training.
test.metrics
Performance of the model on test data.
test.pred
Predicted cell type proportions on test data.
cell.types
Vector with cell types considered by the model.
features
Vector with features (genes) considered by the model. These features will be used for subsequent predictions.
test.deconv.metrics
Performance of the model on test data by cell type.
This slot is generated by the calculateEvalMetrics
function
(see ?calculateEvalMetrics
for more details).
interpret.gradients
Gradients for interpretation. SpatialDDLS
provides some functions to better understand prediction made by the model
(see ?interGradientsDL
for more details). This slot is a list
of either one or two elements: gradients of either the loss function or the
predicted class with respect to the input variables using pure (only one
cell type) mixed transcriptional profiles. These gradients can be
interpreted as to what extent the model is using these variables to predict
each cell type proportions.
Deconvolute spatial transcriptomics data using the trained model in
the SpatialDDLS
object. The trained model is used
to predict cell proportions of two mirrored transcriptional profiles:
'Intrinsic' profiles: transcriptional profiles of each spot in the ST dataset.
'Extrinsic' profiles: profiles simulated from the surrounding spots of each spot.
After prediction, cell proportions from the intrinsic profiles (intrinsic cell proportions) are regularized based on the similarity between intrinsic and extrinsic profiles in order to maintain spatial consistency. This approach leverages both transcriptional and spatial information. For more details, see Mañanes et al., 2023 and the Details section.
deconvSpatialDDLS( object, index.st, normalize = TRUE, scaling = "standardize", k.spots = 4, pca.space = TRUE, fast.pca = TRUE, pcs.num = 50, pca.var = 0.8, metric = "euclidean", alpha.cutoff = "mean", alpha.quantile = 0.5, simplify.set = NULL, simplify.majority = NULL, use.generator = FALSE, batch.size = 64, verbose = TRUE )
deconvSpatialDDLS( object, index.st, normalize = TRUE, scaling = "standardize", k.spots = 4, pca.space = TRUE, fast.pca = TRUE, pcs.num = 50, pca.var = 0.8, metric = "euclidean", alpha.cutoff = "mean", alpha.quantile = 0.5, simplify.set = NULL, simplify.majority = NULL, use.generator = FALSE, batch.size = 64, verbose = TRUE )
object |
|
index.st |
Name or index of the dataset/slide stored in the
|
normalize |
Normalize data (logCPM) before deconvolution ( |
scaling |
How to scale data before training. Options include
|
k.spots |
Number of nearest spots considered for each spot during regularization and simulation of extrinsic transcriptional profiles. The greater, the smoother the regularization will be (4 by default). |
pca.space |
Whether to use PCA space to calculate distances between
intrinsic and extrinsic transcriptional profiles ( |
fast.pca |
Whether using the irlba implementation. If |
pcs.num |
Number of PCs used to calculate distances if
|
pca.var |
Threshold of explained
variance (between 0.2 and 1) used to choose the number of PCs used if
|
metric |
Metric used to measure distance/similarity between intrinsic
and extrinsic transcriptional profiles. It may be |
alpha.cutoff |
Minimum distance for regularization.
It may be |
alpha.quantile |
Quantile used if |
simplify.set |
List specifying which cell types should be compressed
into a new label with the name of the list item. See examples for details.
If provided, results are stored in a list with |
simplify.majority |
List specifying which cell types should be
compressed into the cell type with the highest proportion in each spot.
Unlike |
use.generator |
Boolean indicating whether to use generators for
prediction ( |
batch.size |
Number of samples per batch. Only when |
verbose |
Show informative messages during the execution. |
The deconvolution process involves two main steps: predicting cell proportions based on transcriptome using the trained neural network model, and regularization of cell proportions based on the spatial location of each spot. In the regularization step, a mirrored version of each spot is simulated based on its N-nearest spots. We refer to these profiles as 'extrinsic' profiles, whereas the transcriptional profiles of each spot are called 'intrinsic' profiles. Extrinsic profiles are used to regularize predictions based on intrinsic profiles. The rationale is that spots surrounded by transcriptionally similar spots should have similar cell compositions, and therefore predicted proportions can be smoothed to preserve their spatial consistency. On the other hand, spots surrounded by dissimilar spots cannot be predicted by their neighbors, and thus they can only be predicted by their own transcriptional profiles likely due to presenting very specific cell compositions.
Regarding the working os SpatialDDLS: first, extrinsic profiles are
simulated based on the N-nearest spots for each spot by summing their
transcriptomes. Distances between extrinsic and intrinsic profiles of each
spot are calculated so that similar/dissimilar spots are identified. These
two sets of transcriptional profiles are used as input for the trained neural
network model, and according to the calculated distances, a weighted mean
between the predicted proportions for each spot is calculated. Spots with
distances between intrinsic and extrinsic profiles greater than
alpha.cutoff
are not regularized, whereas spots with distances less
than alpha.cutoff
contribute to the weighted mean. Weights are
calculated by rescaling distances less than alpha.cutoff
between 0
and 0.5, so that the maximum extent to which a extrinsic profile can
modified the predictions based on intrinsic profiles is 0.5 (a regular
mean). For more details, see Mañanes et al., 2023.
This function requires a SpatialDDLS
object with a
trained deep neural network model (trained.model
slot, and the
spatial transcriptomics datasets to be deconvoluted in the
spatial.experiments
slot. See ?createSpatialDDLSobject
or ?loadSTProfiles
for more details.
SpatialDDLS
object with a deconv.spots
slot. The output is a list containing 'Regularized', 'Intrinsic' and
'Extrinsic' deconvoluted cell proportions, 'Distances' between intrinsic
and extrinsic transcriptional profiles, and 'Weight.factors' with the
final weights used to regularize intrinsic cell proportions. If
simplify.set
and/or simplify.majority
are provided,
the deconv.spots
slot will contain a list with raw and simplified
results.
Mañanes, D., Rivero-García, I., Jimenez-Carretero, D., Torres, M., Sancho, D., Torroja, C., Sánchez-Cabo, F. (2023). SpatialDDLS: An R package to deconvolute spatial transcriptomics data using neural networks. biorxiv. doi: doi:10.1101/2023.08.31.555677.
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of SDDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # simulating spatial data ngenes <- sample(3:40, size = 1) ncells <- sample(10:40, size = 1) counts <- matrix( rpois(ngenes * ncells, lambda = 5), ncol = ncells, dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells))) ) coordinates <- matrix( rep(c(1, 2), ncells), ncol = 2 ) st <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))), colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))), spatialCoords = coordinates ) SDDLS <- loadSTProfiles( object = SDDLS, st.data = st, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" ) # simplify arguments simplify <- list(CellGroup1 = c("CellType1", "CellType2", "CellType4"), CellGroup2 = c("CellType3", "CellType5")) SDDLS <- deconvSpatialDDLS( object = SDDLS, index.st = 1, simplify.set = simplify, simplify.majority = simplify )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample(x = paste0("CellType", seq(6)), size = 20, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of SDDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # simulating spatial data ngenes <- sample(3:40, size = 1) ncells <- sample(10:40, size = 1) counts <- matrix( rpois(ngenes * ncells, lambda = 5), ncol = ncells, dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells))) ) coordinates <- matrix( rep(c(1, 2), ncells), ncol = 2 ) st <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))), colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))), spatialCoords = coordinates ) SDDLS <- loadSTProfiles( object = SDDLS, st.data = st, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" ) # simplify arguments simplify <- list(CellGroup1 = c("CellType1", "CellType2", "CellType4"), CellGroup2 = c("CellType3", "CellType5")) SDDLS <- deconvSpatialDDLS( object = SDDLS, index.st = 1, simplify.set = simplify, simplify.majority = simplify )
Generate box or violin plots to show how errors are distributed. Errors can
be shown all mixed or either split by cell type (CellType
) or number
of cell types present in the spots (nCellTypes
). See the
facet.by
argument and examples for more details.
distErrorPlot( object, error, colors, x.by = "pBin", facet.by = NULL, color.by = "nCellTypes", filter.sc = TRUE, error.label = FALSE, pos.x.label = 4.6, pos.y.label = NULL, size.point = 0.1, alpha.point = 1, type = "violinplot", ylimit = NULL, nrow = NULL, ncol = NULL, title = NULL, theme = NULL, ... )
distErrorPlot( object, error, colors, x.by = "pBin", facet.by = NULL, color.by = "nCellTypes", filter.sc = TRUE, error.label = FALSE, pos.x.label = 4.6, pos.y.label = NULL, size.point = 0.1, alpha.point = 1, type = "violinplot", ylimit = NULL, nrow = NULL, ncol = NULL, title = NULL, theme = NULL, ... )
object |
|
error |
Error to be represented. Available metric errors are: absolute
error ( |
colors |
Vector of colors to be used. |
x.by |
Variable used for the X-axis. When |
facet.by |
Show data in different panels. Options are |
color.by |
Variable used to color data. Options are |
filter.sc |
Boolean indicating whether single-cell profiles are filtered
out and only mixed transcriptional profile errors are shown ( |
error.label |
Boolean indicating whether to show the average error as a
plot annotation ( |
pos.x.label |
X-axis position of error annotations. |
pos.y.label |
Y-axis position of error annotations. |
size.point |
Size of points (0.1 by default). |
alpha.point |
Alpha of points (0.1 by default). |
type |
Type of plot: |
ylimit |
Upper limit in Y-axis if it is required ( |
nrow |
Number of rows if |
ncol |
Number of columns if |
title |
Title of the plot. |
theme |
ggplot2 theme. |
... |
Additional arguments for the facet_wrap function
of ggplot2 if |
A ggplot object.
calculateEvalMetrics
corrExpPredPlot
blandAltmanLehPlot
barErrorPlot
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample( x = paste0("CellType", seq(6)), size = 20, replace = TRUE ) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # representation, for more examples, see the vignettes distErrorPlot( object = SDDLS, error = "AbsErr", facet.by = "CellType", color.by = "nCellTypes", error.label = TRUE ) distErrorPlot( object = SDDLS, error = "AbsErr", x.by = "CellType", facet.by = NULL, color.by = "CellType", error.label = TRUE )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 20, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(20))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(20)), Cell_Type = sample( x = paste0("CellType", seq(6)), size = 20, replace = TRUE ) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) # training of DDLS model SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 15, num.epochs = 5 ) # evaluation using test data SDDLS <- calculateEvalMetrics(object = SDDLS) # representation, for more examples, see the vignettes distErrorPlot( object = SDDLS, error = "AbsErr", facet.by = "CellType", color.by = "nCellTypes", error.label = TRUE ) distErrorPlot( object = SDDLS, error = "AbsErr", x.by = "CellType", facet.by = NULL, color.by = "CellType", error.label = TRUE )
Estimate the parameters of the ZINB-WaVE model using a real single-cell
RNA-Seq data set as reference to simulate new single-cell profiles and
increase the signal of underrepresented cell types. This step is only is
needed if the size of the single-cell RNA-seq dataset is too small or there
are underrepresented cell types. After this step, the
simSCProfiles
function will use the estimated parameters to
simulate new single-cell profiles. See ?simSCProfiles
for more
information.
estimateZinbwaveParams( object, cell.type.column, cell.ID.column, gene.ID.column, cell.cov.columns, gene.cov.columns, subset.cells = NULL, proportional = TRUE, set.type = "All", threads = 1, verbose = TRUE )
estimateZinbwaveParams( object, cell.type.column, cell.ID.column, gene.ID.column, cell.cov.columns, gene.cov.columns, subset.cells = NULL, proportional = TRUE, set.type = "All", threads = 1, verbose = TRUE )
object |
|
cell.type.column |
Name or column number corresponding to the cell type of each cell in cells metadata. |
cell.ID.column |
Name or column number corresponding to the cell names of expression matrix in cells metadata. |
gene.ID.column |
Name or column number corresponding to the notation used for features/genes in genes metadata. |
cell.cov.columns |
Name or column number(s) in cells metadata to be used
as covariates during model fitting (if no covariates are used, set to empty
or |
gene.cov.columns |
Name or column number(s) in genes metadata that will
be used as covariates during model fitting (if no covariates are used, set
to empty or |
subset.cells |
Number of cells to fit the ZINB-WaVE model. Useful when
the original data set is too large to fit the model. Set a value according
to the original data set and the resources available on your computer. If
|
proportional |
If |
set.type |
Cell type(s) to evaluate ( |
threads |
Number of threads used for estimation (1 by default). To set up the parallel environment, the BiocParallel package must be installed. |
verbose |
Show informative messages during the execution ( |
ZINB-WaVE is a flexible model for zero-inflated count data. This function
carries out the model fit to real single-cell data modeling (the
count of feature
for sample
) as a random variable following a
zero-inflated negative binomial (ZINB) distribution. The estimated parameters
will be used for the simulation of new single-cell expression profiles by
sampling a negative binomial distribution and inserting dropouts from a
binomial distribution. To do so, SpatialDDLS uses the
zinbFit
function from the zinbwave package
(Risso et al., 2018). For more details about the model, see Risso et al.,
2018.
A SpatialDDLS
object with zinb.params
slot containing a ZinbParametersModel
object. This
object contains a slot with the estimated ZINB-WaVE parameters from the
real single-cell RNA-Seq data.
Risso, D., Perraudeau, F., Gribkova, S. et al. (2018). A general and flexible method for signal extraction from single-cell RNA-seq data. Nat Commun 9, 284. doi: doi:10.1038/s41467-017-02554-5.
Torroja, C. and Sánchez-Cabo, F. (2019). digitalDLSorter: A Deep Learning algorithm to quantify immune cell populations based on scRNA-Seq data. Frontiers in Genetics 10, 978. doi: doi:10.3389/fgene.2019.00978.
set.seed(123) # reproducibility sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE ) SDDLS <- estimateZinbwaveParams( object = SDDLS, cell.type.column = "Cell_Type", cell.ID.column = "Cell_ID", gene.ID.column = "Gene_ID", subset.cells = 2, verbose = TRUE )
set.seed(123) # reproducibility sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE ) SDDLS <- estimateZinbwaveParams( object = SDDLS, cell.type.column = "Cell_Type", cell.ID.column = "Cell_ID", gene.ID.column = "Gene_ID", subset.cells = 2, verbose = TRUE )
features
slot in a
DeconvDLModel
objectGet and set features
slot in a
DeconvDLModel
object
features(object) features(object) <- value
features(object) features(object) <- value
object |
|
value |
Vector with features (genes) considered by the deep neural network model. |
Generate training and test cell type composition matrices for the simulation
of mixed transcriptional profiles with known cell composition using
single-cell expression profiles. The resulting
PropCellTypes
object will contain all the information
needed to simulate new mixed transcriptional profiles. Note this function
does not simulate the mixed profiles, this task is performed by the
simMixedProfiles
or trainDeconvModel
functions
(see Documentation).
genMixedCellProp( object, cell.ID.column, cell.type.column, num.sim.spots, n.cells = 50, train.freq.cells = 3/4, train.freq.spots = 3/4, proportion.method = c(0, 0, 1), prob.sparity = 1, min.zero.prop = NULL, balanced.type.cells = TRUE, verbose = TRUE )
genMixedCellProp( object, cell.ID.column, cell.type.column, num.sim.spots, n.cells = 50, train.freq.cells = 3/4, train.freq.spots = 3/4, proportion.method = c(0, 0, 1), prob.sparity = 1, min.zero.prop = NULL, balanced.type.cells = TRUE, verbose = TRUE )
object |
|
cell.ID.column |
Name or column number corresponding to cell names in cells metadata. |
cell.type.column |
Name or column number corresponding to cell types in cells metadata. |
num.sim.spots |
Number of mixed profiles to be simulated. It is recommended to adjust this number according to the number of available single-cell profiles. |
n.cells |
Specifies the number of cells to be randomly selected and combined to generate the simulated mixed profiles. By default, it is set to 50 It controls the level of noise present in the simulated data, as it determines how many single-cell profiles will be combined to produce each spot. |
train.freq.cells |
Proportion of cells used to simulate training mixed transcriptional profiles (3/4 by default). |
train.freq.spots |
Proportion of mixed transcriptional profiles to be
used for training, relative to the total number of simulated spots
( |
proportion.method |
Vector with three elements that controls the
proportion of simulated proportions generated by each method: random
sampling of a Dirichlet distribution, "pure" spots (1 cell type), and spots
generated from a random sampling of a Dirichlet distribution but with a
specified number of different cell types (determined by
|
prob.sparity |
It only affects the proportions generated by the first method (Dirichlet distribution). It determines the probability of having missing cell types in each simulated spot, as opposed to a mixture of all cell types. A higher value for this parameter will result in more sparse simulated samples. |
min.zero.prop |
This parameter controls the minimum number of cell types
that will be absent in each simulated spot. If |
balanced.type.cells |
Boolean indicating whether training and test cells
will be split in a balanced way considering cell types ( |
verbose |
Show informative messages during the execution ( |
First, the single-cell profiles are randomly divided into two subsets, with
2/3 of the data for training and 1/3 for testing. The default setting for
this ratio can be changed using the train.freq.cells
parameter. Next,
a total of num.sim.spots
mixed proportions are simulated using a
Dirichlet distribution. This simulation takes into account the probability of
missing cell types in each spot, which can be adjusted using the
prob.sparity
parameter. For each mixed sample, n.cells
single-cell profiles are randomly selected and combined to generate the
simulated mixed sample. In addition to the Dirichlet-based proportions, pure
spots (containing only one cell type) and spots containing a specified number
of different cell types (determined by the min.zero.prop
parameter)
are also generated in order to cover situations with only a few cell types
present. The proportion of simulated spots generated by each method can be
controlled using the proportion.method
parameter. To visualize the
distribution of cell type proportions generated by each method, the
showProbPlot
function can be used.
A SpatialDDLS
object with prob.cell.types
slot containing a list
with two PropCellTypes
objects (training and test). For more information about the structure of
this class, see ?PropCellTypes
.
simMixedProfiles
PropCellTypes
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE )
Getter function for the cell composition matrix. This function allows to access to the cell composition matrix of simulated mixed transcriptional profiles.
getProbMatrix(object, type.data)
getProbMatrix(object, type.data)
object |
|
type.data |
Subset of data to return: |
Cell type proportion matrix.
This function facilitates the installation of the required Python dependencies for the SpatialDDLS R package, as it requires a Python interpreter with the TensorFlow Python library and its dependencies.
installTFpython( conda = "auto", python.version = "3.8", tensorflow.version = "2.6", install.conda = FALSE, miniconda.path = NULL )
installTFpython( conda = "auto", python.version = "3.8", tensorflow.version = "2.6", install.conda = FALSE, miniconda.path = NULL )
conda |
Path to a conda executable. Using |
python.version |
Python version to be installed in the environment
( |
tensorflow.version |
Tensorflow version to be installed in the
environment ( |
install.conda |
Boolean indicating if installing miniconda automatically
by using reticulate. If |
miniconda.path |
If |
This function is intended to simplify the installation process for
SpatialDDLS by automatically installing Miniconda and creating a new
environment named SpatialDDLS-env with all SpatialDDLS' dependencies
covered. For users who wish to use a different Python or conda environment,
see the tensorflow::use_condaenv
function for more information.
No return value, called for side effects: installation of conda environment with a Python interpreter and Tensorflow
## Not run: notesInstallation <- installTFpython( conda = "auto", install.conda = TRUE ) ## End(Not run)
## Not run: notesInstallation <- installTFpython( conda = "auto", install.conda = TRUE ) ## End(Not run)
This function enables users to gain insights into the interpretability of the deconvolution model. It calculates the gradients of classes/loss function with respect to the input features used in training. These numeric values are calculated per gene and cell type in pure mixed transcriptional profiles, providing information on the extent to which each feature influences the model's prediction of cell proportions for each cell type.
interGradientsDL( object, method = "class", normalize = TRUE, scaling = "standardize", verbose = TRUE )
interGradientsDL( object, method = "class", normalize = TRUE, scaling = "standardize", verbose = TRUE )
object |
|
method |
Method to calculate gradients with respect to inputs. It can be
|
normalize |
Whether to normalize data using logCPM ( |
scaling |
How to scale data. It can be: |
verbose |
Show informative messages during the execution ( |
Gradients of classes / loss function with respect to the input features are calculated exclusively using pure mixed transcriptional profiles composed of a single cell type. Consequently, these numbers can be interpreted as the extent to which each feature is being used to predict each cell type proportion. Gradients are calculated at the sample level for each gene, but only mean gradients by cell type are reported. For additional details, see Mañanes et al., 2023.
Object containing gradients in the interpret.gradients
slot of
the DeconvDLModel
object (trained.model
slot).
deconvSpatialDDLS
plotTrainingHistory
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS)
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS)
This function loads a SpatialExperiment
object (or a list with several
SpatialExperiment
objects) into a
SpatialDDLS
object.
loadSTProfiles( object, st.data, st.spot.ID.column, st.gene.ID.column, st.min.counts = 0, st.min.spots = 0, st.n.slides = 3, verbose = TRUE )
loadSTProfiles( object, st.data, st.spot.ID.column, st.gene.ID.column, st.min.counts = 0, st.min.spots = 0, st.n.slides = 3, verbose = TRUE )
object |
A |
st.data |
A |
st.spot.ID.column |
Name or number of the column in spots metadata corresponding to spot names in the expression matrix. |
st.gene.ID.column |
Name or number of the column in genes metadata corresponding to names used for features/genes. |
st.min.counts |
Minimum gene counts to filter (0 by default). |
st.min.spots |
Minimum of spots with more than |
st.n.slides |
Minimum number of slides
( |
verbose |
Show informative messages during execution ( |
It is recommended to perform this step when creating the
SpatialDDLS
object using the
createSpatialDDLSobject
function in order to only keep genes
shared between the spatial transcriptomics and the single-cell
transcriptomics data used as reference. In addition, please, make sure the
gene identifiers used the spatial and single-cell transcriptomics data are
consistent.
A SpatialDDLS
object with the provided spatial
trainscriptomics data loaded into the spatial.experiments
slot.
createSpatialDDLSobject
trainDeconvModel
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) ## simulating a SpatialExperiment object counts <- matrix(rpois(30, lambda = 5), ncol = 6) rownames(counts) <- paste0("Gene", 1:5) colnames(counts) <- paste0("Spot", 1:6) coordinates <- matrix( c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), ncol = 2 ) ste <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", 1:5)), colData = data.frame(Cell_ID = paste0("Spot", 1:6)), spatialCoords = coordinates ) ## previous SpatialDDLS object SDDLS <- loadSTProfiles( object = SDDLS, st.data = ste, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) ## simulating a SpatialExperiment object counts <- matrix(rpois(30, lambda = 5), ncol = 6) rownames(counts) <- paste0("Gene", 1:5) colnames(counts) <- paste0("Spot", 1:6) coordinates <- matrix( c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), ncol = 2 ) ste <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", 1:5)), colData = data.frame(Cell_ID = paste0("Spot", 1:6)), spatialCoords = coordinates ) ## previous SpatialDDLS object SDDLS <- loadSTProfiles( object = SDDLS, st.data = ste, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" )
SpatialDDLS
objectLoad from an HDF5 file a trained deep neural network model into a
SpatialDDLS
object. Note that HDF5 file must be a valid
trained model (keras object).
loadTrainedModelFromH5(object, file.path, reset.slot = FALSE)
loadTrainedModelFromH5(object, file.path, reset.slot = FALSE)
object |
|
file.path |
Valid file path where the model are stored. |
reset.slot |
Deletes |
SpatialDDLS
object with trained.model
slot with the new keras DNN model incorporated.
trainDeconvModel
saveTrainedModelAsH5
method
slot in a PropCellTypes
objectGet and set method
slot in a PropCellTypes
object
method(object) method(object) <- value
method(object) method(object) <- value
object |
|
value |
Vector containing the method by which cell type proportions were generated. |
mixed.profiles
slot in a
SpatialExperiment
objectGet and set mixed.profiles
slot in a
SpatialExperiment
object
mixed.profiles(object, type.data = "both") mixed.profiles(object, type.data = "both") <- value
mixed.profiles(object, type.data = "both") mixed.profiles(object, type.data = "both") <- value
object |
|
type.data |
Type of data to return. It can be |
value |
List with two
|
model
slot in a DeconvDLModel
objectGet and set model
slot in a DeconvDLModel
object
model(object) model(object) <- value
model(object) model(object) <- value
object |
|
value |
|
Color spots on the spatial coordinates according to distances between intrinsic and extrinsic transcriptional profiles.
plotDistances( object, index.st, mid.scale = "mean", size.point = 1, title = NULL, theme = NULL )
plotDistances( object, index.st, mid.scale = "mean", size.point = 1, title = NULL, theme = NULL )
object |
A |
index.st |
Index of the spatial transcriptomics data to be plotted. It can be either a position or a name if a named list was provided. |
mid.scale |
The midpoint of the diverging scale. it may be |
size.point |
Size of points (0.1 by default). |
title |
Title of plot. |
theme |
ggplot2 theme. |
A ggplot object.
deconvSpatialDDLS
trainDeconvModel
Plot a heatmap showing the top positive and negative gene average gradients per cell type.
plotHeatmapGradsAgg( object, method = "class", top.n.genes = 15, scale.gradients = TRUE )
plotHeatmapGradsAgg( object, method = "class", top.n.genes = 15, scale.gradients = TRUE )
object |
|
method |
Method to calculate gradients with respect to input features.
It can be
|
top.n.genes |
Top n genes (positive and negative) taken per cell type. |
scale.gradients |
Whether to calculate feature-wise z-scores of
gradients ( |
A list of Heatmap-class
objects, one for top
positive and another one for top negative gradients.
interGradientsDL
trainDeconvModel
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS) plotHeatmapGradsAgg(SDDLS, top.n.genes = 2)
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS) plotHeatmapGradsAgg(SDDLS, top.n.genes = 2)
plots
slot in a PropCellTypes
objectGet and set plots
slot in a PropCellTypes
object
plots(object) plots(object) <- value
plots(object) plots(object) <- value
object |
|
value |
List of lists with plots showing the distribution of cell proportions generated by each method. |
Color spots on the spatial coordinates according to the results of clustering based on predicted proportions.
plotSpatialClustering( object, index.st, method, k.nn, k.centers, colors, size.point = 1, title = NULL, theme = NULL )
plotSpatialClustering( object, index.st, method, k.nn, k.centers, colors, size.point = 1, title = NULL, theme = NULL )
object |
A |
index.st |
Index of the spatial transcriptomics data to be plotted. It
can be either a position or a name if a named list of
|
method |
Clustering method results to plot. It can be |
k.nn |
Number of nearest neighbors used if |
k.centers |
Number of k centers used if |
colors |
Vector of colors to be used. |
size.point |
Size of points (0.1 by default). |
title |
Title of plot. |
theme |
ggplot2 theme. |
A ggplot object.
spatialPropClustering
deconvSpatialDDLS
Color spots on the spatial coordinates according to the logCPM values of a particular gene.
plotSpatialGeneExpr( object, index.st, gene, colors = "spectral", size.point = 1, title = NULL, theme = NULL )
plotSpatialGeneExpr( object, index.st, gene, colors = "spectral", size.point = 1, title = NULL, theme = NULL )
object |
A |
index.st |
Index of the spatial transcriptomics data to be plotted. It
can be either a position or a name if a named list of
|
gene |
Gene to color spots by. |
colors |
Color scale to be used. It can be |
size.point |
Size of points (0.1 by default). |
title |
Title of plot. |
theme |
ggplot2 theme. |
A ggplot object.
interGradientsDL
topGradientsCellType
Color spots on the spatial coordinates according to the predicted proportions of a particular cell type. Color scale is adapted depending on the range of predicted proportions.
plotSpatialProp( object, index.st, cell.type, colors = "blues", set = "raw", prediction = "Regularized", limits = NULL, size.point = 1, title = NULL, theme = NULL )
plotSpatialProp( object, index.st, cell.type, colors = "blues", set = "raw", prediction = "Regularized", limits = NULL, size.point = 1, title = NULL, theme = NULL )
object |
A |
index.st |
Index of the spatial transcriptomics data to be plotted. It
can be either a position or a name if a named list of
|
cell.type |
Cell type predicted proportions to color spots by. |
colors |
Color scale to be used. It can be |
set |
If results were simplified (see |
prediction |
It can be |
limits |
A vector of two elements indicating wanted limits for color
scale. If |
size.point |
Size of points (0.1 by default). |
title |
Title of plot. |
theme |
ggplot2 theme. |
A ggplot object.
plotSpatialPropAll
deconvSpatialDDLS
trainDeconvModel
Color spots on the spatial coordinates plot according to their predicted cell type proportions. All cell types are represented together using the same color scale from 0 to 1.
plotSpatialPropAll( object, index.st, colors = "blues", set = "raw", prediction = "Regularized", size.point = 0.1, title = NULL, nrow = NULL, ncol = NULL, theme = NULL )
plotSpatialPropAll( object, index.st, colors = "blues", set = "raw", prediction = "Regularized", size.point = 0.1, title = NULL, nrow = NULL, ncol = NULL, theme = NULL )
object |
A |
index.st |
Index of the spatial transcriptomics data to be plotted. It
can be either a position or a name if a named list of
|
colors |
Color scale to be used. It can be |
set |
If results were simplified (see |
prediction |
It can be |
size.point |
Size of points (0.1 by default). |
title |
Title of plot. |
nrow |
Number of rows in the split plot. |
ncol |
Number of columns in the split plot. |
theme |
ggplot2 theme. |
A ggplot object.
plotSpatialProp
deconvSpatialDDLS
trainDeconvModel
Plot training history of a trained SpatialDDLS deep neural network model.
plotTrainingHistory( object, title = "History of metrics during training", metrics = NULL )
plotTrainingHistory( object, title = "History of metrics during training", metrics = NULL )
object |
|
title |
Title of plot. |
metrics |
Metrics to be plotted. If |
A ggplot object with the progression of the selected metrics during training.
SpatialDDLS
object to be saved as an RDA fileThis function prepares a SpatialDDLS
object to be saved
as an RDA file when contains a DeconvDLModel
object with
a trained DNN model.
preparingToSave(object)
preparingToSave(object)
object |
|
Since keras models cannot be saved natively as R objects, this function
saves the structure of the model as a JSON-like character object and its
weights as a list. This allows for the retrieval of the model and making
predictions. It is important to note that the state of the optimizer is not
saved, only the model's architecture and weights. To save the entire model,
please see the saveTrainedModelAsH5
and
loadTrainedModelFromH5
functions.
It is also possible to save a SpatialDDLS
object as an
RDS file with the saveRDS
function without any preparation.
A SpatialDDLS
or
DeconvDLModel
object with its trained keras model
transformed from a keras.engine.sequential.Sequential
class into a
list
with its architecture as a JSON-like character object, and its
weights as a list.
prob.cell.types
slot in a
SpatialExperiment
objectGet and set prob.cell.types
slot in a
SpatialExperiment
object
prob.cell.types(object, type.data = "both") prob.cell.types(object, type.data = "both") <- value
prob.cell.types(object, type.data = "both") prob.cell.types(object, type.data = "both") <- value
object |
|
type.data |
Type of data to return. It can be |
value |
List with two |
prob.matrix
slot in a
PropCellTypes
objectGet and set prob.matrix
slot in a
PropCellTypes
object
prob.matrix(object) prob.matrix(object) <- value
prob.matrix(object) prob.matrix(object) <- value
object |
|
value |
Matrix with cell types as columns and samples as rows. |
project
slot in a
SpatialExperiment
objectGet and set project
slot in a
SpatialExperiment
object
project(object) project(object) <- value
project(object) project(object) <- value
object |
|
value |
Character indicating the name of the project. |
The PropCellTypes
class is a data storage class which
contains information related to cell type composition matrices used to
simulate mixed transcriptional profiles. This matrix is stored in the
prob.matrix
slot while the other slots contain additional information
generated during the process and required for subsequent steps.
See ?genMixedCellProp
function for information about how cell
type composition matrices are generated. Plots of cell type proportion
distributions can be accessed using the showProbPlot
function
(see ?showProbPlot
for more details).
prob.matrix
Matrix of cell type proportions to simulate mixed transcriptional profiles.
cell.names
Matrix containing cells used to generate the simulated mixed transcriptional profiles.
set.list
List of cells sorted by cell type.
set
Vector containing cell names present in the object.
method
Vector indicating the method by which cell type proportions were generated.
plots
Plots showing cell type proportion distributions. See
?showProbPlot
for more details.
type.data
Character indicating the type of data contained:
'train'
or 'test'
.
SpatialExperiment
objects as RDS filesSave SpatialExperiment
and
DeconvDLModel
objects as RDS files. keras models
cannot be stored natively as R objects (e.g. RData or RDS files). By saving
the architecture as a JSON-like character object and the weights as a list,
it is possible to retrieve a functional model and make new predictions. If
the trained.model
slot is empty, the function will behave as usual.
Note: with this option, the state of optimizer is not saved, only
model's architecture and weights. It is possible to save the entire model as
an HDF5 file with the saveTrainedModelAsH5
function and load it
into a SpatialExperiment
object with the
loadTrainedModelFromH5
function. See documentation for details.
saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL ) ## S4 method for signature 'DeconvDLModel' saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL ) ## S4 method for signature 'SpatialDDLS' saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL )
saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL ) ## S4 method for signature 'DeconvDLModel' saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL ) ## S4 method for signature 'SpatialDDLS' saveRDS( object, file, ascii = FALSE, version = NULL, compress = TRUE, refhook = NULL )
object |
|
file |
File path where the object will be saved |
ascii |
a logical. If |
version |
the workspace format version to use. |
compress |
a logical specifying whether saving to a named file is
to use |
refhook |
a hook function for handling reference objects. |
No return value, saves a
SpatialExperiment
object as an RDS file on
disk.
SpatialExperiment
saveTrainedModelAsH5
SpatialDDLS
deep neural network model to
disk as an HDF5 fileSave a trained SpatialDDLS
deep neural network model to
disk as an HDF5 file. Note that this function does not save the
DeconvDLModel
object, only the trained keras
model. This is the alternative to the saveRDS
and
preparingToSave
functions if you want to keep the state of the
optimizer.
saveTrainedModelAsH5(object, file.path, overwrite = FALSE)
saveTrainedModelAsH5(object, file.path, overwrite = FALSE)
object |
|
file.path |
Valid file path where to save the model to. |
overwrite |
Overwrite file if it already exists. |
No return value, saves a keras DNN trained model as HDF5 file on disk.
trainDeconvModel
loadTrainedModelFromH5
set
slot in a PropCellTypes
objectGet and set set
slot in a PropCellTypes
object
set(object) set(object) <- value
set(object) set(object) <- value
object |
|
value |
A vector containing the names of cells that are present in the object. |
set.list
slot in a
PropCellTypes
objectGet and set set.list
slot in a
PropCellTypes
object
set.list(object) set.list(object) <- value
set.list(object) set.list(object) <- value
object |
|
value |
List of cells sorted by their corresponding cell type. |
genMixedCellProp
Show distribution plots of the cell proportions generated by the
genMixedCellProp
function.
showProbPlot(object, type.data, set, type.plot = "boxplot")
showProbPlot(object, type.data, set, type.plot = "boxplot")
object |
|
type.data |
Subset of data to show: |
set |
Integer determining which of the 6 different subsets to display. |
type.plot |
Character determining which type of visualization to
display. It can be |
These frequencies will determine the proportion of different cell types used
during the simulation of mixed transcriptional profiles. Proportions
generated by each method (see ?genMixedCellProp
) can be
visualized in three ways: box plots, violin plots, and lines plots. You can
also plot the probabilities based on the number of different cell types
present in the samples by setting type.plot = 'nCellTypes'
.
A ggplot object.
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) showProbPlot( SDDLS, type.data = "train", set = 1, type.plot = "boxplot" )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", project = "Simul_example", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) showProbPlot( SDDLS, type.data = "train", set = 1, type.plot = "boxplot" )
Simulate training and test mixed spot transcriptional profiles using cell
composition matrices generated by the genMixedCellProp
function.
simMixedProfiles( object, type.data = "both", mixing.function = "AddRawCount", file.backend = NULL, compression.level = NULL, block.processing = FALSE, block.size = 1000, chunk.dims = NULL, threads = 1, verbose = TRUE )
simMixedProfiles( object, type.data = "both", mixing.function = "AddRawCount", file.backend = NULL, compression.level = NULL, block.processing = FALSE, block.size = 1000, chunk.dims = NULL, threads = 1, verbose = TRUE )
object |
|
type.data |
Type of data to generate: |
mixing.function |
Function used to build mixed transcriptional profiles. It may be:
|
file.backend |
Valid file path to store simulated mixed expression
profiles as an HDF5 file ( |
compression.level |
The compression level used if |
block.processing |
Boolean indicating whether data should be simulated
in blocks (only if |
block.size |
Only if |
chunk.dims |
Specifies the dimensions that HDF5 chunk will have. If
|
threads |
Number of threads used during simulation (1 by default). |
verbose |
Show informative messages during the execution ( |
Mixed profiles are generated under the assumption that the expression level
of a particular gene in a given spot is the sum of the expression levels of
the cell types that make it up weighted by their proportions. In practice, as
described in Torroja and Sanchez-Cabo, 2019, these profiles are generated by
summing gene expression levels of a determined number of cells specified by a
known cell composition matrix. The number of simulated spots and cells used
to simulate each spot are determined by the genMixedCellProp
function. This step can be avoided by using the on.the.fly
argument in
the trainDeconvModel
function.
SpatialDDLS allows to use HDF5 files as back-end to store simulated
data using the DelayedArray and HDF5Array packages. This
functionality allows to work without keeping the data loaded into RAM, which
could be useful during some computationally heavy steps such as neural
network training on RAM-limited machines. You must provide a valid file path
in the file.backend
argument to store the resulting file with the
'.h5' extension. This option slows down execution times, as subsequent
transformations of the data will be done in blocks. Note that if you use the
file.backend
argument with block.processing = FALSE
, all mixed
profiles will be simulated in one step and, thus, loaded into RAM. Then, the
matrix will be written to an HDF5 file. To avoid the RAM collapse, these
profiles can be simulated and written to HDF5 files in blocks of
block.size
size by setting block.processing = TRUE
. We
recommend this option accordingly to the computational resources available
and the number of simulated spots to be generated, but, in most of the cases,
it is not necessary.
A SpatialDDLS
object with mixed.profiles
slot containing a list with one or two entries (depending on selected
type.data
argument): 'train'
and 'test'
. Each entry
consists of a
SummarizedExperiment
object with the simulated mixed slot profiles.
Fischer B, Smith M and Pau, G (2020). rhdf5: R Interface to HDF5. R package version 2.34.0.
Pagès H, Hickey P and Lun A (2020). DelayedArray: A unified framework for working transparently with on-disk and in-memory array-like datasets. R package version 0.16.0.
Pagès H (2020). HDF5Array: HDF5 backend for DelayedArray objects. R package version 1.18.0.
genMixedCellProp
PropCellTypes
trainDeconvModel
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS, verbose = TRUE)
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(100, lambda = 5), nrow = 40, ncol = 30, dimnames = list(paste0("Gene", seq(40)), paste0("RHC", seq(30))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(30)), Cell_Type = sample(x = paste0("CellType", seq(4)), size = 30, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(40)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 10, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS, verbose = TRUE)
Simulate single-cell expression profiles by randomly sampling from a negative
binomial distribution and inserting dropouts by sampling from a binomial
distribution using the ZINB-WaVE parameters estimated by the
estimateZinbwaveParams
function.
simSCProfiles( object, cell.ID.column, cell.type.column, n.cells, suffix.names = "_Simul", cell.types = NULL, file.backend = NULL, name.dataset.backend = NULL, compression.level = NULL, block.processing = FALSE, block.size = 1000, chunk.dims = NULL, verbose = TRUE )
simSCProfiles( object, cell.ID.column, cell.type.column, n.cells, suffix.names = "_Simul", cell.types = NULL, file.backend = NULL, name.dataset.backend = NULL, compression.level = NULL, block.processing = FALSE, block.size = 1000, chunk.dims = NULL, verbose = TRUE )
object |
|
cell.ID.column |
Name or column number corresponding to the cell names of expression matrix in cells metadata. |
cell.type.column |
Name or column number corresponding to the cell type of each cell in cells metadata. |
n.cells |
Number of simulated cells generated per cell type (i.e. if you
have 10 different cell types in your dataset, if |
suffix.names |
Suffix used on simulated cells. This suffix must be unique in the simulated cells, so make sure that this suffix does not appear in the real cell names. |
cell.types |
Vector indicating the cell types to simulate. If
|
file.backend |
Valid file path to store the simulated single-cell
expression profiles as an HDF5 file ( |
name.dataset.backend |
Name of the dataset in HDF5 file to be used. Note
that it cannot exist. If |
compression.level |
The compression level used if |
block.processing |
Boolean indicating whether the data should be
simulated in blocks (only if |
block.size |
Only if |
chunk.dims |
Specifies the dimensions that HDF5 chunk will have. If
|
verbose |
Show informative messages during the execution ( |
Before this step, see ?estimateZinbwaveParams
. As described in
Torroja and Sanchez-Cabo, 2019, this function simulates a given number of
transcriptional profiles for each cell type provided by randomly sampling
from a negative binomial distribution with and
estimated parameters and inserting dropouts by sampling from a binomial
distribution with probability pi. All parameters are estimated from
single-cell real data using the
estimateZinbwaveParams
function. It uses the ZINB-WaVE model (Risso et al., 2018). For more details
about the model, see ?estimateZinbwaveParams
and Risso et al.,
2018.
The file.backend
argument allows to create a HDF5 file with simulated
single-cell profiles to be used as back-end to work with data stored on disk
instead of loaded into RAM. If the file.backend
argument is used with
block.processing = FALSE
, all the single-cell profiles will be
simulated in one step and, therefore, loaded into in RAM memory. Then, data
will be written in HDF5 file. To avoid to collapse RAM memory if too many
single-cell profiles are goin to be simulated, single-cell profiles can be
simulated and written to HDF5 files in blocks of block.size
size by
setting block.processing = TRUE
.
A SpatialDDLS
object with
single.cell.simul
slot containing a
SingleCellExperiment
object with the simulated single-cell expression profiles.
Risso, D., Perraudeau, F., Gribkova, S. et al. (2018). A general and flexible method for signal extraction from single-cell RNA-seq data. Nat Commun 9, 284. doi: doi:10.1038/s41467-017-02554-5.
Torroja, C. and Sánchez-Cabo, F. (2019). digitalDLSorter: A Deep Learning algorithm to quantify immune cell populations based on scRNA-Seq data. Frontiers in Genetics 10, 978. doi: doi:10.3389/fgene.2019.00978.
set.seed(123) # reproducibility sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- estimateZinbwaveParams( object = SDDLS, cell.type.column = "Cell_Type", cell.ID.column = "Cell_ID", gene.ID.column = "Gene_ID", subset.cells = 2, verbose = TRUE ) SDDLS <- simSCProfiles( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", n.cells = 2, verbose = TRUE )
set.seed(123) # reproducibility sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE, project = "Simul_example" ) SDDLS <- estimateZinbwaveParams( object = SDDLS, cell.type.column = "Cell_Type", cell.ID.column = "Cell_ID", gene.ID.column = "Gene_ID", subset.cells = 2, verbose = TRUE ) SDDLS <- simSCProfiles( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", n.cells = 2, verbose = TRUE )
single.cell.real
slot in a
SpatialExperiment
objectGet and set single.cell.real
slot in a
SpatialExperiment
object
single.cell.real(object) single.cell.real(object) <- value
single.cell.real(object) single.cell.real(object) <- value
object |
|
value |
|
single.cell.simul
slot in a
SpatialExperiment
objectGet and set single.cell.simul
slot in a
SpatialExperiment
object
single.cell.simul(object) single.cell.simul(object) <- value
single.cell.simul(object) single.cell.simul(object) <- value
object |
|
value |
|
spatial.experiments
slot in a
SpatialExperiment
objectGet and set spatial.experiments
slot in a
SpatialExperiment
object
spatial.experiments(object, index.st = NULL) spatial.experiments(object, index.st = NULL) <- value
spatial.experiments(object, index.st = NULL) spatial.experiments(object, index.st = NULL) <- value
object |
|
index.st |
Index of the spatial transcriptomics data within the list. It
can be either an position or a name if a named list was provided. If
|
value |
List in which each element is a
|
The SpatialDDLS
object is the core of the
SpatialDDLS package. This object stores different intermediate data
needed for the construction of new deconvolution models, the spatial
transcriptomics profiles to be deconvoluted, and the predicted cell type
proportions.
This object uses other classes to store different types of data generated during the workflow:
SingleCellExperiment
class for single-cell RNA-Seq data
storage, using sparse matrix from the Matrix package
(dgCMatrix
class) or HDF5Array
class in case of
using HDF5 files as back-end (see below for more information).
SpatialExperiment
class
for spatial transcriptomics data
storage.
zinbModel
class with estimated
parameters for the simulation of new single-cell profiles.
SummarizedExperiment
class for simulated
mixed transcriptional profiles storage.
PropCellTypes
class for composition cell type
matrices. See ?PropCellTypes
for details.
DeconvDLModel
class to store information related to
deep neural network models. See ?DeconvDLModel
for
details.
In order to provide a way to work with large amounts of data in RAM-constrained machines, we provide the possibility of using HDF5 files as back-end to store count matrices of both real and simulated single-cell profiles by using the HDF5Array and DelayedArray classes from the homonymous packages.
single.cell.real
Real single-cell data stored in a
SingleCellExperiment
object. The count matrix is stored either as
dgCMatrix
or HDF5Array
objects.
spatial.experiments
List of
SpatialExperiment
objects to be deconvoluted.
zinb.params
zinbModel
object with estimated
parameters for the simulation of new single-cell expression profiles.
single.cell.simul
Simulated single-cell expression profiles using the ZINB-WaVE model.
prob.cell.types
PropCellTypes
class with cell
composition matrices built for the simulation of mixed transcriptional
profiles with known cell composition.
mixed.profiles
List of simulated train and test mixed transcriptional
profiles. Each entry is a
SummarizedExperiment
object. Count
matrices can be stored as HDF5Array
objects using HDF5 files as
back-end in case of RAM limitations.
trained.model
DeconvDLModel
object with
information related to the deconvolution model. See
?DeconvDLModel
for more details.
deconv.spots
Deconvolution results. It consists of a list where each
element corresponds to the results for each
SpatialExperiment
object contained in the
spatial.experiments
slot.
project
Name of the project.
version
Version of SpatialDDLS this object was built under.
SpatialDDLS is an R package that provides a neural network-based solution for cell type deconvolution of spatial transcriptomics data. The package takes advantage of single-cell RNA sequencing (scRNA-seq) data to simulate mixed transcriptional profiles with known cell composition and train fully-connected neural networks to predict the cell type composition of spatial transcriptomics spots. The resulting trained models can be applied to new spatial transcriptomics data to predict cell type proportions, allowing for more accurate cell type identification and characterization of spatially-resolved transcriptomic data. Finally, predictions are forced to keep spatial consistency through a process we refer to as spatial regularization. Overall, SpatialDDLS is a powerful tool for cell type deconvolution in spatial transcriptomics data, providing a reliable, fast and flexible solution for researchers in the field. See Mañanes et al. (2024) (doi:10.1093/bioinformatics/btae072) and some examples (https://diegommcc.github.io/SpatialDDLS/) for more details.
Cluster spatial transcriptomics data according to the cell proportions predicted in each spot. It allows to segregate ST data into niches with similar cell composition.
spatialPropClustering( object, index.st, method = "graph", k.nn = 10, k.centers = 5, verbose = TRUE )
spatialPropClustering( object, index.st, method = "graph", k.nn = 10, k.centers = 5, verbose = TRUE )
object |
|
index.st |
Name or index of the dataset/slide already deconvoluted to be clustered. If missing, all datasets already deconvoluted will be clustered. |
method |
Clustering method. It can be |
k.nn |
An integer specifying the number of nearest neighbors to be used
during graph construction (10 by default). Only if
|
k.centers |
An integer specifying the number of centers for k-means
algorithm (5 by default). Only if |
verbose |
Show informative messages during the execution ( |
A SpatialDDLS
object containing computed
clusters as a column in the slot colData
of the
SpatialExperiment
objects.
plotTrainingHistory
deconvSpatialDDLS
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( SDDLS, batch.size = 12, num.epochs = 5 ) # simulating spatial data ngenes <- sample(3:40, size = 1) ncells <- sample(10:40, size = 1) counts <- matrix( rpois(ngenes * ncells, lambda = 5), ncol = ncells, dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells))) ) coordinates <- matrix( rep(c(1, 2), ncells), ncol = 2 ) st <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))), colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))), spatialCoords = coordinates ) SDDLS <- loadSTProfiles( object = SDDLS, st.data = st, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" ) SDDLS <- deconvSpatialDDLS( SDDLS, index.st = 1 ) SDDLS <- spatialPropClustering(SDDLS, index.st = 1, k.nn = 5)
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( SDDLS, batch.size = 12, num.epochs = 5 ) # simulating spatial data ngenes <- sample(3:40, size = 1) ncells <- sample(10:40, size = 1) counts <- matrix( rpois(ngenes * ncells, lambda = 5), ncol = ncells, dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells))) ) coordinates <- matrix( rep(c(1, 2), ncells), ncol = 2 ) st <- SpatialExperiment::SpatialExperiment( assays = list(counts = as.matrix(counts)), rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))), colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))), spatialCoords = coordinates ) SDDLS <- loadSTProfiles( object = SDDLS, st.data = st, st.spot.ID.column = "Cell_ID", st.gene.ID.column = "Gene_ID" ) SDDLS <- deconvSpatialDDLS( SDDLS, index.st = 1 ) SDDLS <- spatialPropClustering(SDDLS, index.st = 1, k.nn = 5)
test.deconv.metrics
slot in a
DeconvDLModel
objectGet and set test.deconv.metrics
slot in a
DeconvDLModel
object
test.deconv.metrics(object, metrics = "All") test.deconv.metrics(object, metrics = "All") <- value
test.deconv.metrics(object, metrics = "All") test.deconv.metrics(object, metrics = "All") <- value
object |
|
metrics |
Metrics to show ( |
value |
List with evaluation metrics to assess the performance of the model on each sample of test data. |
test.metrics
slot in a
DeconvDLModel
objectGet and set test.metrics
slot in a
DeconvDLModel
object
test.metrics(object) test.metrics(object) <- value
test.metrics(object) test.metrics(object) <- value
object |
|
value |
List with evaluation metrics after prediction on test data. |
test.pred
slot in a
DeconvDLModel
objectGet and set test.pred
slot in a
DeconvDLModel
object
test.pred(object) test.pred(object) <- value
test.pred(object) test.pred(object) <- value
object |
|
value |
Matrix object with prediction results on test data. |
Retrieve feature names with the largest/smallest gradients per cell
type. These genes can be used to visualize their spatial expression
in the ST data (plotGeneSpatial
function) or to plot the calculated
gradients as a heatmap (plotGradHeatmap
function).
topGradientsCellType(object, method = "class", top.n.genes = 15)
topGradientsCellType(object, method = "class", top.n.genes = 15)
object |
|
method |
Method gradients were calculated by. It can be either
|
top.n.genes |
Top n genes (positive and negative) taken per cell type. |
List of gene names with the top positive and negative gradients per cell type.
interGradientsDL
trainDeconvModel
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS) listGradients <- topGradientsCellType(SDDLS) lapply(listGradients, head, n = 5)
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 ) ## calculating gradients SDDLS <- interGradientsDL(SDDLS) listGradients <- topGradientsCellType(SDDLS) lapply(listGradients, head, n = 5)
Train a deep neural network model using training data from the
SpatialDDLS
object. This model will be used to
deconvolute spatial transcriptomics data from the same biological context as
the single-cell RNA-seq data used to train it. In addition, the trained
model is evaluated using test data, and prediction results are obtained to
determine its performance (see ?calculateEvalMetrics
).
trainDeconvModel( object, type.data.train = "mixed", type.data.test = "mixed", batch.size = 64, num.epochs = 60, num.hidden.layers = 2, num.units = c(200, 200), activation.fun = "relu", dropout.rate = 0.25, loss = "kullback_leibler_divergence", metrics = c("accuracy", "mean_absolute_error", "categorical_accuracy"), normalize = TRUE, scaling = "standardize", norm.batch.layers = TRUE, custom.model = NULL, shuffle = TRUE, sc.downsampling = NULL, use.generator = FALSE, on.the.fly = FALSE, agg.function = "AddRawCount", threads = 1, view.metrics.plot = TRUE, verbose = TRUE )
trainDeconvModel( object, type.data.train = "mixed", type.data.test = "mixed", batch.size = 64, num.epochs = 60, num.hidden.layers = 2, num.units = c(200, 200), activation.fun = "relu", dropout.rate = 0.25, loss = "kullback_leibler_divergence", metrics = c("accuracy", "mean_absolute_error", "categorical_accuracy"), normalize = TRUE, scaling = "standardize", norm.batch.layers = TRUE, custom.model = NULL, shuffle = TRUE, sc.downsampling = NULL, use.generator = FALSE, on.the.fly = FALSE, agg.function = "AddRawCount", threads = 1, view.metrics.plot = TRUE, verbose = TRUE )
object |
|
type.data.train |
Type of profiles to be used for training. It can be
|
type.data.test |
Type of profiles to be used for evaluation. It can be
|
batch.size |
Number of samples per gradient update (64 by default). |
num.epochs |
Number of epochs to train the model (60 by default). |
Number of hidden layers of the neural network (2 by
default). This number must be equal to the length of |
|
num.units |
Vector indicating the number of neurons per hidden layer
( |
activation.fun |
Activation function ( |
dropout.rate |
Float between 0 and 1 indicating the fraction of input neurons to be dropped in layer dropouts (0.25 by default). By default, SpatialDDLS implements 1 dropout layer per hidden layer. |
loss |
Character indicating loss function selected for model training
( |
metrics |
Vector of metrics used to assess model performance during
training and evaluation ( |
normalize |
Whether to normalize data using logCPM ( |
scaling |
How to scale data before training. It can be:
|
norm.batch.layers |
Whether to include batch normalization layers
between each hidden dense layer ( |
custom.model |
It allows to use a custom neural network architecture. It
must be a |
shuffle |
Boolean indicating whether data will be shuffled ( |
sc.downsampling |
It is only used if |
use.generator |
Boolean indicating whether to use generators during
training and test. Generators are automatically used when |
on.the.fly |
Boolean indicating whether simulated data will be generated
'on the fly' during training ( |
agg.function |
If
|
threads |
Number of threads used during simulation of mixed
transcriptional profiles if |
view.metrics.plot |
Boolean indicating whether to show plots of loss and
evaluation metrics during training ( |
verbose |
Boolean indicating whether to display model progression during
training and model architecture information ( |
Simulation of mixed transcriptional profiles 'on the fly'
trainDeconvModel
can avoid storing simulated mixed spot profiles by
using the on.the.fly
argument. This functionality aims at reducing the
the simMixedProfiles
function's memory usage: simulated profiles are
built in each batch during training/evaluation.
Neural network architecture
It is possible to change the model's architecture: number of hidden layers,
number of neurons for each hidden layer, dropout rate, activation function,
and loss function. For more customized models, it is possible to provide a
pre-built model through the custom.model
argument (a
keras.engine.sequential.Sequential
object) where it is necessary that
the number of input neurons is equal to the number of considered
features/genes, and the number of output neurons is equal to the number of
considered cell types.
A SpatialDDLS
object with trained.model
slot containing a DeconvDLModel
object. For more
information about the structure of this class, see
?DeconvDLModel
.
plotTrainingHistory
deconvSpatialDDLS
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 )
set.seed(123) sce <- SingleCellExperiment::SingleCellExperiment( assays = list( counts = matrix( rpois(30, lambda = 5), nrow = 15, ncol = 10, dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10))) ) ), colData = data.frame( Cell_ID = paste0("RHC", seq(10)), Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10, replace = TRUE) ), rowData = data.frame( Gene_ID = paste0("Gene", seq(15)) ) ) SDDLS <- createSpatialDDLSobject( sc.data = sce, sc.cell.ID.column = "Cell_ID", sc.gene.ID.column = "Gene_ID", sc.filt.genes.cluster = FALSE ) SDDLS <- genMixedCellProp( object = SDDLS, cell.ID.column = "Cell_ID", cell.type.column = "Cell_Type", num.sim.spots = 50, train.freq.cells = 2/3, train.freq.spots = 2/3, verbose = TRUE ) SDDLS <- simMixedProfiles(SDDLS) SDDLS <- trainDeconvModel( object = SDDLS, batch.size = 12, num.epochs = 5 )
trained.model
slot in a
SpatialExperiment
objectGet and set trained.model
slot in a
SpatialExperiment
object
trained.model(object) trained.model(object) <- value
trained.model(object) trained.model(object) <- value
object |
|
value |
|
training.history
slot in a
DeconvDLModel
objectGet and set training.history
slot in a
DeconvDLModel
object
training.history(object) training.history(object) <- value
training.history(object) training.history(object) <- value
object |
|
value |
|
zinb.params
slot in a
SpatialExperiment
objectGet and set zinb.params
slot in a
SpatialExperiment
object
zinb.params(object) zinb.params(object) <- value
zinb.params(object) zinb.params(object) <- value
object |
|
value |
|
The ZinbParametersModel class is a wrapper class for the
zinbModel
class from the
zinbwave package.
This wrapper class contains the zinbwave.model
slot, which holds a
valid zinbModel
object.
zinbwave.model
A valid
zinbModel
object.
Risso, D., Perraudeau, F., Gribkova, S. et al. (2018). A general and flexible method for signal extraction from single-cell RNA-seq data. Nat Commun 9, 284. doi: doi:10.1038/s41467-017-02554-5.
zinbwave.model
slot in a
ZinbParametersModel
objectGet and set zinbwave.model
slot in a
ZinbParametersModel
object
zinbwave.model(object) zinbwave.model(object) <- value
zinbwave.model(object) zinbwave.model(object) <- value
object |
|
value |
|