Skip to content

Commit

Permalink
update installation instructions [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 8, 2024
1 parent 17ad138 commit e0c2a3f
Show file tree
Hide file tree
Showing 12 changed files with 123 additions and 79 deletions.
8 changes: 7 additions & 1 deletion R/get_gencc.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,17 @@ get_gencc <- function(agg_by=c("disease_id",
# to = c("OMIM","DECIPHER","Orphanet"))
#### Encode evidence numerically ####
d[,evidence_score:=dict[classification_title]]
#### Report ####
messager("Evidence scores for:",
"\n -",length(unique(d$disease_id)),"diseases",
"\n -",length(unique(d$gene_symbol)),"genes")
#### Aggregate so that there's 1 entry/gene/disease ####
if(!is.null(agg_by)){
d <- d[,list(evidence_score_min=min(evidence_score, na.rm = TRUE),
evidence_score_max=max(evidence_score, na.rm = TRUE),
evidence_score_mean=mean(evidence_score, na.rm=TRUE)),
evidence_score_mean=mean(evidence_score, na.rm=TRUE),
evidence_score_sum=sum(evidence_score, na.rm=TRUE),
evidence_score_sd=sd(evidence_score, na.rm=TRUE)),
by=agg_by]
}
#### Add version ####
Expand Down
14 changes: 13 additions & 1 deletion R/get_ontology.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ get_ontology <- function(name=c("mondo",
name <- name[1]
method <- match.arg(method)

save_path <- file.path(save_dir, paste0(name,".rds"))
if(isFALSE(force_new) && file.exists(save_path)){
messager("Loading cached ontology:",save_path)
ont <- readRDS(save_path)
return(ont)
}
ol <- rols::Ontologies()
rols_opts <- get_ols_options(ol=ol)
if(method=="rols" &&
Expand Down Expand Up @@ -111,7 +117,13 @@ get_ontology <- function(name=c("mondo",
}
#### Subset ontology ####
ont <- filter_ontology(ont = ont,
terms = terms)
terms = terms)
#### Cache RDS object ####
if(!is.null(save_dir)){
dir.create(save_dir, recursive=TRUE, showWarnings = FALSE)
messager("Saving ontology -->",save_path)
saveRDS(ont, save_path)
}
#### Return ####
return(ont)
}
37 changes: 19 additions & 18 deletions R/map_upheno_data_i.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,21 @@ map_upheno_data_i <- function(pheno_map_method,
all.y = keep_nogenes,
suffixes = c(1,2),
allow.cartesian = TRUE
)
)
pheno_map_genes[,db2:=id2_db]
## Fill in missing species for those without gene data
# pheno_map_genes[
# is.na(gene_taxon_label1),
# gene_taxon_label1:=species_map[db1]$subject_taxon_label]
# pheno_map_genes[
# is.na(gene_taxon_label2),
# gene_taxon_label2:=species_map[db2]$subject_taxon_label]
pheno_map_genes[
is.na(gene_taxon_label1),
gene_taxon_label1:=species_map[db1]$subject_taxon_label]
pheno_map_genes[
is.na(gene_taxon_label2),
gene_taxon_label2:=species_map[db2]$subject_taxon_label]
## Add gene counts
# pheno_map_genes[,n_genes_db1:=data.table::uniqueN(gene_label1), by="id1"]
# pheno_map_genes[,n_genes_db2:=data.table::uniqueN(gene_label2), by="id2"]
pheno_map_genes[,n_genes_db1:=data.table::uniqueN(gene_label1), by="id1"]
pheno_map_genes[,n_genes_db2:=data.table::uniqueN(gene_label2), by="id2"]
## Report
messager(data.table::uniqueN(pheno_map_genes$subject_taxon_label2),"/",
data.table::uniqueN(genes_homol$subject_taxon_label),
messager(data.table::uniqueN(pheno_map_genes$gene_taxon_label2),"/",
data.table::uniqueN(genes_homol$gene_taxon_label),
"species remain after cross-species phenotype mapping.")
## Remove
# remove(genes_human,genes_nonhuman,pheno_map)
Expand All @@ -116,18 +117,18 @@ map_upheno_data_i <- function(pheno_map_method,
}
pheno_map_genes_match <-
pheno_map_genes_match[,
list(n_genes_intersect=data.table::uniqueN(hgnc_id2)),
by=c("id1","db1","label1","n_genes_db1",
"id2","db2","label2","n_genes_db2",
"subject_taxon1","subject_taxon_label1",
"subject_taxon2","subject_taxon_label2",
list(n_genes_intersect=data.table::uniqueN(hgnc2)),
by=c("id1","db1","object_label1","n_genes_db1",
"id2","db2","object_label2","n_genes_db2",
"gene_taxon1","gene_taxon_label1",
"gene_taxon2","gene_taxon_label2",
"equivalence_score","subclass_score")
] |>
data.table::setorderv("n_genes_intersect",-1)
pheno_map_genes_match[,n_phenotypes:=data.table::uniqueN(id1),
by=c("db1","db2",
"subject_taxon1","subject_taxon2",
"subject_taxon_label1","subject_taxon_label2"
"gene_taxon1","gene_taxon2",
"gene_taxon_label1","gene_taxon_label2"
)]
pheno_map_genes_match[,prop_intersect:=(n_genes_intersect/n_genes_db1)]
## Compute a score that captures both the phenotype mapping score and
Expand Down
28 changes: 13 additions & 15 deletions R/plot_upheno.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,25 @@
#' pheno_map_genes_match = pheno_map_genes_match)
#' }
plot_upheno <- function(pheno_map_genes_match=NULL,
subset_db1="HP",
filters=list(db1="HP",
gene_taxon_label1="Homo sapiens"),
types=c("rainplot","scatterplot","heatmap")){
## Prepare plot data
n_phenotypes <- object_db <- subject <-NULL;
id1 <- n_phenotypes <-NULL;
{
#### Subset data ####
if(!is.null(subset_db1)) {
plot_dat <- pheno_map_genes_match[object_db %in% subset_db1,]
messager(data.table::uniqueN(plot_dat$subject_taxon_label2),"/",
data.table::uniqueN(pheno_map_genes_match$subject_taxon_label2),
"species remain after filtering by `subset_db1`.")
}else {
plot_dat <- data.table::copy(pheno_map_genes_match)
}
plot_dat <- filter_dt(pheno_map_genes_match,
filters = filters)
messager(data.table::uniqueN(plot_dat$gene_taxon_label2),"/",
data.table::uniqueN(pheno_map_genes_match$gene_taxon_label2),
"species remain after filtering by `subset_db1`.")
## Only use one phenotype-phenotype per row
plot_dat <- plot_dat[,.SD[1],by=c("object_db","subject","object_db","object")]
plot_dat <- plot_dat[,.SD[1],by=c("db1","id1","db2","id2")]
## Recompute n_phenotypes
plot_dat[,n_phenotypes:=data.table::uniqueN(subject),
by=c("object_db","object_db",
"subject_taxon1","subject_taxon2",
"subject_taxon_label1","subject_taxon_label2"
plot_dat[,n_phenotypes:=data.table::uniqueN(id1),
by=c("db1","db2",
"gene_taxon1","gene_taxon2",
"gene_taxon_label1","gene_taxon_label2"
)]
}
plots <- list()
Expand Down
58 changes: 41 additions & 17 deletions R/plot_upheno_heatmap.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,45 @@
#' @describeIn plot_ plot_
plot_upheno_heatmap <- function(plot_dat,
ont,
ont=get_ontology("upheno",
add_ancestors = 10),
hpo_ids=NULL,
value.var=c("phenotype_genotype_score",
"prop_intersect",
"equivalence_score",
"subclass_score"),
name=value.var[1],
min_rowsums=NULL,
cluster_from_ontology=FALSE,
save_dir=tempdir(),
height = 15,
width = 10){
requireNamespace("ComplexHeatmap")
subject <- hpo_id <- label1 <- NULL;
hpo_id <- object_label1 <- NULL;
set.seed(2023)
value.var <- match.arg(value.var)
value.var <- value.var[1]
# hpo_ids <- MultiEWCE::example_targets$top_targets$hpo_id

### Subset phenotypes
if(!is.null(hpo_ids)) plot_dat <- plot_dat[subject %in% unique(hpo_ids)]
plot_dat[,hpo_id:=subject][,label1:=gsub(" (HPO)","",label1,fixed = TRUE)]
plot_dat <- add_ancestors(plot_dat)
data.table::setkeyv(plot_dat,"label1")
if(!is.null(hpo_ids)) plot_dat <- plot_dat[id1 %in% unique(hpo_ids)]
plot_dat[,hpo_id:=id1][,object_label1:=gsub(" (HPO)","",
object_label1,fixed = TRUE)]
if(isFALSE(cluster_from_ontology)){
ont <- add_ancestors(ont)
if(!"ancestor_name" %in% names(plot_dat)){
plot_dat <- merge(
plot_dat,
ont@elementMetadata[,c("short_id","ancestor_name")],
by.x="id1",
by.y="short_id"
)
}
}
data.table::setkeyv(plot_dat,"object_label1")

### Plot
X <- data.table::dcast.data.table(plot_dat,
formula = label1 ~ subject_taxon_label2,
formula = object_label1 ~ gene_taxon_label2,
fill = 0,
value.var = value.var,
fun.aggregate = mean,
Expand All @@ -39,33 +53,43 @@ plot_upheno_heatmap <- function(plot_dat,

#### Get clusters from ontology ####
if(isTRUE(cluster_from_ontology)){
nms <- map_ontology_terms(ont = ont,
terms = plot_dat$id1,
to = "name",
keep_order = FALSE)
nms <- intersect(nms, rownames(X))
ids <- map_ontology_terms(ont = ont,
terms = plot_dat$subject,
terms =nms,
to = "id",
keep_order = FALSE)
ids <- ids[ids %in% rownames(X)]
## best to do this on the entire HPO, then subset
cluster_rows <- ontology_to(to="igraph_dist_hclust",
terms = names(ids))
## best to do this on the entire HPO, then subset
gd <- ontology_to(ont=ont,
to="igraph_dist",
terms = unname(ids))
cluster_rows <- stats::hclust(d = as.dist(gd[unname(ids),unname(ids)]))
# cluster_rows[cluster_rows$labels[1:10]]
# leaves <- dendextend::get_leaves_attr(cluster_rows,"label")
# c2 <- dendextend::prune(cluster_rows,
# leaves[!leaves %in% names(ids)],
# keep_branches = FALSE)
## subset to only those in the heatmap
X <- X[ids,]
X <- X[nms,]
} else {
cluster_rows <- TRUE
}
#### Add annotation ####
cols <- c("object_label1","n_genes_db1",
"ancestor_name")
cols <- cols[cols %in% names(plot_dat)]
annot_dat <- plot_dat[rownames(X),
c("label1","n_genes_db1",
"ancestor_name")] |> unique()
cols, with=FALSE] |> unique()
# col_fun <- colorRamp2::colorRamp2(
# seq(min(annot_dat$n_genes_db1),
# max(annot_dat$n_genes_db1),
# length.out=4),
# pals::gnuplot(4))
la <- ComplexHeatmap::rowAnnotation(
df=data.frame(annot_dat[,-c("label1")],
df=data.frame(annot_dat[,-c("object_label1")],
row.names = annot_dat$label1),
# ?ComplexHeatmap::Legend
show_legend = c(TRUE, FALSE),
Expand All @@ -75,7 +99,7 @@ plot_upheno_heatmap <- function(plot_dat,
row_split <- if(isFALSE(cluster_from_ontology)) annot_dat$ancestor_name
#### make heatmap ####
ch <- ComplexHeatmap::Heatmap(matrix = as.matrix(X),
name = value.var,
name = name,
cluster_rows = cluster_rows,
row_title_rot = 0,
row_names_gp = grid::gpar(fontsize = 7),
Expand Down
6 changes: 3 additions & 3 deletions R/plot_upheno_rainplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ plot_upheno_rainplot <- function(plot_dat){

### Plot proportion of intersecting orthologs per ontology ####
ggplot2::ggplot(plot_dat,
ggplot2::aes(x=paste0(subject_taxon_label2,
ggplot2::aes(x=paste0(gene_taxon_label2,
"\n(n = ",n_phenotypes," phenotypes)"),
y=(n_genes_intersect/n_genes_db1),
fill=factor(object_db))) +
ggplot2::facet_grid(object_db~.,
fill=factor(db2))) +
ggplot2::facet_grid(db2~.,
scales = "free_y",
space = "free_y") +
# add half-violin from {ggdist} package
Expand Down
2 changes: 1 addition & 1 deletion R/plot_upheno_scatterplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ plot_upheno_scatterplot <- function(plot_dat){
ggplot2::aes(x=(n_genes_intersect/n_genes_db1),
y=equivalence_score)) +
ggplot2::geom_point() +
ggplot2::facet_grid(rows = "subject_taxon_label2") +
ggplot2::facet_grid(rows = "gene_taxon_label2") +
ggplot2::geom_smooth()
}
4 changes: 2 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ If you use ``r pkg``, please cite:
## Installation

```R
if(!require("remotes")) install.packages("remotes")
if(!require("BiocManager")) install.packages("BiocManager")

remotes::install_github("`r paste(owner,repo,sep='/')`")
BiocManager::install("`r paste(owner,repo,sep='/')`")
library(`r pkg`)
```
## Documentation
Expand Down
34 changes: 18 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,33 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s
<br>
<a href='https://app.codecov.io/gh/neurogenomics/KGExplorer/tree/master' target='_blank'><img src='https://codecov.io/gh/neurogenomics/KGExplorer/branch/master/graphs/icicle.svg' title='Codecov icicle graph' width='200' height='50' style='vertical-align: top;'></a>
<h4>
Authors: <i>yourGivenName yourFamilyName</i>
Authors: <i>Brian Schilder</i>
</h4>
<h4>
README updated: <i>Dec-09-2023</i>
README updated: <i>Mar-08-2024</i>
</h4>

<!-- To modify Package/Title/Description/Authors fields, edit the DESCRIPTION file -->

## `KGExplorer`: Biomedical Knowledge Network Construction and Analysis

### Efficiently construct large-scale knowledge networks from biomedical repositories.
### Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies.

If you use `KGExplorer`, please cite:

<!-- Modify this by editing the file: inst/CITATION -->

> author1, author2, author3 (publicationYear) articleTitle,
> *journalName*; volumeNumber, [linkToPublication](linkToPublication)
> Kitty B. Murphy, Robert Gordon-Smith, Jai Chapman, Momoko Otani, Brian
> M. Schilder, Nathan G. Skene (2023) Identification of cell
> type-specific gene targets underlying thousands of rare diseases and
> subtraits. medRxiv, <https://doi.org/10.1101/2023.02.13.23285820>
## Installation

``` r
if(!require("remotes")) install.packages("remotes")
if(!require("BiocManager")) install.packages("BiocManager")

remotes::install_github("neurogenomics/KGExplorer")
BiocManager::install("neurogenomics/KGExplorer")
library(KGExplorer)
```

Expand All @@ -58,7 +60,7 @@ utils::sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.1.1
## Running under: macOS Sonoma 14.3.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
Expand All @@ -77,17 +79,17 @@ utils::sessionInfo()
## [1] gtable_0.3.4 jsonlite_1.8.8 renv_1.0.3
## [4] dplyr_1.1.4 compiler_4.3.1 BiocManager_1.30.22
## [7] tidyselect_1.2.0 rvcheck_0.2.1 scales_1.3.0
## [10] yaml_2.3.7 fastmap_1.1.1 here_1.0.1
## [10] yaml_2.3.8 fastmap_1.1.1 here_1.0.1
## [13] ggplot2_3.4.4 R6_2.5.1 generics_0.1.3
## [16] knitr_1.45 yulab.utils_0.1.0 tibble_3.2.1
## [19] desc_1.4.2 dlstats_0.1.7 rprojroot_2.0.4
## [16] knitr_1.45 yulab.utils_0.1.4 tibble_3.2.1
## [19] desc_1.4.3 dlstats_0.1.7 rprojroot_2.0.4
## [22] munsell_0.5.0 pillar_1.9.0 RColorBrewer_1.1-3
## [25] rlang_1.1.2 utf8_1.2.4 cachem_1.0.8
## [28] badger_0.2.3 xfun_0.41 fs_1.6.3
## [31] memoise_2.0.1.9000 cli_3.6.1 magrittr_2.0.3
## [34] rworkflows_1.0.2 digest_0.6.33 grid_4.3.1
## [25] rlang_1.1.3 utf8_1.2.4 cachem_1.0.8
## [28] badger_0.2.3 xfun_0.42 fs_1.6.3
## [31] memoise_2.0.1.9000 cli_3.6.2 magrittr_2.0.3
## [34] rworkflows_1.0.1 digest_0.6.34 grid_4.3.1
## [37] rstudioapi_0.15.0 lifecycle_1.0.4 vctrs_0.6.5
## [40] data.table_1.14.10 evaluate_0.23 glue_1.6.2
## [40] data.table_1.15.0 evaluate_0.23 glue_1.7.0
## [43] fansi_1.0.6 colorspace_2.1-0 rmarkdown_2.25
## [46] tools_4.3.1 pkgconfig_2.0.3 htmltools_0.5.7

Expand Down
2 changes: 1 addition & 1 deletion man/get_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/plot_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e0c2a3f

Please sign in to comment.