From 8c26db9466c43feb7ed644b2aa4e2c170774fc11 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Fri, 10 Jan 2025 12:58:41 +0000 Subject: [PATCH] Update README and main vignette --- DESCRIPTION | 2 +- README.Rmd | 70 ++++++++++++++++++----- README.md | 119 ++++++++++++++++++++++++++------------- vignettes/KGExplorer.Rmd | 88 +++++++++++++++++++++++++---- 4 files changed, 212 insertions(+), 67 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e61e7ee..d1c64e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,7 +83,7 @@ Remotes: github::RajLabMSSM/echogithub, github::RajLabMSSM/downloadR, github::kwstat/pals -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 VignetteBuilder: knitr License: GPL-3 Config/testthat/edition: 3 diff --git a/README.Rmd b/README.Rmd index ba1a0fc..5ba7307 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,35 +17,75 @@ owner <- strsplit(URL,"/")[[1]][4] repo <- strsplit(URL,"/")[[1]][5] ``` -## ``r pkg``: `r gsub("\n","",title)` +## Introduction -### `r gsub("\n","",description)` - -If you use ``r pkg``, please cite: +KGExplorer is an R package designed to facilitate seamless exploration, +manipulation, and visualization of biomedical knowledge graphs and ontologies. +By integrating robust tools for data retrieval, filtering, mapping, and graph +visualization, KGExplorer empowers researchers to work with complex biomedical +datasets from resources like the +[Monarch Initiative](https://monarchinitiative.org) and +[Human Phenotype Ontology (HPO)](https://hpo.jax.org). With its modular design +and extensive caching support, the package streamlines workflows by reducing +redundant downloads and providing efficient tools for managing large-scale +biomedical data. - -> `r utils::citation(pkg)$textVersion` +Built on the foundations of powerful R libraries like tidygraph, data.table, +ggplot2, and plotly, KGExplorer offers intuitive functions to manipulate and +visualize knowledge graphs. 
Researchers can subset graphs, convert between graph +representations, map between diverse ID systems, and generate interactive 3D +visualizations of ontologies. KGExplorer is tailored for biomedical researchers +seeking an efficient, flexible toolkit to harness the potential of knowledge +graphs for scientific discovery. + +`KGExplorer` was developed by the [Neurogenomics +Lab](https://www.neurogenomics.co.uk/) at Imperial College London, along +with valuable feedback provided by the HPO team. This package is still +actively evolving and growing. Community engagement is welcome and any +suggestions can be submitted as an +[Issue](https://github.com/neurogenomics/KGExplorer/issues) or [Pull +Request](https://github.com/neurogenomics/KGExplorer/pulls). ## Installation +Within R: + ```R -if(!require("BiocManager")) install.packages("BiocManager") +if(!require("remotes")) install.packages("remotes") -BiocManager::install("`r paste(owner,repo,sep='/')`") +remotes::install_github("`r paste(owner,repo,sep='/')`") library(`r pkg`) ``` + ## Documentation -### [Website](https://`r owner`.github.io/`r repo`) -### [Getting started](https://`r owner`.github.io/`r repo`/articles/`r pkg`) +#### [Website](https://`r owner`.github.io/`r pkg`) +#### [Get started](https://`r owner`.github.io/`r pkg`/articles/`r pkg`) +#### [Docker/Singularity Container](https://`r owner`.github.io/`r pkg`/articles/docker.html) -
+## Citation + +If you use ``r pkg``, please cite: + + +> `r utils::citation(pkg)$textVersion` -## Session Info +## Contact -
-```{r} +### [Neurogenomics Lab](https://www.neurogenomics.co.uk) +UK Dementia Research Institute +Department of Brain Sciences +Faculty of Medicine +Imperial College London +[GitHub](https://github.com/neurogenomics) + +## Session Info + +
+ +```{r Session Info} utils::sessionInfo() ``` -
+
+
diff --git a/README.md b/README.md index 4d33fbd..e7a88d7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ KGExplorer
[![License: GPL-3](https://img.shields.io/badge/license-GPL--3-blue.svg)](https://cran.r-project.org/web/licenses/GPL-3) -[![](https://img.shields.io/badge/devel%20version-0.99.0-black.svg)](https://github.com/neurogenomics/KGExplorer) +[![](https://img.shields.io/badge/devel%20version-0.99.03-black.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/languages/code-size/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/last-commit/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer/commits/master)
[![R build @@ -15,14 +15,61 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s Authors: Brian Schilder

-README updated: Mar-08-2024 +README updated: Jan-10-2025

-## `KGExplorer`: Biomedical Knowledge Network Construction and Analysis +## Introduction + +KGExplorer is an R package designed to facilitate seamless exploration, +manipulation, and visualization of biomedical knowledge graphs and +ontologies. By integrating robust tools for data retrieval, filtering, +mapping, and graph visualization, KGExplorer empowers researchers to +work with complex biomedical datasets from resources like the +[Monarch Initiative](https://monarchinitiative.org) and [Human Phenotype +Ontology (HPO)](https://hpo.jax.org). With its modular design and +extensive caching support, the package streamlines workflows by reducing +redundant downloads and providing efficient tools for managing +large-scale biomedical data. + +Built on the foundations of powerful R libraries like tidygraph, +data.table, ggplot2, and plotly, KGExplorer offers intuitive functions +to manipulate and visualize knowledge graphs. Researchers can subset +graphs, convert between graph representations, map between diverse ID +systems, and generate interactive 3D visualizations of ontologies. +KGExplorer is tailored for biomedical researchers seeking an efficient, +flexible toolkit to harness the potential of knowledge graphs for +scientific discovery. + +`KGExplorer` was developed by the [Neurogenomics +Lab](https://www.neurogenomics.co.uk/) at Imperial College London, along +with valuable feedback provided by the HPO team. This package is still +actively evolving and growing. Community engagement is welcome and any +suggestions can be submitted as an +[Issue](https://github.com/neurogenomics/KGExplorer/issues) or [Pull +Request](https://github.com/neurogenomics/KGExplorer/pulls). -### Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies. 
+## Installation + +Within R: + +``` r +if(!require("remotes")) install.packages("remotes") + +remotes::install_github("neurogenomics/KGExplorer") +library(KGExplorer) +``` + +## Documentation + +#### [Website](https://neurogenomics.github.io/KGExplorer) + +#### [Get started](https://neurogenomics.github.io/KGExplorer/articles/KGExplorer) + +#### [Docker/Singularity Container](https://neurogenomics.github.io/KGExplorer/articles/docker.html) + +## Citation If you use `KGExplorer`, please cite: @@ -33,22 +80,15 @@ If you use `KGExplorer`, please cite: > type-specific gene targets underlying thousands of rare diseases and > subtraits. medRxiv, -## Installation - -``` r -if(!require("BiocManager")) install.packages("BiocManager") - -BiocManager::install("neurogenomics/KGExplorer") -library(KGExplorer) -``` +## Contact -## Documentation - -### [Website](https://neurogenomics.github.io/KGExplorer) - -### [Getting started](https://neurogenomics.github.io/KGExplorer/articles/KGExplorer) +### [Neurogenomics Lab](https://www.neurogenomics.co.uk) -
+UK Dementia Research Institute +Department of Brain Sciences +Faculty of Medicine +Imperial College London +[GitHub](https://github.com/neurogenomics) ## Session Info @@ -58,13 +98,13 @@ library(KGExplorer) utils::sessionInfo() ``` - ## R version 4.3.1 (2023-06-16) - ## Platform: aarch64-apple-darwin20 (64-bit) - ## Running under: macOS Sonoma 14.3.1 + ## R version 4.4.2 (2024-10-31) + ## Platform: aarch64-apple-darwin20 + ## Running under: macOS Sequoia 15.2 ## ## Matrix products: default - ## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib - ## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0 + ## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib + ## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0 ## ## locale: ## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 @@ -76,21 +116,22 @@ utils::sessionInfo() ## [1] stats graphics grDevices utils datasets methods base ## ## loaded via a namespace (and not attached): - ## [1] gtable_0.3.4 jsonlite_1.8.8 renv_1.0.3 - ## [4] dplyr_1.1.4 compiler_4.3.1 BiocManager_1.30.22 - ## [7] tidyselect_1.2.0 rvcheck_0.2.1 scales_1.3.0 - ## [10] yaml_2.3.8 fastmap_1.1.1 here_1.0.1 - ## [13] ggplot2_3.4.4 R6_2.5.1 generics_0.1.3 - ## [16] knitr_1.45 yulab.utils_0.1.4 tibble_3.2.1 - ## [19] desc_1.4.3 dlstats_0.1.7 rprojroot_2.0.4 - ## [22] munsell_0.5.0 pillar_1.9.0 RColorBrewer_1.1-3 - ## [25] rlang_1.1.3 utf8_1.2.4 cachem_1.0.8 - ## [28] badger_0.2.3 xfun_0.42 fs_1.6.3 - ## [31] memoise_2.0.1.9000 cli_3.6.2 magrittr_2.0.3 - ## [34] rworkflows_1.0.1 digest_0.6.34 grid_4.3.1 - ## [37] rstudioapi_0.15.0 lifecycle_1.0.4 vctrs_0.6.5 - ## [40] data.table_1.15.0 evaluate_0.23 glue_1.7.0 - ## [43] fansi_1.0.6 colorspace_2.1-0 rmarkdown_2.25 - ## [46] tools_4.3.1 pkgconfig_2.0.3 htmltools_0.5.7 + ## [1] gtable_0.3.6 
jsonlite_1.8.9 renv_1.0.11 + ## [4] dplyr_1.1.4 compiler_4.4.2 BiocManager_1.30.25 + ## [7] tidyselect_1.2.1 rvcheck_0.2.1 scales_1.3.0 + ## [10] yaml_2.3.10 fastmap_1.2.0 here_1.0.1 + ## [13] ggplot2_3.5.1 R6_2.5.1 generics_0.1.3 + ## [16] knitr_1.49 yulab.utils_0.1.9 tibble_3.2.1 + ## [19] desc_1.4.3 dlstats_0.1.7 munsell_0.5.1 + ## [22] rprojroot_2.0.4 pillar_1.10.1 RColorBrewer_1.1-3 + ## [25] rlang_1.1.4 badger_0.2.4 xfun_0.50 + ## [28] fs_1.6.5 cli_3.6.3 magrittr_2.0.3 + ## [31] rworkflows_1.0.3 digest_0.6.37 grid_4.4.2 + ## [34] rstudioapi_0.17.1 lifecycle_1.0.4 vctrs_0.6.5 + ## [37] evaluate_1.0.1 glue_1.8.0 data.table_1.16.4 + ## [40] colorspace_2.1-1 rmarkdown_2.29 tools_4.4.2 + ## [43] pkgconfig_2.0.3 htmltools_0.5.8.1 + +
diff --git a/vignettes/KGExplorer.Rmd b/vignettes/KGExplorer.Rmd index 2cf63ed..ac74794 100644 --- a/vignettes/KGExplorer.Rmd +++ b/vignettes/KGExplorer.Rmd @@ -17,22 +17,63 @@ pkg <- read.dcf("../DESCRIPTION", fields = "Package")[1] library(pkg, character.only = TRUE) ``` - -```R -library(`r pkg`) -``` +# Introduction + +KGExplorer is an R package designed for efficient exploration and manipulation +of biomedical knowledge graphs and ontologies. Its core functionalities include: + +- **Graph Manipulation and Visualization**: Tools like `plot_graph_3d()` and +`plot_ggnetwork()` enable intuitive visualization and analysis of ontological +data. + +- **Data Retrieval**: Functions such as `get_mondo_maps()` and `get_ontology()` +facilitate fetching data from prominent biomedical databases. + +- **ID Mapping**: Mapping utilities like `map_genes_monarch()` and `map_mondo()` +allow seamless conversion between various identifier systems. + +- **Graph Filtering**: With functions like `filter_graph()` and `filter_kg()`, +users can narrow knowledge graphs down to the nodes and edges of interest. + +- **Graph Conversion**: Utilities such as `graph_to_dt()` and +`graph_to_plotly()` provide flexibility to transform knowledge graphs into +different data formats or visualization-ready structures. + +The package leverages robust R libraries such as `tidygraph`, `data.table`, and +popular plotting tools including `plotly` and `ggplot2`. KGExplorer adopts a +modular design, with dedicated functions for data retrieval, manipulation, +visualization, and utility operations. Comprehensive caching support ensures +efficient handling of large datasets, reducing the need for repeated downloads. # Use cases +The following examples illustrate how KGExplorer can be used to extract +meaningful insights from biomedical knowledge graphs. Each use case highlights +key functionalities of the package and demonstrates practical applications in +biomedical research. 
+ +To explore additional functionalities and find detailed documentation for all +available functions, users can refer to the [KGExplorer reference guide](https://neurogenomics.github.io/KGExplorer/reference/index.html). + ## Extract disease/phenotype-cell type associations +The package provides functionality to extract associations between +diseases/phenotypes and cell types. This can help researchers understand the +relationships and interactions between different biological entities. + ```{r, eval=FALSE} +# Get the Monarch knowledge graph g <- get_monarch_kg() + +# Filter the graph to include only edges between diseases/phenotypes and cell +# types g2 <- filter_kg(g, to_categories = c("biolink:Disease", "biolink:PhenotypicFeature"), from_categories = "biolink:Cell") -plot_graph_visnetwork(g2, + +# Plot the filtered graph using visNetwork +plot_graph_visnetwork(g2, selectedBy = "id", label_var = "name", layout = "layout_nicely", @@ -42,31 +83,54 @@ plot_graph_visnetwork(g2, ## Assess known animal models of human phenotypes +The package includes tools to assess known animal models of human phenotypes, +which can be crucial for translational research and understanding disease +mechanisms. + ```{r, eval=FALSE} +# Map uPheno data dat <- map_upheno_data() + +# Plot the mapped uPheno data upheno_plots <- plot_upheno(dat) ``` ## Link diseases to phenotypes to genes to variants +Users can link diseases to phenotypes, genes, and variants, providing a +comprehensive view of the genetic and phenotypic landscape. This can aid in +identifying potential genetic markers and understanding the genetic basis of +diseases. 
+ ```{r, eval=FALSE} +# Link Monarch data to create a graph with variant-disease, variant-phenotype, +# and variant-gene associations gm <- link_monarch(maps = list( - c("variant","disease"), - c("variant","phenotype"), - c("variant","gene") + c("variant", "disease"), + c("variant", "phenotype"), + c("variant", "gene") )) + +# Join the linked graph with the original graph gm2 <- tidygraph::graph_join(gm,g) + +# Filter the graph to include only specific categories of nodes gm2 <- filter_graph(gm2, node_filters = list(category=c("disease", "phenotype", "phenotypicfeature", - "gene","variant"))) -gm3 <- filter_graph(g = gm2, - size=20000) + "gene", + "variant"))) + +# Further filter the graph to limit the size +gm3 <- filter_graph(g = gm2, size = 20000) + +# Update vertex attributes for category and name igraph::vertex_attr(gm3,"category") <- - tolower(gsub("biolink:","",igraph::vertex_attr(gm3,"category"))) + tolower(gsub("biolink:","", igraph::vertex_attr(gm3,"category"))) igraph::vertex_attr(gm3,"name") <- igraph::vertex_attr(gm3,"id") +# Plot the final graph using visNetwork plot_graph_visnetwork(gm3, selectedBy = "category", label_var = "name",