diff --git a/README.Rmd b/README.Rmd index ba1a0fc..5ba7307 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,35 +17,75 @@ owner <- strsplit(URL,"/")[[1]][4] repo <- strsplit(URL,"/")[[1]][5] ``` -## ``r pkg``: `r gsub("\n","",title)` +## Introduction -### `r gsub("\n","",description)` - -If you use ``r pkg``, please cite: +KGExplorer is an R package designed to facilitate seamless exploration, +manipulation, and visualization of biomedical knowledge graphs and ontologies. +By integrating robust tools for data retrieval, filtering, mapping, and graph +visualization, KGExplorer empowers researchers to work with complex biomedical +datasets from resources like the +[Monarch Initiative](https://monarchinitiative.org) and +[Human Phenotype Ontology (HPO)](https://hpo.jax.org). With its modular design +and extensive caching support, the package streamlines workflows by reducing +redundant downloads and providing efficient tools for managing large-scale +biomedical data. - -> `r utils::citation(pkg)$textVersion` +Built on the foundations of powerful R libraries like tidygraph, data.table, +ggplot2, and plotly, KGExplorer offers intuitive functions to manipulate and +visualize knowledge graphs. Researchers can subset graphs, convert between graph +representations, map between diverse ID systems, and generate interactive 3D +visualizations of ontologies. KGExplorer is tailored for biomedical researchers +seeking an efficient, flexible toolkit to harness the potential of knowledge +graphs for scientific discovery. + +`KGExplorer` was developed by the [Neurogenomics +Lab](https://www.neurogenomics.co.uk/) at Imperial College London, along +with valuable feedback provided by the HPO team. This package is still +actively evolving and growing. Community engagement is welcome and any +suggestions can be submitted as an +[Issue](https://github.com/neurogenomics/KGExplorer/issues) or [Pull +Request](https://github.com/neurogenomics/KGExplorer/pulls). 
## Installation +Within R: + ```R -if(!require("BiocManager")) install.packages("BiocManager") +if(!require("remotes")) install.packages("remotes") -BiocManager::install("`r paste(owner,repo,sep='/')`") +remotes::install_github("`r paste(owner,repo,sep='/')`") library(`r pkg`) ``` + ## Documentation -### [Website](https://`r owner`.github.io/`r repo`) -### [Getting started](https://`r owner`.github.io/`r repo`/articles/`r pkg`) +#### [Website](https://`r owner`.github.io/`r pkg`) +#### [Get started](https://`r owner`.github.io/`r pkg`/articles/`r pkg`) +#### [Docker/Singularity Container](https://`r owner`.github.io/`r pkg`/articles/docker.html) -
+## Citation + +If you use ``r pkg``, please cite: + + +> `r utils::citation(pkg)$textVersion` -## Session Info +## Contact -
-```{r} +### [Neurogenomics Lab](https://www.neurogenomics.co.uk) +UK Dementia Research Institute +Department of Brain Sciences +Faculty of Medicine +Imperial College London +[GitHub](https://github.com/neurogenomics) + +## Session Info + +
+ +```{r Session Info} utils::sessionInfo() ``` -
+
+
diff --git a/README.md b/README.md index a70d374..d76cf7b 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,61 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s Authors: Brian Schilder, Hiranyamaya Dash

-README updated: Dec-18-2024 +README updated: Jan-10-2025

-## `KGExplorer`: Biomedical Knowledge Network Construction and Analysis +## Introduction + +KGExplorer is an R package designed to facilitate seamless exploration, +manipulation, and visualization of biomedical knowledge graphs and +ontologies. By integrating robust tools for data retrieval, filtering, +mapping, and graph visualization, KGExplorer empowers researchers to +work with complex biomedical datasets from resources like the +[Monarch Initiative](https://monarchinitiative.org) and [Human Phenotype +Ontology (HPO)](https://hpo.jax.org). With its modular design and +extensive caching support, the package streamlines workflows by reducing +redundant downloads and providing efficient tools for managing +large-scale biomedical data. + +Built on the foundations of powerful R libraries like tidygraph, +data.table, ggplot2, and plotly, KGExplorer offers intuitive functions +to manipulate and visualize knowledge graphs. Researchers can subset +graphs, convert between graph representations, map between diverse ID +systems, and generate interactive 3D visualizations of ontologies. +KGExplorer is tailored for biomedical researchers seeking an efficient, +flexible toolkit to harness the potential of knowledge graphs for +scientific discovery. + +`KGExplorer` was developed by the [Neurogenomics +Lab](https://www.neurogenomics.co.uk/) at Imperial College London, along +with valuable feedback provided by the HPO team. This package is still +actively evolving and growing. Community engagement is welcome and any +suggestions can be submitted as an +[Issue](https://github.com/neurogenomics/KGExplorer/issues) or [Pull +Request](https://github.com/neurogenomics/KGExplorer/pulls). -### Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies. 
+## Installation + +Within R: + +``` r +if(!require("remotes")) install.packages("remotes") + +remotes::install_github("neurogenomics/KGExplorer") +library(KGExplorer) +``` + +## Documentation + +#### [Website](https://neurogenomics.github.io/KGExplorer) + +#### [Get started](https://neurogenomics.github.io/KGExplorer/articles/KGExplorer) + +#### [Docker/Singularity Container](https://neurogenomics.github.io/KGExplorer/articles/docker.html) + +## Citation If you use `KGExplorer`, please cite: @@ -33,22 +80,15 @@ If you use `KGExplorer`, please cite: > type-specific gene targets underlying thousands of rare diseases and > subtraits. medRxiv, -## Installation - -``` r -if(!require("BiocManager")) install.packages("BiocManager") - -BiocManager::install("neurogenomics/KGExplorer") -library(KGExplorer) -``` +## Contact -## Documentation - -### [Website](https://neurogenomics.github.io/KGExplorer) - -### [Getting started](https://neurogenomics.github.io/KGExplorer/articles/KGExplorer) +### [Neurogenomics Lab](https://www.neurogenomics.co.uk) -
+UK Dementia Research Institute +Department of Brain Sciences +Faculty of Medicine +Imperial College London +[GitHub](https://github.com/neurogenomics) ## Session Info @@ -94,3 +134,5 @@ utils::sessionInfo() ## [46] htmltools_0.5.8.1 + +
diff --git a/vignettes/KGExplorer.Rmd b/vignettes/KGExplorer.Rmd index 2cf63ed..ac74794 100644 --- a/vignettes/KGExplorer.Rmd +++ b/vignettes/KGExplorer.Rmd @@ -17,22 +17,63 @@ pkg <- read.dcf("../DESCRIPTION", fields = "Package")[1] library(pkg, character.only = TRUE) ``` - -```R -library(`r pkg`) -``` +# Introduction + +KGExplorer is an R package designed for efficient exploration and manipulation +of biomedical knowledge graphs and ontologies. Its core functionalities include: + +- **Graph Manipulation and Visualization**: Tools like `plot_graph_3d()` and +`plot_ggnetwork()` enable intuitive visualization and analysis of ontological +data. + +- **Data Retrieval**: Functions such as `get_mondo_maps()` and `get_ontology()` +facilitate fetching data from prominent biomedical databases. + +- **ID Mapping**: Mapping utilities like `map_genes_monarch()` and `map_mondo()` +allow seamless conversion between various identifier systems. + +- **Graph Filtering**: With functions like `filter_graph()` and `filter_kg()`, +users can subset knowledge graphs to focus on relevant subsets. + +- **Graph Conversion**: Utilities such as `graph_to_dt()` and +`graph_to_plotly()` provide flexibility to transform knowledge graphs into +different data formats or visualization-ready structures. + +The package leverages robust R libraries such as `tidygraph`, `data.table`, and +popular plotting tools including `plotly` and `ggplot2`. KGExplorer adopts a +modular design, with dedicated functions for data retrieval, manipulation, +visualization, and utility operations. Comprehensive caching support ensures +efficient handling of large datasets, reducing the need for repeated downloads. # Use cases +The following examples illustrate how KGExplorer can be used to extract +meaningful insights from biomedical knowledge graphs. Each use case highlights +key functionalities of the package and demonstrates practical applications in +biomedical research. 
+ +To explore additional functionalities and find detailed documentation for all +available functions, users can refer to the [KGExplorer reference guide](https://neurogenomics.github.io/KGExplorer/reference/index.html). + ## Extract disease/phenotype-cell type associations +The package provides functionality to extract associations between +diseases/phenotypes and cell types. This can help researchers understand the +relationships and interactions between different biological entities. + ```{r, eval=FALSE} +# Get the Monarch knowledge graph g <- get_monarch_kg() + +# Filter the graph to include only edges between diseases/phenotypes and cell +# types g2 <- filter_kg(g, to_categories = c("biolink:Disease", "biolink:PhenotypicFeature"), from_categories = "biolink:Cell") -plot_graph_visnetwork(g2, + +# Plot the filtered graph using visNetwork +plot_graph_visnetwork(g2, selectedBy = "id", label_var = "name", layout = "layout_nicely", @@ -42,31 +83,54 @@ plot_graph_visnetwork(g2, ## Assess known animal models of human phenotypes +The package includes tools to assess known animal models of human phenotypes, +which can be crucial for translational research and understanding disease +mechanisms. + ```{r, eval=FALSE} +# Map uPheno data dat <- map_upheno_data() + +# Plot the mapped uPheno data upheno_plots <- plot_upheno(dat) ``` ## Link diseases to phenotypes to genes to variants +Users can link diseases to phenotypes, genes, and variants, providing a +comprehensive view of the genetic and phenotypic landscape. This can aid in +identifying potential genetic markers and understanding the genetic basis of +diseases. 
+ ```{r, eval=FALSE} +# Link Monarch data to create a graph with variant-disease, variant-phenotype, +# and variant-gene associations gm <- link_monarch(maps = list( - c("variant","disease"), - c("variant","phenotype"), - c("variant","gene") + c("variant", "disease"), + c("variant", "phenotype"), + c("variant", "gene") )) + +# Join the linked graph with the original graph gm2 <- tidygraph::graph_join(gm,g) + +# Filter the graph to include only specific categories of nodes gm2 <- filter_graph(gm2, node_filters = list(category=c("disease", "phenotype", "phenotypicfeature", - "gene","variant"))) -gm3 <- filter_graph(g = gm2, - size=20000) + "gene", + "variant"))) + +# Further filter the graph to limit the size +gm3 <- filter_graph(g = gm2, size = 20000) + +# Update vertex attributes for category and name igraph::vertex_attr(gm3,"category") <- - tolower(gsub("biolink:","",igraph::vertex_attr(gm3,"category"))) + tolower(gsub("biolink:","", igraph::vertex_attr(gm3,"category"))) igraph::vertex_attr(gm3,"name") <- igraph::vertex_attr(gm3,"id") +# Plot the final graph using visNetwork plot_graph_visnetwork(gm3, selectedBy = "category", label_var = "name",