From a30bdd441f7431ce6742b3f7929ff79ce7100156 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:35:39 +0000 Subject: [PATCH] Port function from HPOExplorer to avoid circular dependency --- DESCRIPTION | 1 - NAMESPACE | 1 + NEWS.md | 7 ++--- R/get_hpo.R | 43 ++++++++++++++++++++++++++++++ R/map_upheno_data_i.R | 20 +++++++------- README.md | 4 +-- man/get_.Rd | 62 ++++++++++++++++++++++++++++++------------- 7 files changed, 104 insertions(+), 34 deletions(-) create mode 100644 R/get_hpo.R diff --git a/DESCRIPTION b/DESCRIPTION index eaf227d..95f7a27 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,7 +73,6 @@ Suggests: BiocGenerics, readxl, htmlwidgets, - HPOExplorer, sparklyr, httr, jsonlite, diff --git a/NAMESPACE b/NAMESPACE index 8111e39..20f5822 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(get_gencc) export(get_gene_lengths) export(get_genes_disease) export(get_graph_colnames) +export(get_hpo) export(get_monarch) export(get_monarch_files) export(get_monarch_kg) diff --git a/NEWS.md b/NEWS.md index 8447174..90ad69a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,12 @@ # KGExplorer 0.99.05 +## New features +* `get_hpo` + - Port function from `HPOExplorer` package to prevent circular dependency. + ## Bug fixes * `DESCRIPTION` - Update remote for `monarchr`. - - Add `HPOExplorer` to Suggests and remotes. - - Add `KGExplorer` (self) to remote to fix circular dependency with - `HPOExplorer` installation. # KGExplorer 0.99.04 diff --git a/R/get_hpo.R b/R/get_hpo.R new file mode 100644 index 0000000..00f526c --- /dev/null +++ b/R/get_hpo.R @@ -0,0 +1,43 @@ +#' @describeIn get_ get_ +#' Get Human Phenotype Ontology (HPO) +#' +#' Updated version of Human Phenotype Ontology (HPO). +#' Created from the OBO files distributed by the HPO project's +#' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. +#' Adapted from \link[HPOExplorer]{get_hpo}. +#' +#' By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. +#' Note that the maximum ontology level depth in the 2016 version was 14, +#' whereas in the 2023 version the maximum ontology level depth is 16 +#' (due to an expansion of the HPO). +#' @inheritParams get_ontology +#' @inheritDotParams get_ontology +#' @returns \link[simona]{ontology_DAG} object. +#' +#' @export +#' @examples +#' hpo <- get_hpo() +get_hpo <- function(lvl = 2, + force_new = FALSE, + terms=NULL, + ## rols imports the international version for some reason + method="github", + save_dir=cache_dir(package = "KGExplorer"), + ...){ + + file <- file.path(save_dir,"hp.rds") + if(!file.exists(file) || isTRUE(force_new)){ + ont <- get_ontology(name = "hp", + lvl = lvl, + force_new = force_new, + terms = terms, + method = method, + save_dir = save_dir, + ...) + saveRDS(ont,file) + } else { + ont <- readRDS(file) + } + ont <- filter_ontology(ont = ont, terms = terms) + return(ont) +} diff --git a/R/map_upheno_data_i.R b/R/map_upheno_data_i.R index f104977..da5104a 100644 --- a/R/map_upheno_data_i.R +++ b/R/map_upheno_data_i.R @@ -12,7 +12,7 @@ map_upheno_data_i <- function(pheno_map_method, n_genes_db1 <- object <- gene_label <- db <- . <- n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <- phenotype_genotype_score <- equivalence_score <- NULL; - + pheno_map_method <- pheno_map_method[1] gene_map_method <- gene_map_method[1] messager(paste0("map_upheno_data: pheno_map_method=", @@ -26,10 +26,10 @@ map_upheno_data_i <- function(pheno_map_method, names(pheno_map) <-gsub("^object","id2",names(pheno_map)) pheno_map[,db1:=gsub("*:.*","",basename(id1))] } else if(pheno_map_method=="monarch"){ - - hpo <- HPOExplorer::get_hpo() + + hpo <- get_hpo() out <- monarchr::monarch_search(query = NULL, - category = "biolink:PhenotypicFeature", + category = "biolink:PhenotypicFeature", limit = 500) pheno_map <- get_monarch(queries = "phenotype_to_phenotype") |> data.table::setnames(c("label_x","label_y"),c("label1","label2")) @@ -47,7 +47,7 @@ map_upheno_data_i <- function(pheno_map_method, } } } - + ## Gene-phenotype associations across 8 species { genes <- get_monarch(maps = list(c("phenotype","gene")), @@ -65,7 +65,7 @@ map_upheno_data_i <- function(pheno_map_method, ## Create an db-species map for each Ontology species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)] } - + #### Map non-human genes onto human orthologs #### { genes_homol <- map_genes_monarch(dat=genes, @@ -75,7 +75,7 @@ map_upheno_data_i <- function(pheno_map_method, data.table::uniqueN(genes$subject_taxon_label), "species remain after cross-species gene mapping.") } - + #### Map non-human phenotypes onto human phenotypes #### #### Merge nonhuman ontology genes with human HPO genes #### { @@ -94,7 +94,7 @@ map_upheno_data_i <- function(pheno_map_method, all.y = keep_nogenes, suffixes = c(1,2), allow.cartesian = TRUE - ) + ) pheno_map_genes[,db2:=id2_db] ## Fill in missing species for those without gene data pheno_map_genes[ @@ -113,7 +113,7 @@ map_upheno_data_i <- function(pheno_map_method, ## Remove # remove(genes_human,genes_nonhuman,pheno_map) } - + #### Count the number of overlapping genes { if(isFALSE(keep_nogenes)){ @@ -154,4 +154,4 @@ map_upheno_data_i <- function(pheno_map_method, ## less than or equal to the number of total HPO genes. # pheno_map_genes_match[n_genes>n_genes_hpo,] return(pheno_map_genes_match) -} \ No newline at end of file +} diff --git a/README.md b/README.md index c680fe0..a70d374 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ KGExplorer
[![License: GPL-3](https://img.shields.io/badge/license-GPL--3-blue.svg)](https://cran.r-project.org/web/licenses/GPL-3) -[![](https://img.shields.io/badge/devel%20version-0.99.04-black.svg)](https://github.com/neurogenomics/KGExplorer) +[![](https://img.shields.io/badge/devel%20version-0.99.05-black.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/languages/code-size/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer) [![](https://img.shields.io/github/last-commit/neurogenomics/KGExplorer.svg)](https://github.com/neurogenomics/KGExplorer/commits/master)
[![R build @@ -15,7 +15,7 @@ status](https://github.com/neurogenomics/KGExplorer/workflows/rworkflows/badge.s Authors: Brian Schilder, Hiranyamaya Dash

-README updated: Dec-17-2024 +README updated: Dec-18-2024

diff --git a/man/get_.Rd b/man/get_.Rd index 1e3e879..8d92860 100644 --- a/man/get_.Rd +++ b/man/get_.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/0docs.R, R/get_alphamissense.R, % R/get_clinvar.R, R/get_data_package.R, R/get_definitions.R, R/get_gencc.R, % R/get_gene_lengths.R, R/get_genes_disease.R, R/get_graph_colnames.R, -% R/get_medgen_maps.R, R/get_metadata_omim.R, R/get_metadata_orphanet.R, -% R/get_monarch.R, R/get_monarch_files.R, R/get_monarch_kg.R, -% R/get_monarch_models.R, R/get_mondo_maps.R, R/get_mondo_maps_files.R, -% R/get_ols_options.R, R/get_ontology.R, R/get_ontology_dict.R, -% R/get_ontology_levels.R, R/get_pli.R, R/get_prevalence.R, R/get_ttd.R, -% R/get_upheno.R, R/get_version.R +% R/get_hpo.R, R/get_medgen_maps.R, R/get_metadata_omim.R, +% R/get_metadata_orphanet.R, R/get_monarch.R, R/get_monarch_files.R, +% R/get_monarch_kg.R, R/get_monarch_models.R, R/get_mondo_maps.R, +% R/get_mondo_maps_files.R, R/get_ols_options.R, R/get_ontology.R, +% R/get_ontology_dict.R, R/get_ontology_levels.R, R/get_pli.R, +% R/get_prevalence.R, R/get_ttd.R, R/get_upheno.R, R/get_version.R \name{get_} \alias{get_} \alias{get_alphamissense} @@ -18,6 +18,7 @@ \alias{get_gene_lengths} \alias{get_genes_disease} \alias{get_graph_colnames} +\alias{get_hpo} \alias{get_medgen_maps} \alias{get_metadata_omim} \alias{get_metadata_orphanet} @@ -85,6 +86,15 @@ get_genes_disease( get_graph_colnames(g, what = c("nodes", "edges")) +get_hpo( + lvl = 2, + force_new = FALSE, + terms = NULL, + method = "github", + save_dir = cache_dir(package = "KGExplorer"), + ... +) + get_medgen_maps() get_metadata_omim(save_dir = cache_dir()) @@ -243,7 +253,7 @@ Passes to \link{get_monarch_files}.} \item{run_map_mondo}{Run \link{map_mondo} to map MONDO IDs to disease IDs.} \item{...}{ - Arguments passed on to \code{\link[=link_monarch]{link_monarch}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[=get_ontology_github]{get_ontology_github}} + Arguments passed on to \code{\link[=link_monarch]{link_monarch}}, \code{\link[=get_ontology]{get_ontology}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[data.table:fread]{data.table::fread}}, \code{\link[=get_ontology_github]{get_ontology_github}} \describe{ \item{\code{node_filters}}{A named list of filters to apply to the node data. Names should be name of the metadata column, and values should be a vector of @@ -294,6 +304,17 @@ return any rows where the "type" column contains either "gene" or "variant".} \item{what}{What should get activated? Possible values are \code{nodes} or \code{edges}.} +\item{lvl}{Depth of the ancestor terms to add. +Will get the closest ancestor to this level if none have this exact level.} + +\item{terms}{A vector of ontology term IDs.} + +\item{method}{Compute ontology levels using: +\itemize{ + \item{"height" (default)} \link[simona]{dag_height}. + \item{"depth"} \link[simona]{dag_depth}. +}} + \item{queries}{A list of free-form substring queries to filter files by (using any column in the metadata). For example, \code{list("gene_disease","variant_disease")} will return any @@ -338,24 +359,13 @@ Can be a character vector of one or more column names.} \item{ol}{An \link[rols]{Ontologies} object.} -\item{method}{Compute ontology levels using: -\itemize{ - \item{"height" (default)} \link[simona]{dag_height}. - \item{"depth"} \link[simona]{dag_depth}. -}} - \item{filetype}{File type to search for.} \item{import_func}{Function to import the ontology with. If \code{NULL}, automatically tries to choose the correct function.} -\item{terms}{A vector of ontology term IDs.} - \item{add_metadata}{Add metadata to the resulting ontology object.} -\item{lvl}{Depth of the ancestor terms to add. -Will get the closest ancestor to this level if none have this exact level.} - \item{add_n_edges}{Add the number of edges (connections) for each term.} \item{add_ontology_levels}{Add the ontology level for each term.} @@ -420,6 +430,8 @@ A named list of data.tables of AlphaMissense predictions. data.table +\link[simona]{ontology_DAG} object. + \link[data.table]{data.table} \link[data.table]{data.table} of mappings. @@ -504,6 +516,19 @@ Load gene lists associated with each disease phenotype from: \item \code{get_graph_colnames()}: get_ Get column names in the nodes and/or edges of a tbl_graph. +\item \code{get_hpo()}: get_ +Get Human Phenotype Ontology (HPO) + +Updated version of Human Phenotype Ontology (HPO). +Created from the OBO files distributed by the HPO project's +\href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. +Adapted from \link[HPOExplorer]{get_hpo}. + +By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. +Note that the maximum ontology level depth in the 2016 version was 14, +whereas in the 2023 version the maximum ontology level depth is 16 + (due to an expansion of the HPO). + \item \code{get_medgen_maps()}: get_ Get MedGen maps. @@ -625,6 +650,7 @@ ont <- get_ontology("hp", terms=10) def <- get_definitions(ont) d <- get_gencc() genes <- get_genes_disease() +hpo <- get_hpo() dat <- get_monarch(maps=list(c("gene","disease"))) files <- get_monarch_files() \dontrun{