Skip to content

Commit

Permalink
update add_disease
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Jul 16, 2024
1 parent 668eeca commit 7c2d7dd
Show file tree
Hide file tree
Showing 12 changed files with 280 additions and 72 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export(make_network_object)
export(make_network_plot)
export(make_phenos_dataframe)
export(make_tiers)
export(map_disease)
export(map_phenotypes)
export(newlines_to_definition)
export(per_branch_plot)
Expand Down
1 change: 0 additions & 1 deletion R/0docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ NULL
#' Functions to add metadata to data.table objects.
#' @family add_
#' @param agg_by Column to aggregate metadata by.
#' @param add_definitions Add disease definitions using \link{add_mondo}.
#' @param gpt_filters A named list of filters to apply to the GPT annotations.
#' @inheritParams main
#' @inheritParams make_
Expand Down
50 changes: 21 additions & 29 deletions R/add_disease.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#' @describeIn add_ add_
#' Add diseases
#'
#' Annotate each HPO term with diseases that they are associated with.
Expand All @@ -7,6 +6,8 @@
#' See
#' \href{https://hpo-annotation-qc.readthedocs.io/en/latest/annotationFormat.html}{
#' here for column descriptions}.
#' @inheritParams add_
#' @inheritParams map_disease
#'
#' @export
#' @importFrom data.table merge.data.table
Expand All @@ -15,40 +16,31 @@
#' phenos <- example_phenos()
#' phenos2 <- add_disease(phenos = phenos)
add_disease <- function(phenos,
# extra_cols = c("Evidence","Reference","Biocuration"),
phenotype_to_genes = load_phenotype_to_genes(),
hpo = get_hpo(),
extra_cols = NULL,
all.x = TRUE,
allow.cartesian = FALSE,
add_definitions = FALSE){
use_api=FALSE,
workers=NULL,
allow.cartesian = FALSE){
if(!"hpo_id" %in% names(phenos)){
stp <- paste("hpo_id column must be present in phenos.")
stop(stp)
}
if(!all(c("disease_name","disease_id") %in% names(phenos))){
messager("Annotating phenos with Disease")
annot <- load_phenotype_to_genes(3)
#### From disease_id ####
if("disease_name" %in% names(phenos)){
return(phenos)
}
#### From hpo_id alone ####
by <- c("hpo_id","disease_id")
by <- by[by %in% names(phenos)]
#### Ensure there's only 1 row per Disease ####
annot <- annot[,unique(c("hpo_id","disease_name","disease_id",
extra_cols)),
with=FALSE][,.SD[1], by=c("disease_id","hpo_id")]
#### Merge ####
phenos <- data.table::merge.data.table(
phenos,
annot,
by = by,
all.x = all.x,
allow.cartesian = allow.cartesian)
}
#### Add disease definitions and Mondo ID mappings ####
if(isTRUE(add_definitions)){
phenos <- add_mondo(phenos = phenos)
if(!"disease_id" %in% names(phenos)){
phenos <- add_genes(phenos = phenos,
phenotype_to_genes = phenotype_to_genes,
hpo = hpo,
all.x = all.x,
allow.cartesian = allow.cartesian)
}
phenos <- map_disease(dat=phenos,
id_col="disease_id",
fields=c("disease"),
use_api=use_api,
return_dat=TRUE,
all.x = all.x,
allow.cartesian = allow.cartesian,
workers=workers)
return(phenos)
}
14 changes: 4 additions & 10 deletions R/add_genes.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ add_genes <- function(phenos = NULL,
phenotype_to_genes =
load_phenotype_to_genes(),
hpo = get_hpo(),
by = c("hpo_id","disease_id"),
by = c("hpo_id","hpo_name",
"disease_id","disease_name","disease_description"),
gene_col = "gene_symbol",
all.x = FALSE,
allow.cartesian = FALSE){
Expand All @@ -44,19 +45,12 @@ add_genes <- function(phenos = NULL,
#### Ensure necessary columns are in phenos ####
phenos <- add_hpo_id(phenos = phenos,
hpo = hpo)
phenos <- add_disease(phenos = phenos,
allow.cartesian = allow.cartesian)
#### Add Gene col to data ####
if(!"gene_symbol" %in% names(phenos)){
by <- by[by %in% names(phenos)]
## Get gene annotations
annot <- unique(
phenotype_to_genes[,unique(c(by,"gene_symbol","ncbi_gene_id")),
with=FALSE]
)
## Merge with input data
# ## Merge with input data
phenos <- data.table::merge.data.table(phenos,
annot,
phenotype_to_genes,
by = by,
all.x = all.x,
allow.cartesian = allow.cartesian)
Expand Down
16 changes: 16 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,19 @@
#' @format data.table
#' @usage data("disease_id_to_omop")
"disease_id_to_omop"

#' Human Phenotype Ontology: Disease ID to Disease Name and Disease Description
#'
#' @description
#' Mapping of HPO disease ID (disease_id) to Disease Name and
#' Disease Description.
#' @source
#' \code{
#' dat <- load_phenotype_to_genes()
#' out <- map_disease(dat)
#' disease_map <- out[,list(disease_id,disease_name,disease_description)]|>unique()
#' usethis::use_data(disease_map, overwrite = TRUE)
#' }
#' @format data.table
#' @usage data("disease_map")
"disease_map"
101 changes: 101 additions & 0 deletions R/map_disease.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#' Map disease
#'
#' Map disease IDs (e.g. "OMIM:101200") to names (e.g. "Apert syndrome")
#' @inheritParams add_
#' @param dat A data.table with a column of disease IDs.
#' @param id_col The name of the column with the disease IDs.
#' @param fields The fields to extract from the API response.
#' @param return_dat Return the data.table with the mapped fields.
#' @param use_api Use the API to get the disease names and descriptions.
#' Otherwise, use a cached data.table (\code{disease_map}).
#' @inheritParams KGExplorer::set_cores
#' @source \href{https://ontology.jax.org/api/network/docs}{HPO API docs}
#' @source \href{https://github.com/obophenotype/human-phenotype-ontology/issues/10232}{HPO GitHub Issue}
#' @export
#' @examples
#' dat <- HPOExplorer::load_phenotype_to_genes()
#' out <- map_disease(dat = dat, workers=1)
map_disease <- function(dat,
id_col="disease_id",
fields=c("disease","categories","genes")[1],
use_api=TRUE,
return_dat=FALSE,
workers=NULL,
all.x = TRUE,
allow.cartesian = FALSE
){
# res <- httr::GET(
# "https://ontology.jax.org/api/network/annotation/OMIM%3A101200",
# httr::add_headers(accept = "application/json")
# )
# cont <- httr::content(res)

# Define the URL and headers
if(!id_col %in% names(dat)){
stop("id_col not found in dat.")
}
if(!all(c("disease_name","disease_description") %in% names(dat))){
messager("Adding disease_name and disease_description.")
#### Slow but up-to-date ####
if(use_api || length(fields)>1){
map_disease_i <- function(id){
url <- URLencode(
## encode URL
paste0("https://ontology.jax.org/api/network/annotation/",id)
)
## get content
cont <- jsonlite::fromJSON(url)
## Extract names
cont$disease <- data.table::as.data.table(cont$disease)
cont$disease|> data.table::setnames(
c("name","description","mondoId"),
c("disease_name","disease_description","mondo_id"))
cont$genes <- data.table::as.data.table(cont$genes)
cont$categories <- lapply(cont$categories, function(x){
data.table::data.table(x)
})|>
data.table::rbindlist(idcol = "hpo_group", fill=TRUE) |>
data.table::setnames(c("id","name"),c("hpo_id","hpo_name"))
names(cont$categories) <- gsub("[.]","_",names(cont$categories))

#### Return ####
if(length(fields)==1){
return(cont[[fields]])
} else {
return(cont)
}
}
#### Iterate ####
ids <- unique(dat[[id_col]])
BPPARAM <- KGExplorer::set_cores(workers = workers)
res <- BiocParallel::bplapply(X = stats::setNames(ids,ids),
FUN = map_disease_i,
BPPARAM = BPPARAM)
if(length(fields)==1){
res <- data.table::rbindlist(res, fill=TRUE)
if(return_dat){
dat <- merge(dat, res,
by.x=id_col,
by.y = "id",
all.x = TRUE)
return(dat)
}
}
return(res)
#### Fast but potentially out-of-date ####
} else{
disease_map <- KGExplorer::get_data_package(name = "disease_map",
package = "HPOExplorer")
dat <- data.table::merge.data.table(
dat,
disease_map,
by = "disease_id",
all.x = all.x,
allow.cartesian = allow.cartesian)
return(dat)
}
} else {
messager("disease_name and disease_description already in dat.")
return(dat)
}
}
Binary file added data/disease_map.rda
Binary file not shown.
38 changes: 7 additions & 31 deletions man/add_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions man/add_disease.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7c2d7dd

Please sign in to comment.