Skip to content

Commit

Permalink
add get_top_phenos
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed May 17, 2024
1 parent 71d544b commit 98e7d0a
Show file tree
Hide file tree
Showing 13 changed files with 441 additions and 16 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ Suggests:
scales,
piggyback,
patchwork,
tidygraph
tidygraph,
colorspace
Remotes:
github::neurogenomics/KGExplorer
VignetteBuilder: knitr
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ export(filter_descendants)
export(get_gene_lists)
export(get_hpo)
export(get_hpo_id_direct)
export(get_top_phenos)
export(gpt_annot_check)
export(gpt_annot_class)
export(gpt_annot_codify)
export(gpt_annot_melt)
export(gpt_annot_plot)
Expand All @@ -48,6 +50,7 @@ export(newlines_to_definition)
export(per_branch_plot)
export(phenos_to_granges)
export(plot_evidence)
export(plot_top_phenos)
export(search_hpo)
import(KGExplorer)
import(data.table)
Expand All @@ -70,3 +73,4 @@ importFrom(tools,R_user_dir)
importFrom(utils,data)
importFrom(utils,download.file)
importFrom(utils,head)
importFrom(utils,tail)
2 changes: 1 addition & 1 deletion R/add_gpt_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' phenos2 <- add_gpt_annotations(phenos)
add_gpt_annotations <- function(phenos,
annot = gpt_annot_codify(
reset_tiers_dict=TRUE
reset_weights_dict=TRUE
)$annot_weighted,
annot_cols = names(annot)[
!names(annot) %in% c("hpo_id","hpo_name")
Expand Down
69 changes: 69 additions & 0 deletions R/get_top_phenos.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Get top phenotypes
#'
#' Get the most severe phenotypes per severity class:
#' Profound, Severe, Moderate, Mild.
#' The exception to this is the "Mild" class, where the \emph{least}
#' severe phenotypes will be taken instead of the most severe phenotypes.
#' @inheritParams add_ont_lvl
#' @inheritParams add_ancestor
#' @inheritParams plot_top_phenos
#' @importFrom utils head tail
#' @export
#' @examples
#' top_phenos <- get_top_phenos()
get_top_phenos <- function(res_class = gpt_annot_class(),
keep_ont_levels = seq(3,17),
keep_descendants = "Phenotypic abnormality",
n_per_class = 10,
annotation_order=NULL,
split_by_congenital=TRUE){
congenital_onset <- is_congenital <- severity_class <- value <- NULL;

if(is.null(annotation_order)){
weights_dict <- eval(formals(gpt_annot_codify)$weights_dict)
annotation_order <- gsub("_"," ",names(sort(unlist(weights_dict),
decreasing = TRUE)))
}
res_class[,is_congenital:=(congenital_onset %in% c(2,3))]
annot_melt <- data.table::melt.data.table(res_class,
id.vars = c("severity_class",
"is_congenital",
"hpo_id",
"hpo_name",
"severity_score_gpt"))
annot_melt <- annot_melt[!is.na(value),]
#### Filter out ont levels ####
annot_melt <- HPOExplorer::add_ancestor(annot_melt,
keep_descendants = keep_descendants)
data.table::setorderv(annot_melt,"severity_score_gpt",-1)
annot_melt <- HPOExplorer::add_ont_lvl(annot_melt,
keep_ont_levels = keep_ont_levels)
annot_melt$variable <- gsub("_", " ", annot_melt$variable)
annot_melt$variable <- factor(annot_melt$variable, levels = annotation_order)


if(split_by_congenital){
dat_congenital <- lapply(unique(annot_melt$severity_class), function(s){
sort_fun <- if(s=="mild") utils::tail else utils::head
annot_melt[severity_class== s &
is_congenital==TRUE,
sort_fun(.SD,n_per_class), by="variable"]
})|> data.table::rbindlist()
dat_noncongenital <- lapply(unique(annot_melt$severity_class), function(s){
sort_fun <- if(s=="mild") tail else head
annot_melt[severity_class== s &
is_congenital==FALSE,
sort_fun(.SD,n_per_class), by="variable"]
})|> data.table::rbindlist()
return(list(congenital=dat_congenital,
noncongenital=dat_noncongenital)
)
} else {
dat <- lapply(unique(annot_melt$severity_class), function(s){
sort_fun <- if(s=="mild") tail else head
annot_melt[severity_class== s,
sort_fun(.SD,n_per_class), by="variable"]
})|> data.table::rbindlist()
return(dat)
}
}
1 change: 1 addition & 0 deletions R/gpt_annot_check.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ gpt_annot_check <- function(annot = gpt_annot_read(),
checks <- list(
annot=annot,
annot_mean=annot_mean,
annot_stringent_mean=annot_stringent_mean,
consistency_count=consistency_count,
consistency_rate=consistency_rate,
consistency_stringent_count=consistency_count,
Expand Down
104 changes: 104 additions & 0 deletions R/gpt_annot_class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' GPT annotations: add severity class
#'
#' Convert severity annotations ot severity classes
#' (profound > severe > moderate > mild) to approximate those introduced in
#' \href{https://doi.org/10.1371/journal.pone.0114391}{Lazarin et al. 2014}.
#'
#' @param res_coded The output of list \link{gpt_annot_codify}.
#' @param tiers_dict A named list of severity tiers.
#' @param inclusion_values Numerically encoded annotation values
#' to count as hits. See the \code{gpt_annot_codify(code_dict=)}
#' argument defaults for the mappings.
#' Defaults to 2 ("often") and 3 ("always").
#' @param add_severity_score_gpt Whether to add a continuous severity score
#' as well.
#' @returns A data.table with severity classes, as well as severity
#' scores (0-100) which can be used to rank severity within each class.
#' @export
#' @examples
#' res_coded <- gpt_annot_codify()
#' res_class <- gpt_annot_class(res_coded)
gpt_annot_class <- function(res_coded = gpt_annot_codify(),
inclusion_values=c(2,3),
tiers_dict = list(
## Tier 1
death=1,
intellectual_disability=1,
# congenital_onset=1,
## Tier 2
impaired_mobility=2,
physical_malformations=2,
## Tier 3
blindness=3,
sensory_impairments=3,
immunodeficiency=3,
cancer=3,
## Tier 4
reduced_fertility=4
),
add_severity_score_gpt=TRUE){
severity_class <- NULL;
map_severity_class <- function(r,
tiers_dict,
inclusion_values,
return_score=FALSE){
variable <- hpo_name <- value <- NULL;

tiers <- unique(unlist(tiers_dict))
tier_scores <- lapply(stats::setNames(tiers,paste0("tier",tiers)),
function(x){
tx <- tiers_dict[unname(unlist(tiers_dict)==x)]
counts <- r[,sapply(.SD, function(v){
v %in% inclusion_values
}),
.SDcols = names(tx)]
list(
counts=counts,
proportion=sum(counts)/length(tx)
)
})
mean_proportion <- sapply(tier_scores, function(x)x$proportion)|>mean()
assigned_class <- if(sum(tier_scores$tier1$counts)>1){
c("profound"=mean_proportion)
} else if (sum(tier_scores$tier1$counts)>0 ||
sum(c(tier_scores$tier2$counts,tier_scores$tier3$counts))>3){
c("severe"=mean_proportion)
} else if(sum(tier_scores$tier3$counts)>0){
c("moderate"=mean_proportion)
} else{
c("mild"=mean_proportion)
}
if(return_score){
return(assigned_class)
} else{
return(names(assigned_class))
}
}
res_class <- data.table::copy(res_coded$annot_coded)
messager("Assigning severity classes.")
res_class[,severity_class:=map_severity_class(.SD,
inclusion_values = inclusion_values,
tiers_dict = tiers_dict), by=.I]
res_class[,severity_class:=factor(severity_class,
levels = c("profound","severe",
"moderate","mild"),
ordered = TRUE)]
# messager("Assigning severity class scores.")
# res_class[,severity_class_score:=map_severity_class(.SD,
# tiers_dict = tiers_dict,
# inclusion_values = inclusion_values,
# return_score = TRUE), by=.I]
if(isTRUE(add_severity_score_gpt) &&
"annot_weighted" %in% names(res_coded) &&
!"severity_score_gpt" %in% names(res_class)){
res_class <- merge(res_class,
res_coded$annot_weighted[,c("hpo_name",
"severity_score_gpt")])
data.table::setorderv(res_class,
c("severity_class","severity_score_gpt"),c(1,-1))
} else{
data.table::setorderv(res_class,
c("severity_class"))
}
return(res_class)
}
18 changes: 9 additions & 9 deletions R/gpt_annot_codify.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#' Check GPT phenotype annotations using a several metrics.
#' @param remove_duplicates Ensure only 1 row per phenotype.
#' @param code_dict Numerical encodings of annotation values.
#' @param tiers_dict Numerical encodings of annotation column.
#' @param reset_tiers_dict Override \code{tiers_dict} values and set all values
#' @param weights_dict Weights to be applied to each annotation metric.
#' @param reset_weights_dict Override \code{weights_dict} values and set all values
#' to 1. This will ensure that all annotations are unweighted.
#' @inheritParams gpt_annot_check
#' @inheritParams KGExplorer::filter_dt
Expand All @@ -24,9 +24,9 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
"often"=2,
"always"=3
),
tiers_dict=list(
weights_dict=list(
death=6,
intellectual_disability=5,
death=5,
impaired_mobility=4,
physical_malformations=3,
blindness=4,
Expand All @@ -36,18 +36,18 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
reduced_fertility=1,
congenital_onset=1
),
reset_tiers_dict=FALSE,
reset_weights_dict=FALSE,
filters=list()
){
severity_score_gpt <- hpo_name <- NULL;

d <- data.table::copy(annot)
if(isTRUE(reset_tiers_dict)) tiers_dict <- lapply(tiers_dict,function(x){1})
if(isTRUE(reset_weights_dict)) weights_dict <- lapply(weights_dict,function(x){1})
#### Ensure only 1 row/hpo_name by simply taking the first ####
if(isTRUE(remove_duplicates)){
d <- d[,utils::head(.SD,1), by=c("hpo_id","hpo_name")]
}
cols <- names(tiers_dict)
cols <- names(weights_dict)
#### Add levels ####
d <- d[,lapply(.SD,function(x){
factor(tolower(x),levels = rev(names(code_dict)), ordered = TRUE)
Expand All @@ -61,14 +61,14 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
max_score <-
sum(
max(code_dict, na.rm = TRUE) *
(max(unlist(tiers_dict))*length(tiers_dict))
(max(unlist(weights_dict))*length(weights_dict))
)
d_coded <- d[,lapply(.SD,FUN=function(x){
unlist(code_dict[tolower(x)])}),.SDcols = cols, by=c("hpo_id","hpo_name")]
d_weighted <- data.table::as.data.table(
lapply(stats::setNames(cols,cols),
function(co){
d_coded[[co]]*tiers_dict[[co]]
d_coded[[co]]*weights_dict[[co]]
})
)[,hpo_name:=d_coded$hpo_name][,severity_score_gpt:=(
rowSums(.SD,na.rm = TRUE)/max_score*100),
Expand Down
Loading

0 comments on commit 98e7d0a

Please sign in to comment.