Skip to content

Commit

Permalink
Merge pull request #139 from danforthcenter/time_bug
Browse files Browse the repository at this point in the history
Time bug
  • Loading branch information
joshqsumner authored Dec 18, 2024
2 parents 60c03da + 249c898 commit ff89948
Show file tree
Hide file tree
Showing 13 changed files with 90 additions and 53 deletions.
57 changes: 38 additions & 19 deletions R/brmViolin.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#' use \link{combineDraws} to merge their draws into a single dataframe for testing.
#' @param ss A \code{pcvrss} object. The only component that is currently used is the pcvrForm.
#' @param hypothesis A hypothesis expressed as a character string in the style of that used by
#' \code{brms::hypothesis} and \link{testGrowth}. Note that currently this only supports hypotheses
#' using two parameters from the model at a time (ie, "groupA / groupB > 1.05" works but
#' "(groupA / groupB) - (groupC / groupD) > 1" does not). In the hypothesis "..." can be used to mean
#' \code{brms::hypothesis} and \link{testGrowth}. In the hypothesis "..." can be used to mean
#' "all groups for this parameter" so that the hypothesis "... / A_group1 > 1.05" would include all
#' the "A" coefficients for groups 1:N along the x axis, see examples.
#' the "A" coefficients for groups 1:N along the x axis, see examples. If a hypothesis is using
#' several parameters per group (second example) then math around those parameters and any ellipses
#' should be wrapped in parentheses. Note that currently the single hypothesis option (no ...)
#' only supports hypotheses using two parameters from the model at a time
#' (ie, "groupA / groupB > 1.05" works but "(groupA / groupB) - (groupC / groupD) > 1" does not).
#'
#' @keywords brms Bayesian pcvrss
#'
Expand All @@ -32,6 +34,8 @@
#'
#' fit <- fitGrowth(ss, backend = "cmdstanr", iter = 500, chains = 1, cores = 1)
#' brmViolin(fit, ss, ".../A_groupd > 1.05") # all groups used
#' brmViolin(fit, ss, "abs(1 - ((...)/(C_groupd - B_groupd))) > 0.05") # rather arbitrary
#' brmViolin(fit, ss, "abs(1 - ((...)/(C_groupa - B_groupd))) > 0.05") # totally arbitrary
#' brmViolin(fit, ss, "A_groupa/A_groupd > 1.05") # only these two groups
#' }
#'
Expand Down Expand Up @@ -78,7 +82,7 @@ brmViolin <- function(fit, ss, hypothesis) {
#' @keywords internal

.brmViolinEllipsis <- function(fitdf, ss, hypothesis) {
math_or_underscore <- "\\+|\\/|\\-|\\*|\\^|>|<|=|_"
math_or_underscore <- "\\+|\\/|\\-|\\*|\\^|>|<|=|_|\\(|\\)"
hsplit <- trimws(strsplit(hypothesis, math_or_underscore)[[1]])
hsplit <- hsplit[as.logical(nchar(hsplit))]
hsplit <- hsplit[!grepl("\\.\\.\\.", hsplit)]
Expand All @@ -88,24 +92,36 @@ brmViolin <- function(fit, ss, hypothesis) {
any(grepl(grp, hsplit))
})))
grouping <- grouping[which_grouping]
par <- hsplit[which(!grepl(grouping, hsplit))]
par <- hsplit[which(hsplit %in% names(ss$formula$pforms))]
ref_group <- hsplit[which(grepl(grouping, hsplit))]
draws <- fitdf[, grepl(paste0("^b_", par, ".*", grouping), colnames(fitdf))]
colnames(draws) <- sub("^b_", "", colnames(draws))
colnames(fitdf) <- sub("^b_", "", colnames(fitdf))
draws <- fitdf[, grepl(paste0(par, ".*", grouping, collapse = "|"), colnames(fitdf))]
if (!grepl("[:]", grouping)) {
draws <- draws[, !grepl("[:]", colnames(draws))] # drop interaction terms
}
if (length(par) > 0) {
colnames(draws) <- sub(paste0(par, "_"), "", colnames(draws))
colnames(draws) <- paste0(par, "_", colnames(draws))
transformation <- paste(par, ref_group, sep = "_")
if (length(ref_group) > 1) {
pat <- paste0("\\(", paste(paste(par, ref_group, sep = "_"), collapse = ".*"), "\\)")
transformation <- regmatches(hypothesis, gregexpr(pat, hypothesis))
}
hyps_df <- do.call(rbind, lapply(colnames(draws), function(grp) {
iter_hyp <- gsub("\\.{3,}", grp, hypothesis)
groupings <- unique(ss$df[[grouping]])
hyps_res <- lapply(groupings, function(grp) {
ellipse_replacement <- gsub(ref_group[1], paste0(grouping, grp), transformation)
transformed_posterior <- with(draws, expr = {eval(parse(text = ellipse_replacement))})
iter_hyp <- gsub("\\.{3,}", ellipse_replacement, hypothesis)
x <- as.data.frame(brms::hypothesis(draws, iter_hyp)$h)
x$ref_group <- ifelse(as.logical(length(par)), paste0(par, "_", ref_group), ref_group)
x$comparison <- grp
x[, c("Post.Prob", "ref_group", "comparison")]
}))
return(
list(
"hyp_df" = x[, c("Post.Prob", "ref_group", "comparison")],
"transformed_posterior" = transformed_posterior
)
)
})
hyps_df <- do.call(rbind, lapply(hyps_res, \(l) {l$hyp_df}))
transformed_draws <- as.data.frame(do.call(cbind, lapply(hyps_res, \(l) {l$transformed_posterior})))
colnames(transformed_draws) <- paste0(groupings)
hyps_df$discrete_post_prob <- factor(
ifelse(hyps_df$Post.Prob >= 0.99, "A",
ifelse(hyps_df$Post.Prob >= 0.95, "B",
Expand All @@ -116,12 +132,13 @@ brmViolin <- function(fit, ss, hypothesis) {
),
levels = c("A", "B", "C", "D", "E"), ordered = TRUE
)
longdraw <- as.data.frame(data.table::melt(data.table::as.data.table(draws),
measure.vars = colnames(draws), value.name = "draw"
longdraw <- as.data.frame(data.table::melt(data.table::as.data.table(transformed_draws),
measure.vars = colnames(transformed_draws), value.name = "draw"
))
longdraw$param <- substr(longdraw$variable, 1, 1)
longdraw$group <- sub(paste0(par, "_"), "", longdraw$variable)
longdraw$ref <- grepl(ref_group, longdraw$variable)
longdraw$group <- sub(paste0(paste0(par, "_"), collapse = "|"), "", longdraw$variable)
longdraw$ref <- grepl(paste0(unique(gsub(grouping, "", ref_group)), collapse = "|"),
longdraw$variable)
ldj <- merge(longdraw, hyps_df, by.x = c("variable"), by.y = c("comparison"))
return(ldj)
}
Expand All @@ -134,10 +151,12 @@ brmViolin <- function(fit, ss, hypothesis) {
math <- "\\+|\\/|\\-|\\*|\\^|>|<|=" # foreseeable math operators
hsplit <- trimws(strsplit(hypothesis, math)[[1]])
hsplit <- hsplit[suppressWarnings(is.na(as.numeric(hsplit)))]
hsplit <- hsplit[nchar(hsplit) > 0]
draws <- fitdf
colnames(draws) <- sub("^b_", "", colnames(draws))
draws <- draws[, hsplit]
hyps_df <- as.data.frame(brms::hypothesis(draws, hypothesis = hypothesis)$h)
# here I'd need to make the transformed draws if the hypothesis is weird like that?
hyps_df$ref_group <- hsplit[2]
hyps_df$comparison <- hsplit[1]
hyps_df <- rbind(
Expand Down
19 changes: 10 additions & 9 deletions R/bwtime.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
#' @param df Data frame to use, this can be in wide or long format.
#' @param mode One of "DAS", "DAP" or "DAE" (Days After Planting and Days After Emergence).
#' Defaults to adding all columns.
#' Note that if timeCol is not an integer then DAS is always returned.
#' Note that if timeCol is not numeric then DAS is always returned.
#' @param plantingDelay If `mode` includes "DAP" then `plantingDelay` is used to adjust "DAS"
#' @param phenotype If `mode` includes "DAE" then this is the phenotype used to classify emergence.
#' @param cutoff If `mode` inlcludes "DAE" then this value is used to classify emergence.
#' @param cutoff If `mode` includes "DAE" then this value is used to classify emergence.
#' Defaults to 1, meaning an image with a value of 1 or more for `phenotype` has "emerged".
#' @param timeCol Column of input time values, defaults to "timestamp".
#' If this is not an integer then it is assumed to be
#' If this is not numeric then it is assumed to be
#' a timestamp in the format of the format argument.
#' @param group Grouping variables to specify unique plants as a character vector.
#' This defaults to "Barcodes". These taken together should identify a unique plant across time,
#' although often "angle" or "rotation" should be added.
#' @param plot Logical, should plots of the new time variables be printed?
#' @param format An R POSIXct format, defaults to lemnatech standard format.
#' This is only used if timeCol is not an integer.
#' This is only used if timeCol is not a numeric.
#' @param traitCol Column with phenotype names, defaults to "trait".
#' This should generally not need to be changed from the default.
#' If this and valueCol are present in colnames(df) then the data
Expand All @@ -28,9 +28,10 @@
#' and plants were watered before being imaged or if you want to index days off of
#' midnight. This defaults to NULL but will take any value coercible to POSIXct by
#' \code{as.POSIXct(... , tz="UTC")} such as "2020-01-01 18:30:00"
#' @param digits Number of digits to round DAS to if timeCol is not numeric, defaults to 0.
#' @keywords DAS time ggplot
#' @import ggplot2
#' @return The input dataframe with new integer columns for different ways
#' @return The input dataframe with new numeric columns for different ways
#' of describing time in the experiment. If plot is TRUE then a ggplot is also returned as part of
#' a list.
#' @export
Expand Down Expand Up @@ -86,7 +87,7 @@
bw.time <- function(df = NULL, mode = c("DAS", "DAP", "DAE"), plantingDelay = NULL,
phenotype = NULL, cutoff = 1, timeCol = "timestamp",
group = "Barcodes", plot = TRUE, format = "%Y-%m-%d %H:%M:%S",
traitCol = "trait", valueCol = "value", index = NULL) {
traitCol = "trait", valueCol = "value", index = NULL, digits = 0) {
wide <- .detectWideData(df, traitCol, valueCol) # see bwoutliers

if (is.null(plantingDelay) && "DAP" %in% mode) {
Expand All @@ -96,7 +97,7 @@ bw.time <- function(df = NULL, mode = c("DAS", "DAP", "DAE"), plantingDelay = NU
mode <- mode[-which(mode == "DAE")]
}

formatNonIntegerTimeRes <- .formatNonIntegerTime(df, timeCol, format, index)
formatNonIntegerTimeRes <- .formatNonIntegerTime(df, timeCol, format, index, digits)
df <- formatNonIntegerTimeRes[["data"]]
timeCol <- formatNonIntegerTimeRes[["timeCol"]]

Expand Down Expand Up @@ -124,14 +125,14 @@ bw.time <- function(df = NULL, mode = c("DAS", "DAP", "DAE"), plantingDelay = NU
#' @keywords internal
#' @noRd

.formatNonIntegerTime <- function(df, timeCol, format, index) {
.formatNonIntegerTime <- function(df, timeCol, format, index, digits) {
if (!is.numeric(df[[timeCol]])) {
df[[timeCol]] <- as.POSIXct(strptime(df[[timeCol]], format = format))
beg <- as.POSIXct(index, tz = "UTC")
if (is.null(index)) {
beg <- min(df[[timeCol]], na.rm = TRUE)
}
df$DAS <- floor(as.numeric((df[[timeCol]] - beg) / 60 / 60 / 24))
df$DAS <- round(as.numeric((df[[timeCol]] - beg) / 60 / 60 / 24), digits = digits)
timeCol <- "DAS"
}
return(list("data" = df, "timeCol" = timeCol))
Expand Down
2 changes: 1 addition & 1 deletion R/frem.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ frem <- function(df, des, phenotypes, timeCol = NULL, cor = TRUE, returnData = F
dummyX <- TRUE
}
#* `Format a time column if non-integer`
formatted <- .formatNonIntegerTime(df, timeCol, format = time_format, index = NULL)
formatted <- .formatNonIntegerTime(df, timeCol, format = time_format, index = NULL, digits = 0)
df <- formatted$data
timeCol <- formatted$timeCol
#* `Make formulas`
Expand Down
4 changes: 2 additions & 2 deletions R/growthSS.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
#' of logistic increasing counts may be written as \code{model = "poisson: logistic"}.
#' @param form A formula describing the model. The left hand side should only be
#' the outcome variable (phenotype), and a cutoff if you are making a survival model (see details).
#' The right hand side needs at least the x variable
#' (typically time). Grouping is also described in this formula using roughly lme4
#' The right hand side needs at least the x variable (which should be numeric and is
#' typically time). Grouping is also described in this formula using roughly lme4
#' style syntax,with formulas like \code{y~time|individual/group} to show that predictors
#' should vary by \code{group} and autocorrelation between \code{individual:group}
#' interactions should be modeled. Note that autocorrelation is only modeled with the "brms"
Expand Down
4 changes: 3 additions & 1 deletion R/parsePcvrForm.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@
df <- df[!is.infinite(df[[x]]), ]
df <- df[!is.infinite(df[[y]]), ]
df <- df[!is.infinite(df[[hierarchical_predictor]]), ]
formatted <- .formatNonIntegerTime(df, x, format = "%Y-%m-%d %H:%M:%S", index = NULL)
formatted <- .formatNonIntegerTime(df, x,
format = "%Y-%m-%d %H:%M:%S",
index = NULL, digits = 2)
df <- formatted$data
x <- formatted$timeCol
},
Expand Down
9 changes: 6 additions & 3 deletions R/wue.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@
#' how efficient a plant is at maintaining size given some amount of water.
#' @keywords WUE
#' @import data.table
#' @return A data frame containing the bellwether watering data joined
#' to phenotype data with new columns for change in the phenotype,
#' change in the pre-watering weight, and pseudo-water use efficiency (pWUE).
#' @return A data frame containing the watering data and
#' to phenotype data with new columns for change in the phenotype (\code{pheno_diff}),
#' amount of water used (\code{total_water}) over the interval between phenotype measurements
#' (water added post to pre phenotype measurement), \code{start} and \code{end} times for the
#' interval as well as their difference (\code{timeLengthSeconds}), and pseudo water use efficiency
#' (\code{pWUE}).
#' @examples
#'
#' set.seed(123)
Expand Down
12 changes: 8 additions & 4 deletions man/brmViolin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 9 additions & 6 deletions man/bw.time.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/growthSS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions man/pwue.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions tests/testthat/test-brmsModels.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ test_that("Logistic brms model pipeline", {
expect_s3_class(plot2, "ggplot")
plot2.5 <- brmViolin(fit, ss, "B_groupb/B_groupa > 1.05")
expect_s3_class(plot2.5, "ggplot")
plot2.75 <- brmViolin(fit, ss, "abs(1 - ((...)/(C_groupb - B_groupa))) > 0.05")
expect_s3_class(plot2.75, "ggplot")
ss2 <- growthSS(
model = "gompertz", form = y ~ time | id / group, sigma = "logistic",
list("A" = 130, "B" = 10, "C" = 1, "sigmaA" = 20, "sigmaB" = 10, "sigmaC" = 2),
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-long_mv_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ test_that("reading mv github data as long works", {
outRows = 3000
)
)
expect_equal(dim(mv_ag1), c(42480, 6))
expect_equal(dim(mv_ag1), c(42840, 6))

#* test EMD
images <- unique(mv$image)[1:10]
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-wide_mv_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ test_that("reading mv github data as long works", {
)
)

expect_equal(dim(mv_ag1), c(460, 184))
expect_equal(dim(mv_ag1), c(464, 184))

#* test EMD
images <- unique(mv$image)[1:10]
Expand Down

0 comments on commit ff89948

Please sign in to comment.