From 007a5a3a9b8e3408a41b73e7200b43d4b2e1e683 Mon Sep 17 00:00:00 2001
From: Sam Abbott <s.e.abbott12@gmail.com>
Date: Fri, 7 Feb 2025 18:22:27 +0000
Subject: [PATCH] Issue #175: Add distribution look up tables and associated
 tooling (#195)

* add first pass at dist lookup

* add look ups and tests second pass

* fix checks

* update news entry to be easier to read

* improve docs for dist and primary id

* close testing gaps

* update family for datasets
---
 .pre-commit-config.yaml                 |   6 +-
 NAMESPACE                               |   2 +
 NEWS.md                                 |   4 ++
 R/data.R                                |  33 ++++++++++
 R/pcd-stan-tools.R                      |  36 +++++++++++
 R/pcd_cmdstan_model.R                   |  17 +++--
 R/pcens.R                               |   8 ++-
 R/pprimarycensored.R                    |   6 +-
 R/utils.R                               |  82 ++++++++++++++++++++++--
 data-raw/distributions.R                |  34 ++++++++++
 data/pcd_distributions.rda              | Bin 0 -> 458 bytes
 data/pcd_primary_distributions.rda      | Bin 0 -> 282 bytes
 inst/WORDLIST                           |   1 +
 man/add_name_attribute.Rd               |  10 ++-
 man/pcd_as_stan_data.Rd                 |  11 ++--
 man/pcd_dist_name.Rd                    |  37 +++++++++++
 man/pcd_distributions.Rd                |  33 ++++++++++
 man/pcd_load_stan_functions.Rd          |   1 +
 man/pcd_primary_distributions.Rd        |  31 +++++++++
 man/pcd_stan_dist_id.Rd                 |  37 +++++++++++
 man/pcd_stan_files.Rd                   |   1 +
 man/pcd_stan_functions.Rd               |   1 +
 man/pcd_stan_path.Rd                    |   1 +
 man/pcens_cdf.Rd                        |   6 +-
 man/pcens_cdf.default.Rd                |   2 +-
 man/pprimarycensored.Rd                 |   6 +-
 tests/testthat/test-pcd_dist_name.R     |  23 +++++++
 tests/testthat/test-pcd_stan_dist_id.R  |  32 +++++++++
 tests/testthat/test-suggest_dist_name.R |  39 +++++++++++
 vignettes/fitting-dists-with-stan.Rmd   |   4 +-
 30 files changed, 465 insertions(+), 39 deletions(-)
 create mode 100644 R/data.R
 create mode 100644 data-raw/distributions.R
 create mode 100644 data/pcd_distributions.rda
 create mode 100644 data/pcd_primary_distributions.rda
 create mode 100644 man/pcd_dist_name.Rd
 create mode 100644 man/pcd_distributions.Rd
 create mode 100644 man/pcd_primary_distributions.Rd
 create mode 100644 man/pcd_stan_dist_id.Rd
 create mode 100644 tests/testthat/test-pcd_dist_name.R
 create mode 100644 tests/testthat/test-pcd_stan_dist_id.R
 create mode 100644 tests/testthat/test-suggest_dist_name.R

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4a228d52..65127e2e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # R specific hooks: https://github.com/lorenzwalthert/precommit
 repos:
 -   repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.4.3
+    rev: v0.4.3.9003
     hooks:
     -   id: style-files
         args: [--style_pkg=styler, --style_fun=tidyverse_style]
@@ -14,7 +14,7 @@ repos:
     -   id: no-browser-statement
     -   id: deps-in-desc
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
     -   id: check-added-large-files
         args: ['--maxkb=200']
@@ -32,7 +32,7 @@ repos:
     -   id: check-hooks-apply
     -   id: check-useless-excludes
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
     -   id: trailing-whitespace
     -   id: check-yaml
diff --git a/NAMESPACE b/NAMESPACE
index 0df8b3c3..bd2ce648 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,7 +15,9 @@ export(fitdistdoublecens)
 export(new_pcens)
 export(pcd_as_stan_data)
 export(pcd_cmdstan_model)
+export(pcd_dist_name)
 export(pcd_load_stan_functions)
+export(pcd_stan_dist_id)
 export(pcd_stan_files)
 export(pcd_stan_functions)
 export(pcd_stan_path)
diff --git a/NEWS.md b/NEWS.md
index b51d9dff..cf94b2dd 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,10 @@ Development release.
 
 - Updated the CI so that stan code is also tested on Windows and Mac. This is to ensure that the stan code is compatible with these platforms and in response to a CI bug in `epidist`.
 - Revised approach to automatic discovery of distribution functions. This soft deprecates the `pdist_name` and `dprimary_name` arguments throughout. Users wishing to pass distribution names (i.e. to potentially leverage analytical solutions) are advised to use the newly introduced `add_name_attribute()` function. Adds transient dependency on `lifecycle` and `rlang` packages. See #188 by @pearsonca.
+- Added `pcd_stan_dist_id()` to allow for discovery of distribution IDs for Stan models.
+- Added `pcd_dist_name()` to allow for discovery of distribution names for R functions as needed for `add_name_attribute()`.
+- Added documentation to suggest the use of `methods(pcens_cdf)` to find analytical solutions.
+- Added new data sets `pcd_distributions` and `pcd_primary_distributions` to document the distributions supported by `primarycensored`.
 
 ## Documentation
 
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 00000000..a97cc1a2
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,33 @@
+#' Supported delay distributions
+#'
+#' A dataset containing information about the supported delay distributions in
+#' primarycensored. Includes both distributions with base R implementations and
+#' those only available in Stan. Distributions beyond these are not supported
+#' in the stan code but any user functions can be used in the R code.
+#'
+#' @format A data.frame with 17 rows and 4 columns:
+#' \describe{
+#'   \item{name}{Distribution name}
+#'   \item{pdist}{R distribution function name (e.g. plnorm), NA if there is no
+#'   base R implementation}
+#'   \item{aliases}{Alternative names/identifiers}
+#'   \item{stan_id}{Stan distribution ID used in the stan code}
+#' }
+#' @family utils
+"pcd_distributions"
+
+#' Supported primary event distributions
+#'
+#' A dataset containing information about the supported primary event
+#' distributions in primarycensored. Distributions beyond these are not
+#' supported in the stan code but any user functions can be used in the R code.
+#'
+#' @format A data.frame with 2 rows and 4 columns:
+#' \describe{
+#'   \item{name}{Distribution name}
+#'   \item{dprimary}{R density function name}
+#'   \item{aliases}{Alternative names/identifiers}
+#'   \item{stan_id}{Stan distribution ID used in the stan code}
+#' }
+#' @family utils
+"pcd_primary_distributions"
diff --git a/R/pcd-stan-tools.R b/R/pcd-stan-tools.R
index dac46b26..71249f77 100644
--- a/R/pcd-stan-tools.R
+++ b/R/pcd-stan-tools.R
@@ -227,3 +227,39 @@ pcd_load_stan_functions <- function(
 
   return(result)
 }
+
+#' Get distribution stan ID by name
+#'
+#' @param name String. Distribution name or alias
+#' @param type String. "delay" or "primary" corresponding to the type of
+#'  distribution to use as the look up. If delay then [pcd_distributions()]
+#'  is used, if primary then [pcd_primary_distributions()] is used.
+#'
+#' @return Numeric distribution ID
+#' @export
+#' @family stantools
+#' @examples
+#' pcd_stan_dist_id("lnorm")
+#' pcd_stan_dist_id("lognormal")
+#' pcd_stan_dist_id("gamma")
+#' pcd_stan_dist_id("weibull")
+#' pcd_stan_dist_id("exp")
+#' pcd_stan_dist_id("unif", type = "primary")
+pcd_stan_dist_id <- function(name, type = c("delay", "primary")) {
+  type <- match.arg(type)
+  df <- switch(type,
+    delay = primarycensored::pcd_distributions,
+    primary = primarycensored::pcd_primary_distributions
+  )
+
+  match_idx <- which(df$name == name | df$aliases == name)
+
+  if (length(match_idx) == 0) {
+    stop(
+      "No ", type, " distribution found matching: ", name, "\n",
+      .suggest_dist_name(name, type)
+    )
+  }
+
+  df$stan_id[match_idx]
+}
diff --git a/R/pcd_cmdstan_model.R b/R/pcd_cmdstan_model.R
index e959e1b3..d2b40bf8 100644
--- a/R/pcd_cmdstan_model.R
+++ b/R/pcd_cmdstan_model.R
@@ -65,17 +65,16 @@ pcd_cmdstan_model <- function(
 #' @param pwindow Column name for primary window (default: "pwindow")
 #'
 #' @param relative_obs_time Column name for relative observation time
-#' (default: "relative_obs_time")
+#'  (default: "relative_obs_time")
 #'
 #' @param dist_id Integer identifying the delay distribution:
-#'   1 = Lognormal, 2 = Gamma, 3 = Weibull, 4 = Exponential,
-#'   5 = Generalized Gamma, 6 = Negative Binomial, 7 = Poisson,
-#'   8 = Bernoulli, 9 = Beta, 10 = Binomial, 11 = Categorical, 12 = Cauchy,
-#'   13 = Chi-square, 14 = Dirichlet, 15 = Gumbel, 16 = Inverse Gamma,
-#'   17 = Logistic
+#'   You can use [pcd_stan_dist_id()] to get the dist ID for a
+#'   distribution or look at the [pcd_distributions] data set.
 #'
 #' @param primary_id Integer identifying the primary distribution:
-#'   1 = Uniform, 2 = Exponential growth
+#'   You can use [pcd_stan_dist_id()] to get the primary dist ID for a
+#'   distribution (make sure to select the "primary" type) or look at the
+#'   [pcd_primary_distributions] data set.
 #'
 #' @param param_bounds A list with elements `lower` and `upper`, each a numeric
 #'   vector specifying bounds for the delay distribution parameters.
@@ -92,14 +91,14 @@ pcd_cmdstan_model <- function(
 #' @param compute_log_lik Logical; compute log likelihood? (default: FALSE)
 #'
 #' @param use_reduce_sum Logical; use reduce_sum for performance?
-#'  (default: FALSE)
+#'   (default: FALSE)
 #'
 #' @param truncation_check_multiplier Numeric multiplier to use for checking
 #'   if the truncation time D is appropriate relative to the maximum delay
 #'   for each unique D value. Set to NULL to skip the check. Default is 2.
 #'
 #' @return A list containing the data formatted for use with
-#' [pcd_cmdstan_model()]
+#'   [pcd_cmdstan_model()]
 #'
 #' @export
 #' @family modelhelpers
diff --git a/R/pcens.R b/R/pcens.R
index 952b0280..acdad0f2 100644
--- a/R/pcens.R
+++ b/R/pcens.R
@@ -37,6 +37,12 @@ new_pcens <- function(
 
 #' Compute primary event censored CDF
 #'
+#' This function dispatches to either analytical solutions (if available) or
+#' numerical integration via the default method. To see which combinations have
+#' analytical solutions implemented, use `methods(pcens_cdf)`. For example,
+#' `pcens_cdf.gamma_unif` indicates an analytical solution exists for gamma
+#' delay with uniform primary event distributions.
+#'
 #' @inheritParams pprimarycensored
 #'
 #' @param object A `primarycensored` object as created by
@@ -61,7 +67,7 @@ pcens_cdf <- function(
 #'
 #' This method serves as a fallback for combinations of delay and primary
 #' event distributions that don't have specific implementations. It uses
-#' the numeric integration method.
+#' a numeric integration method.
 #'
 #' @inheritParams pcens_cdf
 #' @inheritParams pprimarycensored
diff --git a/R/pprimarycensored.R b/R/pprimarycensored.R
index 79711f6e..57c8053b 100644
--- a/R/pprimarycensored.R
+++ b/R/pprimarycensored.R
@@ -81,10 +81,8 @@
 #' for automatic use of analytical solutions when available, while
 #' seamlessly falling back to numerical integration when necessary.
 #'
-#' Note: For analytical detection to work correctly, `pdist` and `dprimary`
-#' must be directly passed as distribution functions, not via assignment or
-#' `pdist_name` and `dprimary_name` must be used to override the default
-#' extraction of the function name.
+#' See `methods(pcens_cdf)` for which combinations have analytical
+#' solutions implemented.
 #'
 #' @family primarycensored
 #' @seealso [new_pcens()] and [pcens_cdf()]
diff --git a/R/utils.R b/R/utils.R
index c7320a4a..867bcdc3 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -28,7 +28,9 @@
 #' detected by [pprimarycensored()] and related functions.
 #'
 #' This is useful as it enables the automatic use of analytical solutions for
-#' distributions where they exist.
+#' distributions where they exist. You can check which analytical solutions are
+#' available using `methods(pcens_cdf)` and check distribution names using
+#' [pcd_dist_name()].
 #'
 #' @param func Function, for example the `p`- or `d`- form of a distribution
 #' function.
@@ -84,10 +86,9 @@ add_name_attribute <- function(func, name) {
 #'
 #' @keywords internal
 .name_deprecation <- function(
-  pdist_name, dprimary_name,
-  env = rlang::caller_env(),
-  user_env = rlang::caller_env(2)
-) {
+    pdist_name, dprimary_name,
+    env = rlang::caller_env(),
+    user_env = rlang::caller_env(2)) {
   test_use <- c(
     lifecycle::is_present(pdist_name),
     lifecycle::is_present(dprimary_name)
@@ -97,8 +98,7 @@ add_name_attribute <- function(func, name) {
     lifecycle::deprecate_soft(
       when = "1.0.0",
       what = I("`pdist_name` and `dprimary_name` are deprecated across all
-        functions and will be ignored in future versions;"
-      ),
+        functions and will be ignored in future versions;"),
       details = "Use `add_name_attribute()` instead.",
       env = env, user_env = user_env
     )
@@ -107,3 +107,71 @@ add_name_attribute <- function(func, name) {
   }
   return(res)
 }
+
+#' Get distribution function cdf or pdf name
+#'
+#' @param name String. Distribution name or alias
+#' @param type String. "delay" or "primary" corresponding to the type of
+#'  distribution to use as the look up. If delay then [pcd_distributions()]
+#'  is used, if primary then [pcd_primary_distributions()] is used.
+#'
+#' @return String distribution function name or NA if no base R implementation
+#' @export
+#' @family utils
+#' @examples
+#' pcd_dist_name("lnorm")
+#' pcd_dist_name("lognormal")
+#' pcd_dist_name("gamma")
+#' pcd_dist_name("weibull")
+#' pcd_dist_name("exp")
+#' pcd_dist_name("unif", type = "primary")
+#' pcd_dist_name("expgrowth", type = "primary")
+pcd_dist_name <- function(name, type = c("delay", "primary")) {
+  type <- match.arg(type)
+  df <- switch(type,
+    delay = primarycensored::pcd_distributions,
+    primary = primarycensored::pcd_primary_distributions
+  )
+
+  match_idx <- which(df$name == name | df$aliases == name)
+
+  if (length(match_idx) == 0) {
+    stop(
+      "No ", type, " distribution found matching: ", name, "\n",
+      .suggest_dist_name(name, type)
+    )
+  }
+
+  if (type == "delay") {
+    df$pdist[match_idx]
+  } else {
+    df$dprimary[match_idx]
+  }
+}
+
+#' @keywords internal
+.suggest_dist_name <- function(input, type = "delay") {
+  dist_names <- switch(type,
+    delay = primarycensored::pcd_distributions$name,
+    primary = primarycensored::pcd_primary_distributions$name
+  )
+
+  distances <- utils::adist(input, dist_names)
+  min_dist <- min(distances)
+  candidates <- dist_names[which(distances == min_dist)]
+
+  if (min_dist <= 2 && length(candidates) > 0) {
+    suggestions <- paste0(
+      "Did you mean: ",
+      toString(unique(candidates)),
+      "?"
+    )
+  } else {
+    suggestions <- paste0(
+      "Available distributions:",
+      toString(unique(dist_names))
+    )
+  }
+
+  return(suggestions)
+}
diff --git a/data-raw/distributions.R b/data-raw/distributions.R
new file mode 100644
index 00000000..30070285
--- /dev/null
+++ b/data-raw/distributions.R
@@ -0,0 +1,34 @@
+pcd_distributions <- data.frame(
+  name = c(
+    "lnorm", "gamma", "weibull", "exp", "gengamma", "nbinom",
+    "pois", "bern", "beta", "binom", "cat", "cauchy", "chisq",
+    "dirich", "gumbel", "invgamma", "logis"
+  ),
+  pdist = c(
+    "plnorm", "pgamma", "pweibull", "pexp", NA, "pnbinom",
+    "ppois", NA, "pbeta", "pbinom", NA, "pcauchy", "pchisq",
+    NA, "pgumbel", NA, "plogis"
+  ),
+  aliases = c(
+    "lognormal", "gamma", "weibull", "exponential", "generalized gamma",
+    "negative binomial", "poisson", "bernoulli", "beta", "binomial",
+    "categorical", "cauchy", "chi-square", "dirichlet", "gumbel",
+    "inverse gamma", "logistic"
+  ),
+  stan_id = 1:17,
+  stringsAsFactors = FALSE
+)
+
+pcd_primary_distributions <- data.frame(
+  name = c("unif", "expgrowth"),
+  dprimary = c("dunif", "dexpgrowth"),
+  aliases = c("uniform", "exponential growth"),
+  stan_id = 1:2,
+  stringsAsFactors = FALSE
+)
+
+usethis::use_data(
+  pcd_distributions,
+  pcd_primary_distributions,
+  overwrite = TRUE
+)
diff --git a/data/pcd_distributions.rda b/data/pcd_distributions.rda
new file mode 100644
index 0000000000000000000000000000000000000000..6b9a4f43dcd63c22bbfb8986f024577afaa0ad16
GIT binary patch
literal 458
zcmV;*0X6<YT4*^jL0KkKS;W+E9{>SR|H1$JXaGP1C_u#k7C^t}|L{Nn00F=OumPE@
zOJSy<srqG4Q`GS^)cq;FNE!nPf_jfhr>H<vXeh{N22B6}000DvAd|$%(<Jo(XaS+4
zOn}KP-_do<85yhhzH)52tuLYpOwt<!wfcJ1?<iL_vE1Tb%2ZaIo2T7kOA|))YL@=V
zM3O9Gw>DZfY|Se1`s9l%B7|$@w|k?43kR@;(llm63Y6U)B2gk!cm_5J79dH2Lru28
zK-dszAX@an2rdHMwnLuSY>0;b6634ijW<YV1cFe>UX~}%AmqUkZ`>W=`OAuT&WBN;
zn!1JmQ-qE8%u6j(*IFxwi!@a;>4q9|3%RcxxxG?J+8NMR9Pr68CyaSa_>9gr?3eHh
zZ(z6unyoL4>xM01aGu<9=qtc=4l#05OF7gWAqw$H6QnM+y1B8et=f~FVhIM5i-wLW
zFQ+{D4v|=0RIO?>2PMHWXIoy-ABe7sG<B(UDTE%Csm;qx2ywDDRs<{yWQ5W%l#Q~u
z)q$}LBp^1j$%-L7?jff{8{=LI6ii7??v==<^Y+`kfrE+x=STRvk}1N3h9;wc__grV
AegFUf

literal 0
HcmV?d00001

diff --git a/data/pcd_primary_distributions.rda b/data/pcd_primary_distributions.rda
new file mode 100644
index 0000000000000000000000000000000000000000..345870c67d4953a858fede5646634fac62bacb19
GIT binary patch
literal 282
zcmV+#0p<QeT4*^jL0KkKS>YMco&W%%f5HCqSRg<H2tdUE7C^t}{@_3W00BS&umJXL
zLqt)uCZ40z$%(Zz4H{_B8&gdKLYWchPezl}13&{n0qFuoLNw8!0000q05mQbVo9R{
z@s6<EqLMlinwH{Gk(dFV8)~9=Adv)_VtEi0SaFKJXoLz@iDiZc=aLxapf}ml3@j9g
z1|*towtyQ57$w702vEM;ru!b)>pncBwyn0!?)IO@biLxn(R0(l)WW`cu{nX>6;j4@
zARUYIkgjO;d<ve~$pR^kajsB+eK*eKK#s{S(3VOA{#s3?iew@wa45p3JtmjTQq2if
gg#3v_%!6iNGL)jexC8ucGza`$$rRy2Lxg8Wcpkxhy8r+H

literal 0
HcmV?d00001

diff --git a/inst/WORDLIST b/inst/WORDLIST
index 420cbd24..4e76cd46 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -66,6 +66,7 @@ pcd
 pcens
 pdist
 pearsonca
+plnorm
 pprimarycensored
 precalculation
 primaryeventdistributions
diff --git a/man/add_name_attribute.Rd b/man/add_name_attribute.Rd
index fa856712..91d27832 100644
--- a/man/add_name_attribute.Rd
+++ b/man/add_name_attribute.Rd
@@ -27,10 +27,18 @@ name, you can use this helper function to set it in a way that will be
 detected by \code{\link[=pprimarycensored]{pprimarycensored()}} and related functions.
 
 This is useful as it enables the automatic use of analytical solutions for
-distributions where they exist.
+distributions where they exist. You can check which analytical solutions are
+available using \code{methods(pcens_cdf)} and check distribution names using
+\code{\link[=pcd_dist_name]{pcd_dist_name()}}.
 }
 \examples{
 dist <- add_name_attribute(pnorm, "hello")
 attr(dist, "name")
 }
+\seealso{
+Utility functions for working with distributions
+\code{\link{pcd_dist_name}()},
+\code{\link{pcd_distributions}},
+\code{\link{pcd_primary_distributions}}
+}
 \concept{utils}
diff --git a/man/pcd_as_stan_data.Rd b/man/pcd_as_stan_data.Rd
index 45cca8a7..6cbdd752 100644
--- a/man/pcd_as_stan_data.Rd
+++ b/man/pcd_as_stan_data.Rd
@@ -38,14 +38,13 @@ pcd_as_stan_data(
 (default: "relative_obs_time")}
 
 \item{dist_id}{Integer identifying the delay distribution:
-1 = Lognormal, 2 = Gamma, 3 = Weibull, 4 = Exponential,
-5 = Generalized Gamma, 6 = Negative Binomial, 7 = Poisson,
-8 = Bernoulli, 9 = Beta, 10 = Binomial, 11 = Categorical, 12 = Cauchy,
-13 = Chi-square, 14 = Dirichlet, 15 = Gumbel, 16 = Inverse Gamma,
-17 = Logistic}
+You can use \code{\link[=pcd_stan_dist_id]{pcd_stan_dist_id()}} to get the dist ID for a
+distribution or look at the \link{pcd_distributions} data set.}
 
 \item{primary_id}{Integer identifying the primary distribution:
-1 = Uniform, 2 = Exponential growth}
+You can use \code{\link[=pcd_stan_dist_id]{pcd_stan_dist_id()}} to get the primary dist ID for a
+distribution (make sure to select the "primary" type) or look at the
+\link{pcd_primary_distributions} data set.}
 
 \item{param_bounds}{A list with elements \code{lower} and \code{upper}, each a numeric
 vector specifying bounds for the delay distribution parameters.}
diff --git a/man/pcd_dist_name.Rd b/man/pcd_dist_name.Rd
new file mode 100644
index 00000000..84049151
--- /dev/null
+++ b/man/pcd_dist_name.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{pcd_dist_name}
+\alias{pcd_dist_name}
+\title{Get distribution function cdf or pdf name}
+\usage{
+pcd_dist_name(name, type = c("delay", "primary"))
+}
+\arguments{
+\item{name}{String. Distribution name or alias}
+
+\item{type}{String. "delay" or "primary" corresponding to the type of
+distribution to use as the look up. If delay then \code{\link[=pcd_distributions]{pcd_distributions()}}
+is used, if primary then \code{\link[=pcd_primary_distributions]{pcd_primary_distributions()}} is used.}
+}
+\value{
+String distribution function name or NA if no base R implementation
+}
+\description{
+Get distribution function cdf or pdf name
+}
+\examples{
+pcd_dist_name("lnorm")
+pcd_dist_name("lognormal")
+pcd_dist_name("gamma")
+pcd_dist_name("weibull")
+pcd_dist_name("exp")
+pcd_dist_name("unif", type = "primary")
+pcd_dist_name("expgrowth", type = "primary")
+}
+\seealso{
+Utility functions for working with distributions
+\code{\link{add_name_attribute}()},
+\code{\link{pcd_distributions}},
+\code{\link{pcd_primary_distributions}}
+}
+\concept{utils}
diff --git a/man/pcd_distributions.Rd b/man/pcd_distributions.Rd
new file mode 100644
index 00000000..ed471769
--- /dev/null
+++ b/man/pcd_distributions.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{pcd_distributions}
+\alias{pcd_distributions}
+\title{Supported delay distributions}
+\format{
+A data.frame with 17 rows and 4 columns:
+\describe{
+\item{name}{Distribution name}
+\item{pdist}{R distribution function name (e.g. plnorm), NA if there is no
+base R implementation}
+\item{aliases}{Alternative names/identifiers}
+\item{stan_id}{Stan distribution ID used in the stan code}
+}
+}
+\usage{
+pcd_distributions
+}
+\description{
+A dataset containing information about the supported delay distributions in
+primarycensored. Includes both distributions with base R implementations and
+those only available in Stan. Distributions beyond these are not supported
+in the stan code but any user functions can be used in the R code.
+}
+\seealso{
+Utility functions for working with distributions
+\code{\link{add_name_attribute}()},
+\code{\link{pcd_dist_name}()},
+\code{\link{pcd_primary_distributions}}
+}
+\concept{utils}
+\keyword{datasets}
diff --git a/man/pcd_load_stan_functions.Rd b/man/pcd_load_stan_functions.Rd
index f70c99f5..ad21ee0d 100644
--- a/man/pcd_load_stan_functions.Rd
+++ b/man/pcd_load_stan_functions.Rd
@@ -36,6 +36,7 @@ Load Stan functions as a string
 }
 \seealso{
 Tools for working with package Stan functions
+\code{\link{pcd_stan_dist_id}()},
 \code{\link{pcd_stan_files}()},
 \code{\link{pcd_stan_functions}()},
 \code{\link{pcd_stan_path}()}
diff --git a/man/pcd_primary_distributions.Rd b/man/pcd_primary_distributions.Rd
new file mode 100644
index 00000000..f33e78c6
--- /dev/null
+++ b/man/pcd_primary_distributions.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{pcd_primary_distributions}
+\alias{pcd_primary_distributions}
+\title{Supported primary event distributions}
+\format{
+A data.frame with 2 rows and 4 columns:
+\describe{
+\item{name}{Distribution name}
+\item{dprimary}{R density function name}
+\item{aliases}{Alternative names/identifiers}
+\item{stan_id}{Stan distribution ID used in the stan code}
+}
+}
+\usage{
+pcd_primary_distributions
+}
+\description{
+A dataset containing information about the supported primary event
+distributions in primarycensored. Distributions beyond these are not
+supported in the stan code but any user functions can be used in the R code.
+}
+\seealso{
+Utility functions for working with distributions
+\code{\link{add_name_attribute}()},
+\code{\link{pcd_dist_name}()},
+\code{\link{pcd_distributions}}
+}
+\concept{utils}
+\keyword{datasets}
diff --git a/man/pcd_stan_dist_id.Rd b/man/pcd_stan_dist_id.Rd
new file mode 100644
index 00000000..678d6ea7
--- /dev/null
+++ b/man/pcd_stan_dist_id.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pcd-stan-tools.R
+\name{pcd_stan_dist_id}
+\alias{pcd_stan_dist_id}
+\title{Get distribution stan ID by name}
+\usage{
+pcd_stan_dist_id(name, type = c("delay", "primary"))
+}
+\arguments{
+\item{name}{String. Distribution name or alias}
+
+\item{type}{String. "delay" or "primary" corresponding to the type of
+distribution to use as the look up. If delay then \code{\link[=pcd_distributions]{pcd_distributions()}}
+is used, if primary then \code{\link[=pcd_primary_distributions]{pcd_primary_distributions()}} is used.}
+}
+\value{
+Numeric distribution ID
+}
+\description{
+Get distribution stan ID by name
+}
+\examples{
+pcd_stan_dist_id("lnorm")
+pcd_stan_dist_id("lognormal")
+pcd_stan_dist_id("gamma")
+pcd_stan_dist_id("weibull")
+pcd_stan_dist_id("exp")
+pcd_stan_dist_id("unif", type = "primary")
+}
+\seealso{
+Tools for working with package Stan functions
+\code{\link{pcd_load_stan_functions}()},
+\code{\link{pcd_stan_files}()},
+\code{\link{pcd_stan_functions}()},
+\code{\link{pcd_stan_path}()}
+}
+\concept{stantools}
diff --git a/man/pcd_stan_files.Rd b/man/pcd_stan_files.Rd
index 3508d9fc..5310fbe1 100644
--- a/man/pcd_stan_files.Rd
+++ b/man/pcd_stan_files.Rd
@@ -24,6 +24,7 @@ filtering for files that contain specific functions.
 \seealso{
 Tools for working with package Stan functions
 \code{\link{pcd_load_stan_functions}()},
+\code{\link{pcd_stan_dist_id}()},
 \code{\link{pcd_stan_functions}()},
 \code{\link{pcd_stan_path}()}
 }
diff --git a/man/pcd_stan_functions.Rd b/man/pcd_stan_functions.Rd
index 5437b347..0a752e34 100644
--- a/man/pcd_stan_functions.Rd
+++ b/man/pcd_stan_functions.Rd
@@ -22,6 +22,7 @@ the names of all functions defined in those files.
 \seealso{
 Tools for working with package Stan functions
 \code{\link{pcd_load_stan_functions}()},
+\code{\link{pcd_stan_dist_id}()},
 \code{\link{pcd_stan_files}()},
 \code{\link{pcd_stan_path}()}
 }
diff --git a/man/pcd_stan_path.Rd b/man/pcd_stan_path.Rd
index ffc6a056..53c8091d 100644
--- a/man/pcd_stan_path.Rd
+++ b/man/pcd_stan_path.Rd
@@ -15,6 +15,7 @@ Get the path to the Stan code
 \seealso{
 Tools for working with package Stan functions
 \code{\link{pcd_load_stan_functions}()},
+\code{\link{pcd_stan_dist_id}()},
 \code{\link{pcd_stan_files}()},
 \code{\link{pcd_stan_functions}()}
 }
diff --git a/man/pcens_cdf.Rd b/man/pcens_cdf.Rd
index a90cecad..15b12d28 100644
--- a/man/pcens_cdf.Rd
+++ b/man/pcens_cdf.Rd
@@ -23,7 +23,11 @@ breaks down.}
 Vector of computed primary event censored CDFs
 }
 \description{
-Compute primary event censored CDF
+This function dispatches to either analytical solutions (if available) or
+numerical integration via the default method. To see which combinations have
+analytical solutions implemented, use \code{methods(pcens_cdf)}. For example,
+\code{pcens_cdf.gamma_unif} indicates an analytical solution exists for gamma
+delay with uniform primary event distributions.
 }
 \seealso{
 Low level primary event censored distribution objects and methods
diff --git a/man/pcens_cdf.default.Rd b/man/pcens_cdf.default.Rd
index 3e946c1c..e175448d 100644
--- a/man/pcens_cdf.default.Rd
+++ b/man/pcens_cdf.default.Rd
@@ -25,7 +25,7 @@ Vector of computed primary event censored CDFs
 \description{
 This method serves as a fallback for combinations of delay and primary
 event distributions that don't have specific implementations. It uses
-the numeric integration method.
+a numeric integration method.
 }
 \details{
 This method implements the numerical integration approach for computing
diff --git a/man/pprimarycensored.Rd b/man/pprimarycensored.Rd
index f91e0fa8..89b3adb3 100644
--- a/man/pprimarycensored.Rd
+++ b/man/pprimarycensored.Rd
@@ -107,10 +107,8 @@ censored CDF using \code{\link[=pcens_cdf]{pcens_cdf()}}. This abstraction allow
 for automatic use of analytical solutions when available, while
 seamlessly falling back to numerical integration when necessary.
 
-Note: For analytical detection to work correctly, \code{pdist} and \code{dprimary}
-must be directly passed as distribution functions, not via assignment or
-\code{pdist_name} and \code{dprimary_name} must be used to override the default
-extraction of the function name.
+See \code{methods(pcens_cdf)} for which combinations have analytical
+solutions implemented.
 }
 \examples{
 # Example: Lognormal distribution with uniform primary events
diff --git a/tests/testthat/test-pcd_dist_name.R b/tests/testthat/test-pcd_dist_name.R
new file mode 100644
index 00000000..dca53859
--- /dev/null
+++ b/tests/testthat/test-pcd_dist_name.R
@@ -0,0 +1,23 @@
+test_that("pcd_dist_name works for valid distributions", {
+  # Test delay distributions
+  expect_identical(pcd_dist_name("lnorm", "delay"), "plnorm")
+  expect_identical(pcd_dist_name("lognormal", "delay"), "plnorm")
+  expect_identical(pcd_dist_name("gamma", "delay"), "pgamma")
+
+  # Test primary distributions
+  expect_identical(pcd_dist_name("unif", "primary"), "dunif")
+  expect_identical(pcd_dist_name("uniform", "primary"), "dunif")
+  expect_identical(pcd_dist_name("expgrowth", "primary"), "dexpgrowth")
+})
+
+test_that("pcd_dist_name gives informative errors", {
+  expect_error(
+    pcd_dist_name("nonexistent", "delay"),
+    "No delay distribution found matching: nonexistent"
+  )
+
+  expect_error(
+    pcd_dist_name("badprim", "primary"),
+    "No primary distribution found matching: badprim"
+  )
+})
diff --git a/tests/testthat/test-pcd_stan_dist_id.R b/tests/testthat/test-pcd_stan_dist_id.R
new file mode 100644
index 00000000..6a5dc8ef
--- /dev/null
+++ b/tests/testthat/test-pcd_stan_dist_id.R
@@ -0,0 +1,32 @@
+test_that("pcd_stan_dist_id works for valid distributions", {
+  # Test delay distributions
+  expect_identical(pcd_stan_dist_id("lnorm", "delay"), 1L)
+  expect_identical(pcd_stan_dist_id("lognormal", "delay"), 1L)
+  expect_identical(pcd_stan_dist_id("gamma", "delay"), 2L)
+
+  # Test primary distributions
+  expect_identical(pcd_stan_dist_id("unif", "primary"), 1L)
+  expect_identical(pcd_stan_dist_id("uniform", "primary"), 1L)
+  expect_identical(pcd_stan_dist_id("expgrowth", "primary"), 2L)
+})
+
+test_that("pcd_stan_dist_id gives informative errors", {
+  expect_error(
+    pcd_stan_dist_id("nonexistent", "delay"),
+    "No delay distribution found matching: nonexistent"
+  )
+
+  expect_error(
+    pcd_stan_dist_id("badprim", "primary"),
+    "No primary distribution found matching: badprim"
+  )
+})
+
+test_that("Distribution IDs match Stan model definitions", {
+  # This ensures consistency with the Stan code's dist_id numbering
+  delay_dists <- pcd_distributions
+  expect_identical(delay_dists$stan_id, seq_len(nrow(delay_dists)))
+
+  prim_dists <- pcd_primary_distributions
+  expect_identical(prim_dists$stan_id, seq_len(nrow(prim_dists)))
+})
diff --git a/tests/testthat/test-suggest_dist_name.R b/tests/testthat/test-suggest_dist_name.R
new file mode 100644
index 00000000..455199b9
--- /dev/null
+++ b/tests/testthat/test-suggest_dist_name.R
@@ -0,0 +1,39 @@
+test_that(".suggest_dist_name works for delay distributions", {
+  # Test close match suggestion
+  expect_identical(
+    .suggest_dist_name("gamm"),
+    "Did you mean: gamma?"
+  )
+
+  # Test multiple close matches
+  expect_identical(
+    .suggest_dist_name("exp"),
+    "Did you mean: exp?"
+  )
+
+  # Test no close matches shows all distributions
+  expect_identical(
+    .suggest_dist_name("notadist"),
+    paste0(
+      "Available distributions:",
+      toString(unique(primarycensored::pcd_distributions$name))
+    )
+  )
+})
+
+test_that(".suggest_dist_name works for primary distributions", {
+  # Test close match suggestion
+  expect_identical(
+    .suggest_dist_name("unif", type = "primary"),
+    "Did you mean: unif?"
+  )
+
+  # Test no close matches shows all distributions
+  expect_identical(
+    .suggest_dist_name("notadist", type = "primary"),
+    paste0(
+      "Available distributions:",
+      toString(unique(primarycensored::pcd_primary_distributions$name))
+    )
+  )
+})
diff --git a/vignettes/fitting-dists-with-stan.Rmd b/vignettes/fitting-dists-with-stan.Rmd
index 50047ace..26b57e5e 100644
--- a/vignettes/fitting-dists-with-stan.Rmd
+++ b/vignettes/fitting-dists-with-stan.Rmd
@@ -291,8 +291,8 @@ pcd_data <- pcd_as_stan_data(
   delay = "observed_delay",
   delay_upper = "observed_delay_upper",
   relative_obs_time = "obs_time",
-  dist_id = 1, # Lognormal distribution
-  primary_id = 1, # Uniform primary distribution
+  dist_id = pcd_stan_dist_id("lognormal", "delay"),
+  primary_id = pcd_stan_dist_id("uniform", "primary"),
   param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)),
   primary_param_bounds = list(lower = numeric(0), upper = numeric(0)),
   priors = list(location = c(1, 0.5), scale = c(1, 0.5)),