mlr-org
diff --git a/‎DESCRIPTION
+10-6 b/‎DESCRIPTION
+10-6
diff --git a/‎NAMESPACE
+3 b/‎NAMESPACE
+3
diff --git a/‎NEWS.md
+3-1 b/‎NEWS.md
+3-1
diff --git a/‎R/PipeOpPredRegrSurvPEM.R
+117 b/‎R/PipeOpPredRegrSurvPEM.R
+117
diff --git a/‎R/PipeOpTaskSurvClassifDiscTime.R
+6-3 b/‎R/PipeOpTaskSurvClassifDiscTime.R
+6-3
@@ -1,6 +1,6 @@
 Package: mlr3proba
 Title: Probabilistic Supervised Learning for 'mlr3'
-Version: 0.7.4
+Version: 0.7.5
 Authors@R: c(
     person("Raphael", "Sonabend", , "raphaelsonabend@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0001-9225-4654")),
@@ -18,7 +18,8 @@ Authors@R: c(
     person("Maximilian", "Muecke", , "muecke.maximilian@gmail.com", role = "ctb",
            comment = c(ORCID = "0009-0000-9432-9795")),
     person("Lee Xingzhuo", "Li", , "xingzhuo_li@yahoo.com.au", role = "ctb",
-           comment = c(ORCID = "0000-0001-5259-5198"))
+           comment = c(ORCID = "0000-0001-5259-5198")),
+    person("Markus", "Goeswein", , "markus.goeswein@outlook.de", role = "ctb")
   )
 Description: Provides extensions for probabilistic supervised learning for
     'mlr3'.  This includes extending the regression task to probabilistic
@@ -28,7 +29,7 @@ License: LGPL-3
 URL: https://mlr3proba.mlr-org.com, https://github.com/mlr-org/mlr3proba
 BugReports: https://github.com/mlr-org/mlr3proba/issues
 Depends:
-    mlr3 (>= 0.14.1),
+    mlr3 (>= 0.23.0),
     R (>= 3.5.0)
 Imports:
     checkmate,
@@ -48,7 +49,7 @@ Suggests:
     knitr,
     lgr,
     lifecycle,
-    mlr3learners,
+    mlr3learners (>= 0.10.0),
     mlr3viz,
     pammtools,
     param6 (>= 0.2.4),
@@ -58,13 +59,14 @@ Suggests:
     set6 (>= 0.2.6),
     simsurv,
     survAUC,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    glmnet
 LinkingTo:
     Rcpp
 Remotes:
     xoopR/distr6,
     xoopR/param6,
-    xoopR/set6
+    xoopR/set6,
 ByteCompile: true
 Config/testthat/edition: 3
 Encoding: UTF-8
@@ -115,11 +117,13 @@ Collate:
     'PipeOpDistrCompositor.R'
     'PipeOpPredClassifSurvDiscTime.R'
     'PipeOpPredClassifSurvIPCW.R'
+    'PipeOpPredRegrSurvPEM.R'
     'PipeOpProbregrCompositor.R'
     'PipeOpResponseCompositor.R'
     'PipeOpSurvAvg.R'
     'PipeOpTaskSurvClassifDiscTime.R'
     'PipeOpTaskSurvClassifIPCW.R'
+    'PipeOpTaskSurvRegrPEM.R'
     'PredictionDataDens.R'
     'PredictionDataSurv.R'
     'PredictionDens.R'
 
@@ -72,11 +72,13 @@ export(PipeOpCrankCompositor)
 export(PipeOpDistrCompositor)
 export(PipeOpPredClassifSurvDiscTime)
 export(PipeOpPredClassifSurvIPCW)
+export(PipeOpPredRegrSurvPEM)
 export(PipeOpProbregr)
 export(PipeOpResponseCompositor)
 export(PipeOpSurvAvg)
 export(PipeOpTaskSurvClassifDiscTime)
 export(PipeOpTaskSurvClassifIPCW)
+export(PipeOpTaskSurvRegrPEM)
 export(PredictionDens)
 export(PredictionSurv)
 export(TaskDens)
@@ -95,6 +97,7 @@ export(get_mortality)
 export(pecs)
 export(pipeline_survtoclassif_IPCW)
 export(pipeline_survtoclassif_disctime)
+export(pipeline_survtoregr_pem)
 export(plot_probregr)
 import(checkmate)
 import(data.table)
 
@@ -1,6 +1,8 @@
-# mlr3proba dev
+# mlr3proba 0.7.5
 
 * fix: allow cloning of measures objects
+* New `PipeOp`s: `PipeOpTaskSurvRegrPEM`, `PipeOpPredRegrSurvPEM`
+* New pipeline (**reduction method**): `pipeline_survtoregr_pem`
 
 # mlr3proba 0.7.4
 
 
@@ -0,0 +1,117 @@
+#' @title PipeOpPredRegrSurvPEM
+#' @name mlr_pipeops_trafopred_regrsurv_pem
+#'
+#' @description
+#' Transform [PredictionRegr] to [PredictionSurv].
+#' The predicted piece-wise constant hazards contained in [PredictionRegr] are transformed into survival probabilities and wrapped in a
+#' [PredictionSurv] object.
+#'
+#'  We compute the survival probability from the predicted hazards using the following relation:
+#'  \deqn{S(t | \mathbf{x}) = \exp \left( - \int_{0}^{t} \lambda(s | \mathbf{x}) \, ds \right) = \exp \left( - \sum_{j = 1}^{J} \lambda(j | \mathbf{x}) \, d_j \right),}
+#'  where \eqn{j = 1, \ldots, J} indexes the intervals up to time \eqn{t}, \eqn{\lambda(j | \mathbf{x})} is the constant hazard in interval \eqn{j}, and \eqn{d_j} is the duration of interval \eqn{j}.
+#'
+#'  For a more detailed description of PEM, see [pipeline_survtoregr_pem] or the article cited in the references.
+#'
+#' @section Dictionary:
+#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the
+#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops]
+#' or with the associated sugar function [mlr3pipelines::po()]:
+#' ```
+#' PipeOpPredRegrSurvPEM$new()
+#' mlr_pipeops$get("trafopred_regrsurv_pem")
+#' po("trafopred_regrsurv_pem")
+#' ```
+#'
+#' @section Input and Output Channels:
+#' The input consists of a [PredictionRegr] and a [data.table][data.table::data.table]
+#' containing the transformed data. The [PredictionRegr] is provided by the [mlr3::LearnerRegr],
+#' while the [data.table] is generated by [PipeOpTaskSurvRegrPEM].
+#' The output is the input [PredictionRegr] transformed to a [PredictionSurv].
+#' Only works during prediction phase.
+#'
+#' @references
+#' `r format_bib("bender_2018")`
+#'
+#' @seealso [pipeline_survtoregr_pem]
+#' @family PipeOps
+#' @family Transformation PipeOps
+#' @export
+PipeOpPredRegrSurvPEM = R6Class(
+  "PipeOpPredRegrSurvPEM",
+  inherit = mlr3pipelines::PipeOp,
+
+  public = list(
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    #' @param id (`character(1)`)\cr
+    #' Identifier of the resulting object.
+    initialize = function(id = "trafopred_regrsurv_pem") {
+      super$initialize(
+        id = id,
+        input = data.table(
+          name = c("input", "transformed_data"),
+          train = c("NULL", "data.table"),
+          predict = c("PredictionRegr", "data.table")
+        ),
+        output = data.table(
+          name = "output",
+          train = "NULL",
+          predict = "PredictionSurv"
+        )
+      )
+    }
+  ),
+
+  active = list(
+    #' @field predict_type (`character(1)`)\cr
+    #' Read-only. Returns the active predict type of this PipeOp, which is always `"crank"`.
+    predict_type = function(rhs) {
+      assert_ro_binding(rhs)
+      "crank"
+    }
+  ),
+
+  private = list(
+    .predict = function(input) { # input: list(PredictionRegr, data.table), see the channel spec in `initialize`
+      pred = input[[1]] # predicted hazards provided by the regression learner
+      data = input[[2]] # transformed data
+      assert_true(!is.null(pred$response))
+
+      data = cbind(data, dt_hazard = pred$response) # assumes predictions are in the same row order as `data` -- TODO confirm
+
+      # Convert hazards to survival separately per id: S = exp(-cumsum(hazard * exp(offset)))
+      rows_per_id = nrow(data) / length(unique(data$id)) # assumes every id contributes the same number of rows
+
+      surv = t(vapply(unique(data$id), function(unique_id) { # after t(): one row of survival values per id
+        exp(-cumsum(data[data$id == unique_id, ][["dt_hazard"]] * exp(data[data$id == unique_id, ][["offset"]])))
+      }, numeric(rows_per_id)))
+
+      unique_end_times = sort(unique(data$tend))
+      # coerce to distribution and crank
+      pred_list = .surv_return(times = unique_end_times, surv = surv)
+
+      # observed times: `obs_times` of every `rows_per_id`-th row, which is the last row
+      # of each id provided the rows are stored in equal-sized, contiguous blocks per id
+      real_tend = data$obs_times[seq_len(nrow(data)) %% rows_per_id == 0]
+
+      ids = unique(data$id)
+      # keep `pem_status` of the last row of every id => status passed to Surv() below
+      id = pem_status = NULL # local dummies to fix note (presumably the "no visible binding" check note)
+      data = data[, .SD[.N, list(pem_status)], by = id]
+
+      # create prediction object
+      p = PredictionSurv$new(
+        row_ids = ids,
+        crank = pred_list$crank, distr = pred_list$distr,
+        truth = Surv(real_tend, as.integer(as.character(data$pem_status))))
+
+      list(p)
+    },
+
+    .train = function(input) { # nothing is learned during training
+      self$state = list() # state is set to an empty list
+      list(input) # the inputs are handed on wrapped in a list
+    }
+  )
+)
+register_pipeop("trafopred_regrsurv_pem", PipeOpPredRegrSurvPEM)
@@ -28,7 +28,7 @@
 #' [TaskClassif][mlr3::TaskClassif].
 #' The target column is named `"disc_status"` and indicates whether an event occurred
 #' in each time interval.
-#' An additional feature named `"tend"` contains the end time point of each interval.
+#' An additional numeric feature named `"tend"` contains the end time point of each interval.
 #' Lastly, the "output" task has a column with the original observation ids,
 #' under the role `"original_ids"`.
 #' The "transformed_data" is an empty [data.table][data.table::data.table].
@@ -134,12 +134,13 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       if (!is.null(max_time)) {
         assert(max_time > data[get(event_var) == 1, min(get(time_var))],
-               "max_time must be greater than the minimum event time.")
+               .var.name = "max_time must be greater than the minimum event time.")
       }
 
       form = formulate(sprintf("Surv(%s, %s)", time_var, event_var), ".")
 
-      long_data = pammtools::as_ped(data = data, formula = form, cut = cut, max_time = max_time)
+      long_data = pammtools::as_ped(data = data, formula = form,
+                                    cut = cut, max_time = max_time)
       self$state$cut = attributes(long_data)$trafo_args$cut
       long_data = as.data.table(long_data)
       setnames(long_data, old = "ped_status", new = "disc_status")
@@ -172,6 +173,8 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       max_time = max(cut)
       time = data[[time_var]]
+      # setting time variable to max_time ensures that the ped data spans
+      # over all intervals for every subject irrespective of event time
       data[[time_var]] = max_time
 
       status = data[[event_var]]