diff --git a/.github/workflows/check-linux.yaml b/.github/workflows/check-linux.yaml new file mode 100644 index 00000000..eaf27c6f --- /dev/null +++ b/.github/workflows/check-linux.yaml @@ -0,0 +1,54 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: check-linux + +jobs: + check-linux: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + # Shorter timeout to prevent mac builders hanging for 6 hours! + timeout-minutes: 30 + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + local::../.. + needs: check + working-directory: drivers/linux + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + working-directory: drivers/linux diff --git a/drivers/pbs/.Rbuildignore b/drivers/linux/.Rbuildignore similarity index 100% rename from drivers/pbs/.Rbuildignore rename to drivers/linux/.Rbuildignore diff --git a/drivers/pbs/.github/.gitignore b/drivers/linux/.github/.gitignore similarity index 100% rename from drivers/pbs/.github/.gitignore rename to drivers/linux/.github/.gitignore diff --git a/drivers/pbs/.github/workflows/R-CMD-check.yaml b/drivers/linux/.github/workflows/R-CMD-check.yaml similarity index 100% rename from drivers/pbs/.github/workflows/R-CMD-check.yaml rename to drivers/linux/.github/workflows/R-CMD-check.yaml diff --git a/drivers/pbs/.github/workflows/pkgdown.yaml b/drivers/linux/.github/workflows/pkgdown.yaml similarity index 100% rename from drivers/pbs/.github/workflows/pkgdown.yaml rename to drivers/linux/.github/workflows/pkgdown.yaml diff --git a/drivers/pbs/.github/workflows/test-coverage.yaml b/drivers/linux/.github/workflows/test-coverage.yaml similarity index 100% rename from drivers/pbs/.github/workflows/test-coverage.yaml rename to drivers/linux/.github/workflows/test-coverage.yaml diff --git a/drivers/pbs/.gitignore b/drivers/linux/.gitignore similarity index 100% rename from drivers/pbs/.gitignore rename to drivers/linux/.gitignore diff --git a/drivers/pbs/.lintr b/drivers/linux/.lintr similarity index 100% rename from drivers/pbs/.lintr rename to drivers/linux/.lintr diff --git a/drivers/pbs/DESCRIPTION b/drivers/linux/DESCRIPTION similarity index 62% rename from drivers/pbs/DESCRIPTION rename to drivers/linux/DESCRIPTION index 22bb3459..5086628b 100644 --- a/drivers/pbs/DESCRIPTION +++ b/drivers/linux/DESCRIPTION @@ -1,19 +1,24 @@ -Package: hipercow.pbs -Title: Generic PBS Support for 'hipercow' +Package: hipercow.linux +Title: Generic Linux Support for 'hipercow' Version: 0.1.0 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Wes", "Hinsley", role = "aut"), person("Imperial College of Science, Technology and Medicine", role = "cph")) -Description: Driver for using any PBS cluster. Specific cluster drivers - will be needed to set up your cluster, but these will be tiny. +Description: Driver for using any Linux cluster (running pbs or + slurm). Specific cluster drivers will be needed to set up your + cluster, but these will be tiny packages that mostly provide a + small amount of configuration and additional tooling. License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 URL: https://github.com/mrc-ide/hipercow, https://mrc-ide.github.io/hipercow BugReports: https://github.com/mrc-ide/hipercow/issues +Imports: + cli, + hipercow Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/drivers/pbs/LICENSE b/drivers/linux/LICENSE similarity index 100% rename from drivers/pbs/LICENSE rename to drivers/linux/LICENSE diff --git a/drivers/pbs/Makefile b/drivers/linux/Makefile similarity index 100% rename from drivers/pbs/Makefile rename to drivers/linux/Makefile diff --git a/drivers/pbs/NAMESPACE b/drivers/linux/NAMESPACE similarity index 100% rename from drivers/pbs/NAMESPACE rename to drivers/linux/NAMESPACE diff --git a/drivers/linux/R/batch.R b/drivers/linux/R/batch.R new file mode 100644 index 00000000..52f86ed9 --- /dev/null +++ b/drivers/linux/R/batch.R @@ -0,0 +1,73 @@ +write_batch_task_run <- function(task_id, config, resources, path_root) { + data <- template_data(config, resources, path_root) + data$task_id <- task_id + template_name <- sprintf("%s/task_run.sh", config$manager) + str <- glue_whisker(read_template(template_name), data) + path <- file.path(path_root, "hipercow", "tasks", task_id, BATCH_RUN) + writeLines(str, path) + path + +} + + +read_template <- function(name) { + read_lines(hipercow_linux_file(sprintf("templates/%s", name))) +} + + +template_data <- function(config, resources, path_root) { + ## Semicolon delimited list on windows; see "Managing libraries" in + ## https://cran.r-project.org/doc/manuals/r-release/R-admin.html + hipercow_library <- paste(config$path_lib, path_bootstrap(config), sep = ":") + + ## TODO: default walltime should come from the configuration + walltime <- walltime_with_default(resources, "01:00:00") + ## TODO: treatment of cores is quite hard, especially where cores = + ## Inf wants to select a single node. There's also issues around + ## "mpinodes" and issues around the complete lack of any sort of + ## usable documentation. + cores <- resources$cores + stopifnot(is.finite(cores)) + + memory <- memory_with_default(resources, "4") + + ## TODO: we're going to need to query this periodically and find the + ## best match. Running 'module avail R/' gets us most of the way + ## there but we need this in machine readable format and also limit + ## to those in the "prod" set.. The prod module will be ict + ## specific so that needs to go into the configuration too. + r_module <- "R/4.3.2-gfbf-2023a" + + list( + hostname = hostname(), + date = as.character(Sys.time()), + hipercow_version = hipercow_version(), + r_module = r_module, + walltime = walltime, + cores = cores, + memory = memory, + hipercow_library = hipercow_library) +} + + + +walltime_with_default <- function(resources, default) { + if (is.null(resources$max_runtime)) { + return(default) + } + mins <- resources$max_runtime %% 60 + hours <- resources$max_runtime %/% 60 + sprintf("%02d:02d:00", hours, mins) +} + + +memory_with_default <- function(resources, default) { + if (!is.null(resources$memory_per_node)) { + return(resources$memory_per_node) + } + if (!is.null(resources$memory_per_process)) { + stopifnot(is.finite(resources$cores)) + return(resources$memory_per_process * resources$cores) + } + default +} diff --git a/drivers/linux/R/bootstrap.R b/drivers/linux/R/bootstrap.R new file mode 100644 index 00000000..e04f2eae --- /dev/null +++ b/drivers/linux/R/bootstrap.R @@ -0,0 +1,3 @@ +## TODO: we need something that the user can run easily without having +## installed hipercow that will install it in the right place in the +## directory that they are looking at. diff --git a/drivers/linux/R/check.R b/drivers/linux/R/check.R new file mode 100644 index 00000000..3729caf8 --- /dev/null +++ b/drivers/linux/R/check.R @@ -0,0 +1,3 @@ +linux_check <- function(path_root) { + TRUE +} diff --git a/drivers/linux/R/config.R b/drivers/linux/R/config.R new file mode 100644 index 00000000..9906a3f6 --- /dev/null +++ b/drivers/linux/R/config.R @@ -0,0 +1,6 @@ +new_linux_config <- function(manager) { + if (manager != "pbs") { + cli::cli_abort("Only pbs cluster managers are supported") + } + list(manager = manager) +} diff --git a/drivers/linux/R/constants.R b/drivers/linux/R/constants.R new file mode 100644 index 00000000..bb95c38f --- /dev/null +++ b/drivers/linux/R/constants.R @@ -0,0 +1,5 @@ +# nolint start +BATCH_RUN <- "run.sh" +SCHEDULER_ID <- "scheduler_id" +TASK_LOG <- "log" +# nolint end diff --git a/drivers/linux/R/driver.R b/drivers/linux/R/driver.R new file mode 100644 index 00000000..eead74ae --- /dev/null +++ b/drivers/linux/R/driver.R @@ -0,0 +1,121 @@ +hipercow_driver_linux <- function() { + hipercow::hipercow_driver( + configure = linux_configure, + submit = linux_submit, + status = linux_status, + info = linux_info, + log = linux_log, + result = linux_result, + cancel = linux_cancel, + provision_run = linux_provision_run, + provision_list = linux_provision_list, + provision_compare = linux_provision_compare, + keypair = linux_keypair, + check_hello = linux_check_hello, + cluster_info = linux_cluster_info) +} + + +linux_submit <- function(id, resources, config, path_root) { + path_batch <- write_batch_task_run(id, config, resources, path_root) + scheduler_id <- do_linux_submit(path_batch, id, config, path_root) + path_scheduler_id <- file.path(dirname(path_batch), SCHEDULER_ID) + writeLines(dide_id, path_scheduler_id) +} + + +linux_status <- function(id, config, path_root) { + path_started <- file.path(path_root, "hipercow", "tasks", id, + "status-running") + ifelse(file.exists(path_started), "running", "submitted") +} + + +linux_info <- function(id, config, path_root) { + path_scheduler_id <- file.path( + path_root, "hipercow", "tasks", id, SCHEDULER_ID) + scheduler_id <- readLines(path_scheduler_id) + status <- do_linux_status(scheduler_id, config) + list(status = status, + time_started = time_started(id, path_root)) +} + + +linux_result <- function(id, config, path_root) { + ## Nothing to do here, but we might want to do something in the + ## cases where the result is not found but the task has failed. +} + + +linux_log <- function(id, outer, config, path_root) { + if (outer) { + path_scheduler_id <- file.path(path_root, "hipercow", "tasks", id, SCHEDULER_ID) + scheduler_id <- readLines(path_scheduler_id) + ## TODO: read .o and .e files + NULL + } else { + readlines_if_exists(file.path(path_root, "hipercow", "tasks", id, TASK_LOG)) + } +} + + +linux_cancel <- function(id, config, path_root) { + path_scheduler_id <- file.path(path_root, "hipercow", "tasks", id, SCHEDULER_ID) + scheduler_id <- vcapply(path_scheduler_id, readLines, USE.NAMES = FALSE) + scheduler_id <- scheduler_id[order(as.integer(scheduler_id), decreasing = TRUE)] + cancelled <- do_linux_cancel(scheduler_id, config) + ## Times are awful: + time_started <- rep(Sys.time(), length(id)) + time_started[] <- NA + if (any(cancelled)) { + time_started[cancelled] <- time_started(id[cancelled], path_root) + } + list(cancelled = cancelled, time_started = time_started) +} + + +linux_check_hello <- function(config, path_root) { + if (!linux_check(path_root)) { + cli::cli_abort("Failed checks for using linux cluster; please see above") + } + resources <- hipercow::hipercow_resources_validate(NULL, "linux", path_root) + ## TODO: perhaps clusters could return a "fast" queue for use. + resources +} + + +linux_keypair <- function(config, path_root) { + cli::cli_abort("encryption not yet supported with the linux driver") +} + + +linux_provision <- function(args, config, path_root) { + cli::cli_abort("provisioning is important but hard") +} + + +linux_provision_list <- function(args, config, path_root) { + if (is.null(args)) { + hash <- NULL + } else { + hash <- conan_config <- rlang::inject(conan2::conan_configure( + !!!args, + path = path_root, + path_lib = config$path_lib, + path_bootstrap = path_bootstrap(config)))$hash + } + path_lib <- file.path(path_root, config$path_lib) + conan2::conan_list(path_lib, hash) +} + + +linux_provision_compare <- function(curr, prev, config, path_root) { + path_lib <- file.path(path_root, config$path_lib) + conan2::conan_compare(path_lib, curr, prev) +} + + +time_started <- function(id, path_root) { + path <- file.path(path_root, "hipercow", "tasks", id, "status-running") + file.info(path, extra_cols = FALSE)$ctime +} diff --git a/drivers/linux/R/manager.R b/drivers/linux/R/manager.R new file mode 100644 index 00000000..472fed05 --- /dev/null +++ b/drivers/linux/R/manager.R @@ -0,0 +1,39 @@ +do_linux_submit <- function(path_batch, config, path_root) { + scheduler_id <- switch( + config$manager, + pbs = do_linux_submit_pbs(path_batch, config, path_root), + cli::cli_abort("Submission from unknown manager '{config$manager}'")) +} + + +do_linux_status <- function(scheduler_id, config) { + switch(config$manager, + pbs = do_linux_status_pbs(scheduler_id, config), + cli::cli_abort("Status from unknown manager '{config$manager}'")) +} + + +do_linux_cancel <- function(scheduler_id, config) { + switch(config$manager, + pbs = do_linux_cancel_pbs(scheduler_id, config), + cli::cli_abort("Submission from unknown manager '{config$manager}'")) +} + + +do_linux_submit_pbs <- function(path_batch, id, config, path_root) { + withr::with_dir(path_root, system3("qsub", path_batch, stdout = TRUE)) +} + + +do_linux_status_pbs <- function(scheduler_id, config) { + ## TODO: we don't really know what this returns yet, and how to + ## interpret it. + system3("qstat", scheduler_id) +} + + +do_linux_cancel_pbs <- function(scheduler_id, config) { + ## TODO: we don't really know what this returns yet, and how to + ## interpret it. + system3("qdel", scheduler_id) +} diff --git a/drivers/linux/R/util.R b/drivers/linux/R/util.R new file mode 100644 index 00000000..19a95181 --- /dev/null +++ b/drivers/linux/R/util.R @@ -0,0 +1,37 @@ +`%||%` <- function(x, y) { # nolint + if (is.null(x)) y else x +} + + +glue_whisker <- function(template, data) { + transformer <- function(...) { + ## This transformer prevents a NULL entry destroying the string + glue::identity_transformer(...) %||% "" + } + glue::glue_data(data, template, .open = "{{", .close = "}}", + .trim = FALSE, .transformer = transformer) +} + +hostname <- function() { + Sys.info()[["nodename"]] +} + + +hipercow_linux_file <- function(path) { + system.file(path, mustWork = TRUE, package = "hipercow.linux") +} + + +hipercow_version <- function() { + as.character(utils::packageVersion("hipercow")) +} + + +read_lines <- function(...) { + paste(readLines(...), collapse = "\n") +} + + +normalize_path <- function(path) { + normalizePath(path, winslash = "/", mustWork = TRUE) +} diff --git a/drivers/pbs/README.md b/drivers/linux/README.md similarity index 51% rename from drivers/pbs/README.md rename to drivers/linux/README.md index f465136b..d73a7b94 100644 --- a/drivers/pbs/README.md +++ b/drivers/linux/README.md @@ -1,18 +1,18 @@ -# hipercow.pbs +# hipercow.linux [![Project Status: Concept – Minimal or no implementation has been done yet, or the repository is only intended to be a limited example, demo, or proof-of-concept.](https://www.repostatus.org/badges/latest/concept.svg)](https://www.repostatus.org/#concept) -[![R build status](https://github.com/mrc-ide/hipercow.pbs/workflows/R-CMD-check/badge.svg)](https://github.com/mrc-ide/hipercow.pbs/actions) -[![Build status]()](https://buildkite.com/mrc-ide/mrcide/hipercow-dot-pbs?branch=main) -[![codecov.io](https://codecov.io/github/mrc-ide/hipercow.pbs/coverage.svg?branch=main)](https://codecov.io/github/mrc-ide/hipercow.pbs?branch=main) +[![R build status](https://github.com/mrc-ide/hipercow.linux/workflows/R-CMD-check/badge.svg)](https://github.com/mrc-ide/hipercow.linux/actions) +[![Build status]()](https://buildkite.com/mrc-ide/mrcide/hipercow-dot-linux?branch=main) +[![codecov.io](https://codecov.io/github/mrc-ide/hipercow.linux/coverage.svg?branch=main)](https://codecov.io/github/mrc-ide/hipercow.linux?branch=main) ## Installation -To install `hipercow.pbs`: +To install `hipercow.linux`: ```r -remotes::install_github("mrc-ide/hipercow.pbs", upgrade = FALSE) +remotes::install_github("mrc-ide/hipercow.linux", upgrade = FALSE) ``` ## License diff --git a/drivers/linux/inst/templates/pbs/bootstrap.R b/drivers/linux/inst/templates/pbs/bootstrap.R new file mode 100644 index 00000000..2293bcd3 --- /dev/null +++ b/drivers/linux/inst/templates/pbs/bootstrap.R @@ -0,0 +1,42 @@ +path <- sprintf("I:/{{bootstrap_path}}/%s", + paste(unclass(getRversion())[[1]], collapse = ".")) +path_next <- sprintf("%s-next", path) +path_prev <- sprintf("%s-prev", path) +unlink(path_next, recursive = TRUE) +unlink(path_prev, recursive = TRUE) +if (file.exists(path_prev)) { + stop("Failed to remove previous-previous library") +} +if (file.exists(path_next)) { + stop("Failed to remove previous-next library") +} +dir.create(path_next, FALSE, TRUE) +.libPaths(path_next, FALSE) +message(sprintf("Installing packages into %s", path_next)) +pkgs <- c("hipercow", "remotes", "pkgdepends", "renv") +repos <- c("https://mrc-ide.r-universe.dev", "https://cloud.r-project.org") +install.packages(pkgs, path_next, repos = repos) +ok <- all(file.exists(file.path(path_next, pkgs, "Meta", "package.rds"))) +if (!ok) { + stop("Failed to install all packages") +} + +curr_exists <- file.exists(path) +if (curr_exists) { + # Default behaviour is to warn and just continue if the rename + # fails, which is wild, and also terrible. + stopifnot(file.rename(path, path_prev)) +} +stopifnot(file.rename(path_next, path)) +if (curr_exists) { + unlink(path_prev, recursive = TRUE) +} + +if (!is.null({{development_ref}})) { + .libPaths(path, FALSE) + ## We need to install this directly into the final library, + ## otherwise we can't move things over because "remotes" will have + ## been loaded and that creates a lock. + remotes::install_github("mrc-ide/hipercow", ref = {{development_ref}}, + upgrade = FALSE) +} diff --git a/drivers/linux/inst/templates/pbs/provision.sh b/drivers/linux/inst/templates/pbs/provision.sh new file mode 100755 index 00000000..44426c1c --- /dev/null +++ b/drivers/linux/inst/templates/pbs/provision.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +#PBS -lwalltime=00:30:00 +#PBS -lselect=1:ncpus=1:mem=4gb +cd ${PBS_O_WORKDIR} + +echo "generated on host: {{hostname}}" +echo "generated on date: {{date}}" +echo "hipercow version: {{hipercow_version}}" +echo "running on: $(hostname)" +echo "working directory: $(pwd)" + +module load tools/prod +module load {{r_module}} + +export RENV_AUTOLOADER_ENABLED=FALSE + +echo "this is a provisioning task" + +Rscript "hipercow/provision/{{id}}/conan.R" > "hipercow/provision/{{id}}/log" 2>&1 diff --git a/drivers/linux/inst/templates/pbs/task_run.sh b/drivers/linux/inst/templates/pbs/task_run.sh new file mode 100755 index 00000000..56781cac --- /dev/null +++ b/drivers/linux/inst/templates/pbs/task_run.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +#PBS -lwalltime={{walltime}} +#PBS -lselect=1:ncpus={{cores}}:mem={{memory}}gb + +### This technically makes this driver dependent on easybuild or +### this module system, and other ways would be possible. However, we +### don't know what those look like and this could easily be +### substituted in. The tools/prod is likely very Imperial-ICT +### specific too and we might want to make that configurable. +module load tools/prod +module load {{r_module}} + +### If the task was submitted from the root directory of hipercow +### (which we can guarantee if we don't do so already) this will +### change directory to the hipercow root from now, which is good for +### us. +cd ${PBS_O_WORKDIR} + +export R_LIBS_USER={{hipercow_library}} +export HIPERCOW_NO_DRIVERS=1 +export RENV_AUTOLOADER_ENABLED=FALSE +### According to NASA, who are we to argue... +export HIPERCOW_CORES=$NCPUS + +echo "generated on host: {{hostname}}" +echo "generated on date: {{date}}" +echo "hipercow version: {{hipercow_version}}" +echo "running on: $(hostname)" + +echo "working directory: $(pwd)" + +echo "this is a single task" + +Rscript -e "hipercow::task_eval('{{task_id}}', verbose = TRUE)" > "hipercow/tasks/{{task_id}}/log" 2>&1 diff --git a/drivers/linux/linux.Rproj b/drivers/linux/linux.Rproj new file mode 100644 index 00000000..21a4da08 --- /dev/null +++ b/drivers/linux/linux.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/drivers/linux/tests/testthat.R b/drivers/linux/tests/testthat.R new file mode 100644 index 00000000..cb607b23 --- /dev/null +++ b/drivers/linux/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(hipercow.linux) + +test_check("hipercow.linux") diff --git a/drivers/pbs/tests/testthat/test-util.R b/drivers/linux/tests/testthat/test-util.R similarity index 100% rename from drivers/pbs/tests/testthat/test-util.R rename to drivers/linux/tests/testthat/test-util.R diff --git a/drivers/pbs/R/util.R b/drivers/pbs/R/util.R deleted file mode 100644 index 0354298b..00000000 --- a/drivers/pbs/R/util.R +++ /dev/null @@ -1,3 +0,0 @@ -`%||%` <- function(x, y) { # nolint - if (is.null(x)) y else x -} diff --git a/drivers/pbs/tests/testthat.R b/drivers/pbs/tests/testthat.R deleted file mode 100644 index b699f795..00000000 --- a/drivers/pbs/tests/testthat.R +++ /dev/null @@ -1,4 +0,0 @@ -library(testthat) -library(hipercow.pbs) - -test_check("hipercow.pbs")