From 8103a029dce64dfa8bc2c41c0df91dc222c1348d Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:54:39 +0000 Subject: [PATCH 1/6] Remove `cat()` from functions --- .Rbuildignore | 1 + DESCRIPTION | 2 +- NEWS.md | 7 +++++ R/plot_motif_comparison.R | 4 +-- R/print_denovo_sections.R | 23 +++++++------- R/print_labels.R | 40 ++++++++++++++---------- README.md | 6 ++-- inst/markdown/MotifPeeker.Rmd | 30 +++++++++++++----- man/print_labels.Rd | 2 +- tests/testthat/test-denovo_motif_funcs.R | 6 ++-- tests/testthat/test-print_labels.R | 6 ++-- 11 files changed, 79 insertions(+), 48 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 65ab3fb..5797bdb 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,6 +6,7 @@ Icon? ^doc$ ^Meta$ ^codecov\.yml$ +^\.DS_Store$ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ diff --git a/DESCRIPTION b/DESCRIPTION index 8deb714..359a3e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: MotifPeeker Title: Benchmarking Epigenomic Profiling Methods Using Motif Enrichment -Version: 0.99.6 +Version: 0.99.7 Authors@R: c( person(given = "Hiranyamaya", family = "Dash", diff --git a/NEWS.md b/NEWS.md index d2d81bd..258527a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# MotifPeeker 0.99.7 + +## Miscellaneous + +* Remove `cat()` calls in functions. + + # MotifPeeker 0.99.5 / 0.99.6 ## Miscellaneous diff --git a/R/plot_motif_comparison.R b/R/plot_motif_comparison.R index 3658cd2..346b1fb 100644 --- a/R/plot_motif_comparison.R +++ b/R/plot_motif_comparison.R @@ -49,8 +49,8 @@ plot_motif_comparison <- function(comparison_matrices, xlab <- axis_labels$reference_common } if (all(is.na(matrices[[x]]))) { - msg <- utils::capture.output(cat("*No motifs were discovered", - "in one or both the comparison groups. Skipping plot...* \n")) + msg <- paste("*No motifs were discovered", + "in one or both the comparison groups. 
Skipping plot...* \n") return(msg) } if (nrow(matrices[[x]]) < 2 || ncol(matrices[[x]]) < 2) { diff --git a/R/print_denovo_sections.R b/R/print_denovo_sections.R index 0bfe7b8..0b9cbbd 100644 --- a/R/print_denovo_sections.R +++ b/R/print_denovo_sections.R @@ -41,13 +41,13 @@ print_denovo_sections <- function(motif_list, paste0("\n**Comparison Group - Unique Peaks** \nTotal peaks in ", "group: ", length(segregated_peaks$unique_seqs2), " \n") ) + out <- list() ### DT Func ### .print_dt <- function(i) { if (length(motif_list[[i]]) == 0 || length(similar_motifs[[i]]) == 0) { - msg <- utils::capture.output( - cat("*Either no motifs were discovered for this group, or no", - "similar motifs were found.* \n")) + msg <- paste("*Either no motifs were discovered for this group, or", + "no similar motifs were found.* \n") return(msg) } similar_motifs_i <- purrr::map_df(similar_motifs[[i]], as.data.frame) @@ -84,21 +84,22 @@ print_denovo_sections <- function(motif_list, } for (i in indices) { - cat(headers[[i]]) + out$first <- paste(headers[[i]], "\n ") ### DT ### - cat(" \n ") - print(.print_dt(i)) - cat(" \n ") - + out$DT <- .print_dt(i) + out$third <- " \n " + ### Download Buttons ### if (!is.null(download_buttons)) { if (!is.null(download_buttons$peak_file[[i]])) - print(download_buttons$peak_file[[i]]) + out$fourth <- download_buttons$peak_file[[i]] if (!is.null(download_buttons$streme_output[[i]])) - print(download_buttons$streme_output[[i]]) + out$fifth <- download_buttons$streme_output[[i]] if (!is.null(download_buttons$tomtom_output[[i]])) - print(download_buttons$tomtom_output[[i]]) + out$sixth <- download_buttons$tomtom_output[[i]] } } + + return(out) } diff --git a/R/print_labels.R b/R/print_labels.R index 3c342fc..0aa6ab7 100644 --- a/R/print_labels.R +++ b/R/print_labels.R @@ -8,7 +8,7 @@ #' @param read_counts A numeric vector of read counts for each experiment. 
#' (optional) #' -#' @returns None (invisible \code{NULL}) +#' @returns String with the labels of the reference and comparison experiments. #' #' @keywords internal print_labels <- function(exp_labels, @@ -16,29 +16,37 @@ print_labels <- function(exp_labels, comparison_index, header_type, read_counts = NULL) { + label_str <- "" if (header_type == "known_motif") { - cat("\n### ", exp_labels[comparison_index], " {- .unlisted} \n") + label_str <- paste0(label_str, + "\n### ", exp_labels[comparison_index], + " {- .unlisted} \n") } else if (header_type == "denovo_motif") { - cat("\n## ", exp_labels[comparison_index], " {- .unlisted .tabset ", - ".tabset-fade .tabset-pills} \n") + label_str <- paste0(label_str, + "\n## ", exp_labels[comparison_index], + " {- .unlisted .tabset ", + ".tabset-fade .tabset-pills} \n") } - cat("**Reference Experiment Label**: ", - exp_labels[reference_index]) + + label_str <- paste0(label_str, "**Reference Experiment Label**: ", + exp_labels[reference_index]) if (!is.null(read_counts)) { - cat(" (Total Reads: ", paste0( - pretty_number(read_counts[reference_index]), ") \n" - )) + label_str <- paste0(label_str, " (Total Reads: ", + pretty_number(read_counts[reference_index]), + ") \n") } else { - cat(" \n") + label_str <- paste0(label_str, " \n") } - cat("**Comparison Experiment Label**: ", - exp_labels[comparison_index]) + label_str <- paste0(label_str, "**Comparison Experiment Label**: ", + exp_labels[comparison_index]) if (!is.null(read_counts)) { - cat(" (Total Reads: ", paste0( - pretty_number(read_counts[comparison_index]), ") \n" - )) + label_str <- paste0(label_str, " (Total Reads: ", + pretty_number(read_counts[comparison_index]), + ") \n") } else { - cat(" \n") + label_str <- paste0(label_str, " \n") } + + return(label_str) } diff --git a/README.md b/README.md index 659ace8..547c60a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ style="height: 300px !important;" /> [![License: GPL (\>= 
3)](https://img.shields.io/badge/license-GPL%20(%3E=%203)-blue.svg)](https://cran.r-project.org/web/licenses/GPL%20(%3E=%203)) -[![](https://img.shields.io/badge/devel%20version-0.99.6-black.svg)](https://github.com/neurogenomics/MotifPeeker) +[![](https://img.shields.io/badge/devel%20version-0.99.7-black.svg)](https://github.com/neurogenomics/MotifPeeker) [![](https://img.shields.io/github/languages/code-size/neurogenomics/MotifPeeker.svg)](https://github.com/neurogenomics/MotifPeeker) [![](https://img.shields.io/github/last-commit/neurogenomics/MotifPeeker.svg)](https://github.com/neurogenomics/MotifPeeker/commits/master)
[![R build @@ -18,7 +18,7 @@ status](https://github.com/neurogenomics/MotifPeeker/workflows/rworkflows/badge. **Authors:** ***Hiranyamaya (Hiru) Dash, Thomas Roberts, Nathan Skene*** -**Updated:** ***Oct-28-2024*** +**Updated:** ***Nov-01-2024*** ## Introduction @@ -355,7 +355,7 @@ utils::sessionInfo() ## R version 4.4.1 (2024-06-14) ## Platform: aarch64-apple-darwin20 - ## Running under: macOS 15.0.1 + ## Running under: macOS 15.1 ## ## Matrix products: default ## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd index bc307f3..88fb6b9 100644 --- a/inst/markdown/MotifPeeker.Rmd +++ b/inst/markdown/MotifPeeker.Rmd @@ -53,7 +53,7 @@ params: value: TRUE --- -```{r setup, warning=FALSE, message=FALSE} +```{r setup, warning=debug, message=debug, results=ifelse(debug, "markup", "hide")} knitr::opts_chunk$set(echo = TRUE, message = params$debug, warning = params$debug, @@ -795,7 +795,7 @@ if (!user_motif_metrics) { ### Individual Comparison Plots ### for (i in comparison_indices) { print_labels(result$exp_labels, params$reference_index, i, - "known_motif", result$read_count) + "known_motif", result$read_count) %>% cat() ## Plot MotifPeeker:::plot_enrichment_individual(result, enrichment_df, i, motif_i, label_colours, params$reference_index) %>% @@ -869,7 +869,7 @@ for (i in seq_along(comparison_indices)) { tab_header_flag <- TRUE } print_labels(result$exp_labels, params$reference_index, comparison_i, - "denovo_motif", result$read_count) + "denovo_motif", result$read_count) %>% cat() cat("
**Select Plot:** \n") start_i <- 1 + (i - 1) * 4 end_i <- i * 4 @@ -884,32 +884,46 @@ for (i in seq_along(comparison_indices)) { get_download_buttons(i, start_i, segregated_peaks, out_dir = out_dir_extra, add_buttons = params$download_buttons, verbose = params$verbose) .print_denovo_sections_i <- function(x, y) { - print_denovo_sections(streme_motifs, similar_motifs, segregated_peaks[[i]], + out <- print_denovo_sections(streme_motifs, similar_motifs, segregated_peaks[[i]], c(x, y), jaspar_link = using_jaspar_db, download_buttons = download_btns) + for (ele in out) { + if (typeof(ele) == "character") { + cat(ele) + } else { + print(ele) + } + } + } + .print_denovo_plt <- function(i) { + if (typeof(denovo_plts[[i]]) == "character") { + cat(denovo_plts[[i]]) + } else { + print(denovo_plts[[i]]) + } } cat("\n### Common Motif Comparison {- .unlisted} \n") cat("\n#### Motif Similarity Plot {- .unlisted} \n") - print(denovo_plts[[1]]) + .print_denovo_plt(1) cat("\n#### Motif Details {- .unlisted} \n") .print_denovo_sections_i(1, 2) cat("\n### Unique Motif Comparison {- .unlisted} \n") cat("\n#### Motif Similarity Plot {- .unlisted} \n") - print(denovo_plts[[2]]) + .print_denovo_plt(2) cat("\n#### Motif Details {- .unlisted} \n") .print_denovo_sections_i(3, 4) cat("\n### Cross Motif Comparison A {- .unlisted} \n") cat("\n#### Motif Similarity Plot {- .unlisted} \n") - print(denovo_plts[[3]]) + .print_denovo_plt(3) cat("\n#### Motif Details {- .unlisted} \n") .print_denovo_sections_i(3, 2) cat("\n### Cross Motif Comparison B {- .unlisted} \n") cat("\n#### Motif Similarity Plot {- .unlisted} \n") - print(denovo_plts[[4]]) + .print_denovo_plt(4) cat("\n#### Motif Details {- .unlisted} \n") .print_denovo_sections_i(4, 1) } diff --git a/man/print_labels.Rd b/man/print_labels.Rd index 203316d..69930cb 100644 --- a/man/print_labels.Rd +++ b/man/print_labels.Rd @@ -26,7 +26,7 @@ are: "known_motif" and "denovo_motif".} (optional)} } \value{ -None (invisible \code{NULL}) +String 
with the labels of the reference and comparison experiments. } \description{ Print the labels of the reference and comparison experiments diff --git a/tests/testthat/test-denovo_motif_funcs.R b/tests/testthat/test-denovo_motif_funcs.R index 74aa522..3ae4fd2 100644 --- a/tests/testthat/test-denovo_motif_funcs.R +++ b/tests/testthat/test-denovo_motif_funcs.R @@ -60,9 +60,7 @@ test_that("De-novo motif enrichment functions works", { expect_type(res5, "list") ### print_denovo_sections ### - .call_print_denovo_sections <- function() { - print_denovo_sections(res, res2, segregated_peaks, c(1,2), + section_out <- print_denovo_sections(res, res2, segregated_peaks, c(1,2), jaspar_link = TRUE, download_buttons = res5) - } - expect_invisible(.call_print_denovo_sections()) + expect_type(section_out, "list") }) diff --git a/tests/testthat/test-print_labels.R b/tests/testthat/test-print_labels.R index 50a5d8e..ddbca78 100644 --- a/tests/testthat/test-print_labels.R +++ b/tests/testthat/test-print_labels.R @@ -1,6 +1,8 @@ test_that("print_labels works", { exp_labels <- c("Exp1", "Exp2", "Exp3") read_counts <- c(100, 200, 300) - expect_invisible(print_labels(exp_labels, 1, 2, "known_motif", read_counts)) - expect_invisible(print_labels(exp_labels, 1, 2, "denovo_motif")) + expect_invisible(cat(print_labels(exp_labels, 1, 2, + "known_motif", read_counts))) + expect_invisible(cat(print_labels(exp_labels, 1, 2, + "denovo_motif"))) }) From cbeac23f294cbd2efe894bd15bdb5e7684487f58 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:09:12 +0000 Subject: [PATCH 2/6] Implement helper `check_input()` --- NEWS.md | 2 ++ R/check_ENCODE.R | 9 +++---- R/check_JASPAR.R | 8 +++---- R/check_input.R | 34 +++++++++++++++++++++++++++ R/link_JASPAR.R | 9 ++++--- R/read_motif_file.R | 4 +++- inst/markdown/MotifPeeker.Rmd | 25 ++++++++++++++------ man/check_ENCODE.Rd | 3 +-- man/check_JASPAR.Rd | 3 +-- man/check_input.Rd | 34 +++++++++++++++++++++++++++ 
man/link_JASPAR.Rd | 3 +-- tests/testthat/test-check_ENCODE.R | 8 ++++++- tests/testthat/test-check_JASPAR.R | 7 ++++-- tests/testthat/test-read_motif_file.R | 11 +++++---- 14 files changed, 125 insertions(+), 35 deletions(-) create mode 100644 R/check_input.R create mode 100644 man/check_input.Rd diff --git a/NEWS.md b/NEWS.md index 258527a..e4097e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ ## Miscellaneous * Remove `cat()` calls in functions. +* Implement helper `check_input()` to validate input before passing them to + other functions. # MotifPeeker 0.99.5 / 0.99.6 diff --git a/R/check_ENCODE.R b/R/check_ENCODE.R index 71b61b8..0c15d84 100644 --- a/R/check_ENCODE.R +++ b/R/check_ENCODE.R @@ -9,8 +9,7 @@ #' thrown. #' @inheritParams MotifPeeker #' -#' @returns A character string specifying the path to the downloaded file. If -#' the input is not in ENCODE ID format, the input is returned as-is. +#' @returns A character string specifying the path to the downloaded file. #' #' @examples #' if (requireNamespace("curl", quietly = TRUE) && @@ -20,10 +19,12 @@ #' #' @export check_ENCODE <- function(encode_id, expect_format, verbose = FALSE) { - if (!all(is.character(encode_id))) return(encode_id) ### Validate ENCODE ID ### + stp_msg <- "Input is not a ENCODE ID string." id_pattern <- "^ENC(SR|BS|DO|GM|AB|LB|FF|PL)\\d{3}[A-Z]{3}$" - if (!all(grepl(id_pattern, encode_id))) return(encode_id) + if (!(all(is.character(encode_id)) && all(grepl(id_pattern, encode_id)))) { + stop(stp_msg) + } ### Verify existence of file on ENCODE ### check_dep("curl") diff --git a/R/check_JASPAR.R b/R/check_JASPAR.R index aa07ff8..86ba5a9 100644 --- a/R/check_JASPAR.R +++ b/R/check_JASPAR.R @@ -6,8 +6,7 @@ #' @inheritParams link_JASPAR #' @inheritParams MotifPeeker #' -#' @returns A character string specifying the path to the downloaded file. If -#' the input is not in JASPAR ID format, the input is returned as-is. 
+#' @returns A character string specifying the path to the downloaded file. #' #' @examples #' check_JASPAR("MA1930.2") @@ -15,9 +15,10 @@ #' @export check_JASPAR <- function(motif_id, verbose = FALSE) { ### Validate JASPAR ID ### - if (!is.character(motif_id)) return(motif_id) - if (!startsWith(motif_id, "MA")) return(motif_id) + stp_msg <- "Input is not a JASPAR ID string." + if (!(is.character(motif_id) && startsWith(motif_id, "MA"))) stop(stp_msg) ### Fetch file ### return(use_cache(link_JASPAR(motif_id, download = TRUE), verbose = verbose)) } + diff --git a/R/check_input.R b/R/check_input.R new file mode 100644 index 0000000..45a75f3 --- /dev/null +++ b/R/check_input.R @@ -0,0 +1,34 @@ +#' Check for input validity and pass to appropriate function +#' +#' @param x The input to check. +#' @param type The type of input to check for. Supported types are: +#' \itemize{ +#' \item \code{jaspar_id}: JASPAR identifier. +#' \item \code{motif}: `universalmotif` motif object. +#' \item \code{encode_id}: ENCODE identifier. +#' } +#' @param FUN The function to pass the input to. +#' @param inverse Logical indicating whether to invert the check: if `TRUE`, +#' a valid input is returned as-is and an invalid input is passed to `FUN`. +#' @param ... Additional arguments to pass to the `FUN` function. +#' +#' @returns `x` if the input is invalid for the specified `type`, or else the +#' output of the `FUN` function. If `inverse = TRUE`, the function returns `x` +#' if the input is valid, or else the output of the `FUN` function. +#' +#' @keywords internal +check_input <- function(x, type, FUN, inverse = FALSE, ...) 
{ + valid <- switch( + tolower(type), + jaspar_id = is.character(x) && startsWith(x, "MA"), + encode_id = { + id_pattern <- "^ENC(SR|BS|DO|GM|AB|LB|FF|PL)\\d{3}[A-Z]{3}$" + all(is.character(x)) && all(grepl(id_pattern, x)) + }, + motif = inherits(x, "universalmotif"), + stop("Invalid type specified.") + ) + + if (inverse) ifelse(valid, return(x), return(FUN(x, ...))) + ifelse(valid, return(FUN(x, ...)), return(x)) +} diff --git a/R/link_JASPAR.R b/R/link_JASPAR.R index 8d547f1..dfc859e 100644 --- a/R/link_JASPAR.R +++ b/R/link_JASPAR.R @@ -4,14 +4,13 @@ #' @param download A logical specifying whether to return a download link or an #' HTML embeddable matrix profile link. (default = FALSE) #' -#' @returns A character string containing the JASPAR motif link. If -#' \code{motif_id} is not a valid JASPAR motif ID, it is returned as is. +#' @returns A character string containing the JASPAR motif link. #' #' @keywords internal link_JASPAR <- function(motif_id, download = FALSE) { - if (is.na(motif_id) || !startsWith(motif_id, "MA")) { - return(motif_id) - } + stp_msg <- "Input is not a JASPAR ID string." 
+ if (is.na(motif_id) || !startsWith(motif_id, "MA")) stop(stp_msg) + if (download) { ## Return download link return(paste0("https://jaspar.elixir.no/api/v1/matrix/", diff --git a/R/read_motif_file.R b/R/read_motif_file.R index 73b6b3b..6908df8 100644 --- a/R/read_motif_file.R +++ b/R/read_motif_file.R @@ -32,9 +32,11 @@ read_motif_file <- function(motif_file, file_format = "auto", verbose = FALSE) { ### Check if motif_file is a universalmotif object ### + if (inherits(motif_file, "universalmotif")) { - return(motif_file) + stop("The motif_file is already a universalmotif object.") } + if (!is.character(motif_file)) stop("Input must be a character string.") ### Load supported read functions ### read_functions <- list( diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd index 88fb6b9..276097b 100644 --- a/inst/markdown/MotifPeeker.Rmd +++ b/inst/markdown/MotifPeeker.Rmd @@ -70,19 +70,30 @@ if (is.null(params$exp_type)) { } else { exp_types <- params$exp_type } -peak_files_encode <- Vectorize(check_ENCODE, "encode_id")( - params$peak_files, expect_format = c("narrowPeak", "bed")) -alignment_files_encode <- Vectorize(check_ENCODE, "encode_id")( - params$alignment_files, expect_format = "bam") +peak_files_encode <- + Vectorize(function(x) check_input( + x, "encode_id", check_ENCODE, expect_format = c("narrowPeak", "bed"), + verbose = FALSE), "x")(params$peak_files) +alignment_files_encode <- + Vectorize(function(x) check_input( + x, "encode_id", check_ENCODE, expect_format = c("bam"), + verbose = FALSE), "x")(params$alignment_files) result <- list( peaks = Vectorize(read_peak_file, "peak_file")(peak_files_encode), alignments = lapply(alignment_files_encode, Rsamtools::BamFile), exp_labels = params$exp_labels, - exp_type = unname(Vectorize(MotifPeeker:::format_exptype, "exp_type")(exp_types)) + exp_type = unname(Vectorize(MotifPeeker:::format_exptype, + "exp_type")(exp_types)) ) -motif_files_jaspar <- Vectorize(check_JASPAR, 
"motif_id")(params$motif_files) +motif_files_jaspar <- Vectorize( + function(x) check_input(x, "jaspar_id", check_JASPAR, + verbose = params$verbose), "x")(params$motif_files) user_motifs <- list( - motifs = Vectorize(read_motif_file, "motif_file")(motif_files_jaspar), + # motifs = Vectorize(read_motif_file, "motif_file")(motif_files_jaspar), + motifs = Vectorize( + function(x) check_input(x, "motif", read_motif_file, inverse = TRUE, + verbose = params$verbose), + "x")(motif_files_jaspar), motif_labels = params$motif_labels ) result$alignments <- unname(result$alignments) diff --git a/man/check_ENCODE.Rd b/man/check_ENCODE.Rd index 685bb0a..b0af06c 100644 --- a/man/check_ENCODE.Rd +++ b/man/check_ENCODE.Rd @@ -17,8 +17,7 @@ thrown.} running the function. (default = FALSE)} } \value{ -A character string specifying the path to the downloaded file. If -the input is not in ENCODE ID format, the input is returned as-is. +A character string specifying the path to the downloaded file. } \description{ Check and get files from ENCODE project. Requires the input to be in ENCODE diff --git a/man/check_JASPAR.Rd b/man/check_JASPAR.Rd index 8a7abb6..381afa3 100644 --- a/man/check_JASPAR.Rd +++ b/man/check_JASPAR.Rd @@ -13,8 +13,7 @@ check_JASPAR(motif_id, verbose = FALSE) running the function. (default = FALSE)} } \value{ -A character string specifying the path to the downloaded file. If -the input is not in JASPAR ID format, the input is returned as-is. +A character string specifying the path to the downloaded file. } \description{ Check and get files from JASPAR. 
Requires the input to be in JASPAR diff --git a/man/check_input.Rd b/man/check_input.Rd new file mode 100644 index 0000000..c88e2cf --- /dev/null +++ b/man/check_input.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_input.R +\name{check_input} +\alias{check_input} +\title{Check for input validity and pass to appropriate function} +\usage{ +check_input(x, type, FUN, inverse = FALSE, ...) +} +\arguments{ +\item{x}{The input to check.} + +\item{type}{The type of input to check for. Supported types are: +\itemize{ + \item \code{jaspar_id}: JASPAR identifier. + \item \code{motif}: `universalmotif` motif object. + \item \code{encode_id}: ENCODE identifier. +}} + +\item{FUN}{The function to pass the input to.} + +\item{inverse}{Logical indicating whether to invert the check: if `TRUE`, +a valid input is returned as-is and an invalid input is passed to `FUN`.} + +\item{...}{Additional arguments to pass to the `FUN` function.} +} +\value{ +`x` if the input is invalid for the specified `type`, or else the +output of the `FUN` function. If `inverse = TRUE`, the function returns `x` +if the input is valid, or else the output of the `FUN` function. +} +\description{ +Check for input validity and pass to appropriate function +} +\keyword{internal} diff --git a/man/link_JASPAR.Rd b/man/link_JASPAR.Rd index c7e3396..63b8fdc 100644 --- a/man/link_JASPAR.Rd +++ b/man/link_JASPAR.Rd @@ -13,8 +13,7 @@ link_JASPAR(motif_id, download = FALSE) HTML embeddable matrix profile link. (default = FALSE)} } \value{ -A character string containing the JASPAR motif link. If -\code{motif_id} is not a valid JASPAR motif ID, it is returned as is. +A character string containing the JASPAR motif link. 
} \description{ Get JASPAR link for motifs diff --git a/tests/testthat/test-check_ENCODE.R b/tests/testthat/test-check_ENCODE.R index a0d7fcf..813c09f 100644 --- a/tests/testthat/test-check_ENCODE.R +++ b/tests/testthat/test-check_ENCODE.R @@ -1,7 +1,7 @@ skip_if_offline() test_that("check_ENCODE works", { - expect_equal(check_ENCODE("/a/path"), "/a/path") + expect_error(check_ENCODE("/a/path")) valid_file <- check_ENCODE("ENCFF920TXI", expect_format = c("bed", "gz")) expect_true(grepl("ENCFF920TXI.bed.gz", valid_file)) @@ -11,4 +11,10 @@ test_that("check_ENCODE works", { ## Not a file expect_error(check_ENCODE("ENCSR398OAO", expect_format = c("bam", "bed", "gz"))) + + ## check_input + expect_equal(check_input("/a/path", "encode_id", check_ENCODE), "/a/path") + valid_file2 <- check_input("ENCFF920TXI", "encode_id", check_ENCODE, + expect_format = c("bed", "gz")) + expect_true(grepl("ENCFF920TXI.bed.gz", valid_file2)) }) diff --git a/tests/testthat/test-check_JASPAR.R b/tests/testthat/test-check_JASPAR.R index e6ca235..65e0450 100644 --- a/tests/testthat/test-check_JASPAR.R +++ b/tests/testthat/test-check_JASPAR.R @@ -1,9 +1,12 @@ skip_if_offline() test_that("check_JASPAR works", { - expect_equal(check_JASPAR("/a/path.jaspar"), "/a/path.jaspar") + expect_error(check_JASPAR("/a/path.jaspar")) motif <- check_JASPAR("MA1930.2") expect_true(grepl("MA1930.2.jaspar", motif)) - expect_equal(check_JASPAR(1), 1) + + ## check_input + expect_equal(check_input("/a/path.jaspar", "jaspar_id", check_JASPAR), + "/a/path.jaspar") }) diff --git a/tests/testthat/test-read_motif_file.R b/tests/testthat/test-read_motif_file.R index 3ceaa49..1e13ce8 100644 --- a/tests/testthat/test-read_motif_file.R +++ b/tests/testthat/test-read_motif_file.R @@ -1,10 +1,12 @@ test_that("read_motif_file can read jaspar motif file", { motif_file <- system.file("extdata", "motif_MA1930.2.jaspar", package = "MotifPeeker") - motif <- read_motif_file(motif_file, motif_id = "MA1930.2", - file_format = 
"jaspar") + # motif <- read_motif_file(motif_file, motif_id = "MA1930.2", + # file_format = "jaspar") + motif <- check_input(motif_file, "motif", read_motif_file, inverse = TRUE, + motif_id = "MA1930.2", file_format = "jaspar") expect_s4_class(motif, "universalmotif") - expect_s4_class(read_motif_file(motif), "universalmotif") + expect_error(read_motif_file(motif)) }) test_that("read_motif_file can infer motif file-format", { motif_file <- system.file("extdata", "motif_MA1930.2.jaspar", @@ -13,7 +15,6 @@ test_that("read_motif_file can infer motif file-format", { expect_s4_class(motif, "universalmotif") }) test_that("read_motif_file fails with invalid data", { - motif_file <- system.file("extdata","CTCF_ChIP_peaks.narrowPeak", - package = "MotifPeeker") + motif_file <- system.file("extdata","CTCF_ChIP_peaks.narrowPeak") expect_error(read_motif_file(motif_file, motif_id = "MA1930.2")) }) From 33124b46a8064323c31d718524ef26144c71071a Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:15:46 +0000 Subject: [PATCH 3/6] Only run tests if MEME Suite is installed (if required) --- tests/testthat/test-MotifPeeker.R | 2 ++ tests/testthat/test-confirm_meme_install.R | 2 ++ tests/testthat/test-denovo_motif_funcs.R | 2 ++ tests/testthat/test-enrichment_funcs.R | 2 ++ tests/testthat/test-get_df_distances.R | 2 ++ tests/testthat/test-markov_background_model.R | 2 ++ tests/testthat/test-motif_enrichment.R | 2 ++ tests/testthat/test-summit_to_motif.R | 2 ++ 8 files changed, 16 insertions(+) diff --git a/tests/testthat/test-MotifPeeker.R b/tests/testthat/test-MotifPeeker.R index b731490..a4e9a4e 100644 --- a/tests/testthat/test-MotifPeeker.R +++ b/tests/testthat/test-MotifPeeker.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("MotifPeeker fails without genome_build input", { expect_error(MotifPeeker()) }) diff --git a/tests/testthat/test-confirm_meme_install.R 
b/tests/testthat/test-confirm_meme_install.R index 4295903..a205d26 100644 --- a/tests/testthat/test-confirm_meme_install.R +++ b/tests/testthat/test-confirm_meme_install.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("confirm_meme_install works", { if (memes::meme_is_installed()) { expect_silent(confirm_meme_install()) diff --git a/tests/testthat/test-denovo_motif_funcs.R b/tests/testthat/test-denovo_motif_funcs.R index 3ae4fd2..3e5add4 100644 --- a/tests/testthat/test-denovo_motif_funcs.R +++ b/tests/testthat/test-denovo_motif_funcs.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("De-novo motif enrichment functions works", { data("CTCF_TIP_peaks", package = "MotifPeeker") data("CTCF_ChIP_peaks", package = "MotifPeeker") diff --git a/tests/testthat/test-enrichment_funcs.R b/tests/testthat/test-enrichment_funcs.R index 4909492..e98b360 100644 --- a/tests/testthat/test-enrichment_funcs.R +++ b/tests/testthat/test-enrichment_funcs.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("enrichment plotting and datatable functions works", { ### Prepare input ### data("CTCF_ChIP_peaks", package = "MotifPeeker") diff --git a/tests/testthat/test-get_df_distances.R b/tests/testthat/test-get_df_distances.R index 2ccaaf9..b90b46e 100644 --- a/tests/testthat/test-get_df_distances.R +++ b/tests/testthat/test-get_df_distances.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("get_df_distances works", { data("CTCF_ChIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") diff --git a/tests/testthat/test-markov_background_model.R b/tests/testthat/test-markov_background_model.R index 4a53d40..c1173c7 100644 --- a/tests/testthat/test-markov_background_model.R +++ b/tests/testthat/test-markov_background_model.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME 
is not installed") + test_that("file created by markov_background_model function", { temp_dir <- withr::local_tempdir() diff --git a/tests/testthat/test-motif_enrichment.R b/tests/testthat/test-motif_enrichment.R index 2e6cc3c..e2c6260 100644 --- a/tests/testthat/test-motif_enrichment.R +++ b/tests/testthat/test-motif_enrichment.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("list outputted by motif_enrichment function", { data("CTCF_TIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") diff --git a/tests/testthat/test-summit_to_motif.R b/tests/testthat/test-summit_to_motif.R index 43fca85..daf0852 100644 --- a/tests/testthat/test-summit_to_motif.R +++ b/tests/testthat/test-summit_to_motif.R @@ -1,3 +1,5 @@ +skip_if_not(memes::meme_is_installed(), "MEME is not installed") + test_that("list outputted by summit_to_motif function", { temp_dir <- withr::local_tempdir() From 1aa710f0ca2e237e9dc9ffb01575e2072ceb0997 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:59:16 +0000 Subject: [PATCH 4/6] Run examples and tests conditionally based on MEME Suite installation status --- NEWS.md | 2 ++ R/MotifPeeker.R | 2 ++ R/denovo_motifs.R | 2 ++ R/find_motifs.R | 2 ++ R/get_df_distances.R | 2 ++ R/get_df_enrichment.R | 2 ++ R/motif_enrichment.R | 2 ++ R/motif_similarity.R | 2 ++ R/summit_to_motif.R | 2 ++ README.md | 6 +++--- man/MotifPeeker.Rd | 2 ++ man/denovo_motifs.Rd | 2 ++ man/find_motifs.Rd | 2 ++ man/get_df_distances.Rd | 2 ++ man/get_df_enrichment.Rd | 2 ++ man/motif_enrichment.Rd | 2 ++ man/motif_similarity.Rd | 2 ++ man/summit_to_motif.Rd | 2 ++ 18 files changed, 37 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e4097e7..3f129d3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ * Remove `cat()` calls in functions. * Implement helper `check_input()` to validate input before passing them to other functions. 
+* Run examples and tests only if MEME Suite is detected (only for functions +which require MEME Suite). # MotifPeeker 0.99.5 / 0.99.6 diff --git a/R/MotifPeeker.R b/R/MotifPeeker.R index e55a042..a41b395 100644 --- a/R/MotifPeeker.R +++ b/R/MotifPeeker.R @@ -142,6 +142,7 @@ #' ) #' #' \donttest{ +#' if (memes::meme_is_installed()) { #' # MotifPeeker takes time to run #' MotifPeeker( #' peak_files = peaks, @@ -163,6 +164,7 @@ #' quiet = TRUE, #' verbose = FALSE #' ) +#' } #' } #' #' @export diff --git a/R/denovo_motifs.R b/R/denovo_motifs.R index b3e4a7a..8212395 100644 --- a/R/denovo_motifs.R +++ b/R/denovo_motifs.R @@ -36,6 +36,7 @@ #' associated metadata. #' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { #' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 @@ -48,6 +49,7 @@ #' out_dir = tempdir()) #' print(res[[1]]$consensus) #' } +#' } #' @export denovo_motifs <- function(seqs, trim_seq_width, diff --git a/R/find_motifs.R b/R/find_motifs.R index c33d241..50b7ccc 100644 --- a/R/find_motifs.R +++ b/R/find_motifs.R @@ -23,6 +23,7 @@ #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' #' \donttest{ +#' if (memes::meme_is_installed()) { #' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { #' genome_build <- #' BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 @@ -37,6 +38,7 @@ #' out_dir = tempdir()) #' print(res2) #' } +#' } #' } #' #' @export diff --git a/R/get_df_distances.R b/R/get_df_distances.R index 1a8c48d..e399b31 100644 --- a/R/get_df_distances.R +++ b/R/get_df_distances.R @@ -35,6 +35,7 @@ #' } #' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_ChIP_peaks", package = "MotifPeeker") #' data("motif_MA1102.3", package = "MotifPeeker") #' data("motif_MA1930.2", package = "MotifPeeker") @@ -56,6 +57,7 @@ #' workers = 1) #' print(distances_df) #' } +#' } #' #' 
@family generate data.frames #' diff --git a/R/get_df_enrichment.R b/R/get_df_enrichment.R index e72f8f6..d753e0c 100644 --- a/R/get_df_enrichment.R +++ b/R/get_df_enrichment.R @@ -26,6 +26,7 @@ #' } #' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_ChIP_peaks", package = "MotifPeeker") #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' data("motif_MA1102.3", package = "MotifPeeker") @@ -55,6 +56,7 @@ #' ) #' } #' } +#' } #' #' @family generate data.frames #' diff --git a/R/motif_enrichment.R b/R/motif_enrichment.R index d0eb089..1465140 100644 --- a/R/motif_enrichment.R +++ b/R/motif_enrichment.R @@ -24,6 +24,7 @@ #' to the proportion of peaks with a motif. #' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' data("motif_MA1102.3", package = "MotifPeeker") #' @@ -36,6 +37,7 @@ #' ) #' print(res) #' } +#' } #' #' @seealso \code{\link[memes]{runAme}} #' diff --git a/R/motif_similarity.R b/R/motif_similarity.R index 937826f..1b5ee40 100644 --- a/R/motif_similarity.R +++ b/R/motif_similarity.R @@ -30,6 +30,7 @@ #' The list is repeated for each set of comparison groups in input. #' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' data("CTCF_ChIP_peaks", package = "MotifPeeker") #' @@ -49,6 +50,7 @@ #' print(similarity_matrices) #' } #' } +#' } #' #' @export motif_similarity <- function(streme_out, diff --git a/R/summit_to_motif.R b/R/summit_to_motif.R index daf2a2d..a19d9e8 100644 --- a/R/summit_to_motif.R +++ b/R/summit_to_motif.R @@ -30,6 +30,7 @@ #' distances for each valid peak. 
#' #' @examples +#' if (memes::meme_is_installed()) { #' data("CTCF_TIP_peaks", package = "MotifPeeker") #' data("motif_MA1102.3", package = "MotifPeeker") #' @@ -40,6 +41,7 @@ #' genome_build = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 #' ) #' print(res) +#' } #' #' @seealso \code{\link[memes]{runAme}} #' diff --git a/README.md b/README.md index 547c60a..3e8db7b 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ status](https://github.com/neurogenomics/MotifPeeker/workflows/rworkflows/badge. **Authors:** ***Hiranyamaya (Hiru) Dash, Thomas Roberts, Nathan Skene*** -**Updated:** ***Nov-01-2024*** +**Updated:** ***Nov-04-2024*** ## Introduction @@ -380,12 +380,12 @@ utils::sessionInfo() ## [19] desc_1.4.3 dlstats_0.1.7 rprojroot_2.0.4 ## [22] munsell_0.5.1 pillar_1.9.0 RColorBrewer_1.1-3 ## [25] rlang_1.1.4 utf8_1.2.4 badger_0.2.4 - ## [28] xfun_0.48 fs_1.6.4 cli_3.6.3 + ## [28] xfun_0.49 fs_1.6.5 cli_3.6.3 ## [31] magrittr_2.0.3 rworkflows_1.0.2 digest_0.6.37 ## [34] grid_4.4.1 rstudioapi_0.17.1 lifecycle_1.0.4 ## [37] vctrs_0.6.5 evaluate_1.0.1 glue_1.8.0 ## [40] data.table_1.16.2 fansi_1.0.6 colorspace_2.1-1 - ## [43] rmarkdown_2.28 tools_4.4.1 pkgconfig_2.0.3 + ## [43] rmarkdown_2.29 tools_4.4.1 pkgconfig_2.0.3 ## [46] htmltools_0.5.8.1 diff --git a/man/MotifPeeker.Rd b/man/MotifPeeker.Rd index b2672c3..f88967e 100644 --- a/man/MotifPeeker.Rd +++ b/man/MotifPeeker.Rd @@ -207,6 +207,7 @@ motifs <- list( ) \donttest{ + if (memes::meme_is_installed()) { # MotifPeeker takes time to run MotifPeeker( peak_files = peaks, @@ -228,6 +229,7 @@ motifs <- list( quiet = TRUE, verbose = FALSE ) + } } } diff --git a/man/denovo_motifs.Rd b/man/denovo_motifs.Rd index 628c77d..1690cb2 100644 --- a/man/denovo_motifs.Rd +++ b/man/denovo_motifs.Rd @@ -77,6 +77,7 @@ the search space. 
The result is then optionally filtered to remove motifs with a high number of nucleotide repeats } \examples{ +if (memes::meme_is_installed()) { data("CTCF_TIP_peaks", package = "MotifPeeker") if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 @@ -90,3 +91,4 @@ if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { print(res[[1]]$consensus) } } +} diff --git a/man/find_motifs.Rd b/man/find_motifs.Rd index 0ba9aca..434bf98 100644 --- a/man/find_motifs.Rd +++ b/man/find_motifs.Rd @@ -61,6 +61,7 @@ Light wrapper around \code{TOMTOM} from MEME Suite. data("CTCF_TIP_peaks", package = "MotifPeeker") \donttest{ + if (memes::meme_is_installed()) { if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 @@ -75,6 +76,7 @@ data("CTCF_TIP_peaks", package = "MotifPeeker") out_dir = tempdir()) print(res2) } + } } } diff --git a/man/get_df_distances.Rd b/man/get_df_distances.Rd index f8213b5..f73af2c 100644 --- a/man/get_df_distances.Rd +++ b/man/get_df_distances.Rd @@ -65,6 +65,7 @@ for all peaks and motifs, generating a \code{data.frame} suitable for plots. } \examples{ +if (memes::meme_is_installed()) { data("CTCF_ChIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") data("motif_MA1930.2", package = "MotifPeeker") @@ -86,6 +87,7 @@ if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { workers = 1) print(distances_df) } +} } \seealso{ diff --git a/man/get_df_enrichment.Rd b/man/get_df_enrichment.Rd index 5a1f182..e9d67be 100644 --- a/man/get_df_enrichment.Rd +++ b/man/get_df_enrichment.Rd @@ -80,6 +80,7 @@ suitable for plots. The \code{data.frame} contains values for all and segregated peaks. 
} \examples{ +if (memes::meme_is_installed()) { data("CTCF_ChIP_peaks", package = "MotifPeeker") data("CTCF_TIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") @@ -109,6 +110,7 @@ reference_index <- 1 ) } } +} } \seealso{ diff --git a/man/motif_enrichment.Rd b/man/motif_enrichment.Rd index ba68f56..189cb34 100644 --- a/man/motif_enrichment.Rd +++ b/man/motif_enrichment.Rd @@ -53,6 +53,7 @@ background sequences using Analysis of Motif Enrichment (AME) from \link{memes}. } \examples{ +if (memes::meme_is_installed()) { data("CTCF_TIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") @@ -65,6 +66,7 @@ data("motif_MA1102.3", package = "MotifPeeker") ) print(res) } +} } \seealso{ diff --git a/man/motif_similarity.Rd b/man/motif_similarity.Rd index ff79ef7..a928ee1 100644 --- a/man/motif_similarity.Rd +++ b/man/motif_similarity.Rd @@ -152,6 +152,7 @@ are of little use for motifs with a different number of alphabet letters } } \examples{ +if (memes::meme_is_installed()) { data("CTCF_TIP_peaks", package = "MotifPeeker") data("CTCF_ChIP_peaks", package = "MotifPeeker") @@ -171,5 +172,6 @@ data("CTCF_ChIP_peaks", package = "MotifPeeker") print(similarity_matrices) } } +} } diff --git a/man/summit_to_motif.Rd b/man/summit_to_motif.Rd index ba23c24..9c093ba 100644 --- a/man/summit_to_motif.Rd +++ b/man/summit_to_motif.Rd @@ -82,6 +82,7 @@ the approximate formula: \href{https://meme-suite.org/meme/doc/fimo-tutorial.html}{FIMO documentation}) } \examples{ +if (memes::meme_is_installed()) { data("CTCF_TIP_peaks", package = "MotifPeeker") data("motif_MA1102.3", package = "MotifPeeker") @@ -92,6 +93,7 @@ res <- summit_to_motif( genome_build = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 ) print(res) +} } \seealso{ From 613fefee385bc6ea3bf7b5daab7e553fd3345379 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:28:26 +0000 Subject: [PATCH 5/6] Run examples 
conditionally based on MEME Suite installation status --- R/plot_enrichment_individual.R | 3 ++- R/plot_enrichment_overall.R | 3 ++- man/plot_enrichment_individual.Rd | 3 ++- man/plot_enrichment_overall.Rd | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/plot_enrichment_individual.R b/R/plot_enrichment_individual.R index e4092f8..7445fc1 100644 --- a/R/plot_enrichment_individual.R +++ b/R/plot_enrichment_individual.R @@ -42,7 +42,8 @@ #' ) #' #' \donttest{ -#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { +#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38") && +#' memes::meme_is_installed()) { #' genome_build <- #' BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 #' enrichment_df <- get_df_enrichment( diff --git a/R/plot_enrichment_overall.R b/R/plot_enrichment_overall.R index 4ea028f..c12e11a 100644 --- a/R/plot_enrichment_overall.R +++ b/R/plot_enrichment_overall.R @@ -40,7 +40,8 @@ #' ) #' #' \donttest{ -#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { +#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38") && +#' memes::meme_is_installed()) { #' genome_build <- #' BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 #' enrichment_df <- get_df_enrichment( diff --git a/man/plot_enrichment_individual.Rd b/man/plot_enrichment_individual.Rd index 4b65b89..5c7e09a 100644 --- a/man/plot_enrichment_individual.Rd +++ b/man/plot_enrichment_individual.Rd @@ -72,7 +72,8 @@ motifs <- list( ) \donttest{ - if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { + if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38") && + memes::meme_is_installed()) { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 enrichment_df <- get_df_enrichment( diff --git a/man/plot_enrichment_overall.Rd b/man/plot_enrichment_overall.Rd index 75f2078..d961211 100644 --- a/man/plot_enrichment_overall.Rd +++ b/man/plot_enrichment_overall.Rd @@ -60,7 +60,8 @@ motifs <- list( ) \donttest{ - if 
(requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { + if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38") && + memes::meme_is_installed()) { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 enrichment_df <- get_df_enrichment( From 2b1f51db646aae41526f8072f185c6634b87bf3b Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Tue, 5 Nov 2024 11:33:06 +0000 Subject: [PATCH 6/6] Replace `workers` with `BPPARAM` argument and remove `get_bpparam()` --- DESCRIPTION | 1 - NAMESPACE | 1 + NEWS.md | 4 ++ R/MotifPeeker.R | 63 ++++++++++++++---------- R/bpapply.R | 15 ++---- R/denovo_motifs.R | 9 ++-- R/find_motifs.R | 5 +- R/get_bpparam.R | 47 ------------------ R/get_df_distances.R | 7 ++- R/get_df_enrichment.R | 8 +-- R/motif_similarity.R | 8 +-- R/plot_enrichment_individual.R | 2 +- R/plot_enrichment_overall.R | 2 +- README.md | 2 +- inst/markdown/MotifPeeker.Rmd | 12 ++--- man/MotifPeeker.Rd | 56 ++++++++++++--------- man/bpapply.Rd | 26 ++-------- man/denovo_motifs.Rd | 9 ++-- man/find_motifs.Rd | 9 ++-- man/get_bpparam.Rd | 41 --------------- man/get_df_distances.Rd | 20 +++++--- man/get_df_enrichment.Rd | 19 +++++-- man/motif_similarity.Rd | 8 +-- man/plot_enrichment_individual.Rd | 2 +- man/plot_enrichment_overall.Rd | 2 +- tests/testthat/test-MotifPeeker.R | 3 -- tests/testthat/test-bpapply.R | 9 +--- tests/testthat/test-denovo_motif_funcs.R | 4 +- tests/testthat/test-enrichment_funcs.R | 3 +- tests/testthat/test-get_df_distances.R | 2 +- vignettes/MotifPeeker.Rmd | 10 ++-- vignettes/troubleshooting.Rmd | 2 +- 32 files changed, 168 insertions(+), 243 deletions(-) delete mode 100644 R/get_bpparam.R delete mode 100644 man/get_bpparam.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 359a3e8..46513fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -64,7 +64,6 @@ Imports: stats, utils Suggests: - BiocStyle, BSgenome.Hsapiens.UCSC.hg19, BSgenome.Hsapiens.UCSC.hg38, downloadthis, diff --git a/NAMESPACE b/NAMESPACE index 
aa91489..cf3dd3a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ importFrom(BSgenome,getSeq) importFrom(BiocFileCache,BiocFileCache) importFrom(BiocFileCache,bfcinfo) importFrom(BiocFileCache,bfcrpath) +importFrom(BiocParallel,bpnworkers) importFrom(Biostrings,DNAString) importFrom(Biostrings,letterFrequency) importFrom(DT,datatable) diff --git a/NEWS.md b/NEWS.md index 3f129d3..e614659 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # MotifPeeker 0.99.7 +## New Features +* Replace `workers` argument with `BPPARAM`. Give users more control over the +BiocParallel implementation. + ## Miscellaneous * Remove `cat()` calls in functions. diff --git a/R/MotifPeeker.R b/R/MotifPeeker.R index a41b395..e934542 100644 --- a/R/MotifPeeker.R +++ b/R/MotifPeeker.R @@ -11,28 +11,28 @@ #' hours to complete. To make computation faster, we highly recommend tuning the #' following arguments: #' \describe{ -#' \item{\code{workers}}{Running motif discovery in parallel can -#' significantly reduce runtime, but it is very memory-intensive, consuming -#' upwards of 10GB of RAM per thread. Memory starvation can greatly slow the -#' process, so set \code{workers} with caution.} -#' \item{\code{denovo_motifs}}{The number of motifs to discover per sequence -#' group exponentially increases runtime. We recommend no more than 5 -#' motifs to make a meaningful inference.} -#' \item{\code{trim_seq_width}}{Trimming sequences before running de-novo -#' motif discovery can significantly reduce the search space. Sequence -#' length can exponentially increase runtime. We recommend running the -#' script with \code{denovo_motif_discovery = FALSE} and studying the -#' motif-summit distance distribution under general metrics to find the -#' sequence length that captures most motifs. 
A good starting point is 150 -#' but it can be reduced further if appropriate.} +#' \item{\code{BPPARAM=MulticoreParam(x)}}{Running motif discovery in +#' parallel can significantly reduce runtime, but it is very +#' memory-intensive, consuming 10+GB of RAM per thread. Memory starvation can +#' greatly slow the process, so set the number of cores with caution.} +#' \item{\code{denovo_motifs}}{The number of motifs to discover per sequence +#' group exponentially increases runtime. We recommend no more than 5 +#' motifs to make a meaningful inference.} +#' \item{\code{trim_seq_width}}{Trimming sequences before running de-novo +#' motif discovery can significantly reduce the search space. Sequence +#' length can exponentially increase runtime. We recommend running the +#' script with \code{denovo_motif_discovery = FALSE} and studying the +#' motif-summit distance distribution under general metrics to find the +#' sequence length that captures most motifs. A good starting point is 150 +#' but it can be reduced further if appropriate.} #' } #' #' @param peak_files A character vector of path to peak files, or a vector of #' GRanges objects generated using \code{\link{read_peak_file}}. Currently, #' peak files from the following peak-calling tools are supported: #' \itemize{ -#' \item MACS2: \code{.narrowPeak} files -#' \item SEACR: \code{.bed} files +#' \item MACS2: \code{.narrowPeak} files +#' \item SEACR: \code{.bed} files #' } #' ENCODE file IDs can also be provided to automatically fetch peak file(s) from #' the ENCODE database. @@ -81,13 +81,22 @@ #' @param display A character vector specifying the display mode for the HTML #' report once it is generated. (default = NULL) Options are: #' \itemize{ -#' \item \code{"browser"}: Open the report in the default web browser. -#' \item \code{"rstudio"}: Open the report in the RStudio Viewer. -#' \item \code{NULL}: Do not open the report. +#' \item \code{"browser"}: Open the report in the default web browser. 
+#' \item \code{"rstudio"}: Open the report in the RStudio Viewer. +#' \item \code{NULL}: Do not open the report. #' } -#' @param workers An integer specifying the number of threads to use for -#' parallel processing. (default = 1)\cr -#' \strong{IMPORTANT:} For each worker, please ensure a minimum of 6GB of +#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object +#' enabling parallel execution. (default = SerialParam(), single-CPU run)\cr\cr +#' Following are two examples of how to set up parallel processing: +#' \itemize{ +#' \item \code{BPPARAM = BiocParallel::MulticoreParam(4)}: Uses 4 +#' CPU cores for parallel processing. +#' \item \code{library("BiocParallel")} followed by +#' \code{register(MulticoreParam(4))} sets all subsequent BiocParallel +#' functions to use 4 CPU cores. \code{MotifPeeker()} must be run +#' with \code{BPPARAM = BiocParallel::bpparam()}. +#' } +#' \strong{IMPORTANT:} For each worker, please ensure a minimum of 8GB of #' memory (RAM) is available as \code{denovo_motif_discovery} is #' memory-intensive. #' @param quiet A logical indicating whether to print markdown knit messages. @@ -99,7 +108,7 @@ #' @inheritParams check_genome_build #' @inheritParams read_motif_file #' @inheritParams check_genome_build -#' @inheritParams get_bpparam +#' @inheritParams bpapply #' @inheritParams memes::runFimo #' @inheritParams denovo_motifs #' @inheritParams find_motifs @@ -111,6 +120,7 @@ #' @importFrom viridis scale_fill_viridis scale_color_viridis #' @importFrom tools file_path_sans_ext #' @importFrom rmarkdown render +#' @importFrom BiocParallel bpnworkers #' #' @return Path to the output directory. 
#' @@ -159,7 +169,6 @@ #' motif_db = NULL, #' download_buttons = TRUE, #' out_dir = tempdir(), -#' workers = 1, #' debug = FALSE, #' quiet = TRUE, #' verbose = FALSE @@ -188,7 +197,7 @@ MotifPeeker <- function( out_dir = tempdir(), save_runfiles = FALSE, display = if (interactive()) "browser", - workers = 2, + BPPARAM = BiocParallel::SerialParam(), # Default to single-core quiet = TRUE, debug = FALSE, verbose = FALSE @@ -269,7 +278,7 @@ MotifPeeker <- function( meme_path = meme_path, out_dir = out_dir, save_runfiles = save_runfiles, - workers = workers, + BPPARAM = BPPARAM, debug = debug, verbose = verbose ) @@ -277,6 +286,8 @@ MotifPeeker <- function( ### Knit Rmd ### rmd_file <- system.file("markdown", "MotifPeeker.Rmd", package = "MotifPeeker") + messager("Starting run with", BiocParallel::bpnworkers(BPPARAM), "cores.", + v = verbose) rmarkdown::render( input = rmd_file, output_dir = out_dir, diff --git a/R/bpapply.R b/R/bpapply.R index 12bfe05..69a1047 100644 --- a/R/bpapply.R +++ b/R/bpapply.R @@ -1,15 +1,15 @@ #' Use BiocParallel functions with appropriate parameters #' #' Light wrapper around \code{\link[BiocParallel]{BiocParallel}} functions that -#' automatically sets the appropriate parameters based on the number of workers -#' specified. +#' automatically applies appropriate parallel function. #' #' @param apply_fun A \code{\link[BiocParallel]{BiocParallel}} function to use #' for parallel processing. (default = \code{BiocParallel::bplapply}) +#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object +#' specifying run parameters. 
(default = bpparam()) #' @inheritParams BiocParallel::bplapply #' @inheritDotParams BiocParallel::bplapply #' @inheritDotParams BiocParallel::bpmapply -#' @inheritParams get_bpparam #' #' @import BiocParallel #' @@ -19,7 +19,7 @@ #' half_it <- function(arg1) return(arg1 / 2) #' x <- seq_len(10) #' -#' res <- MotifPeeker:::bpapply(x, half_it, workers = 2) +#' res <- MotifPeeker:::bpapply(x, half_it) #' print(res) #' #' @keywords internal @@ -27,7 +27,7 @@ bpapply <- function( X, FUN, apply_fun = BiocParallel::bplapply, - workers = 1, + BPPARAM = BiocParallel::bpparam(), progressbar = FALSE, force_snowparam = FALSE, verbose = FALSE, @@ -38,11 +38,6 @@ bpapply <- function( if (length(apply_fun_package) == 0 || apply_fun_package != "BiocParallel") stop(stp_msg) - BPPARAM <- get_bpparam(workers = workers, - progressbar = progressbar, - force_snowparam = force_snowparam, - verbose = verbose) - res <- apply_fun(X, FUN = FUN, BPPARAM = BPPARAM, ...) return(res) } diff --git a/R/denovo_motifs.R b/R/denovo_motifs.R index 8212395..87c2e1e 100644 --- a/R/denovo_motifs.R +++ b/R/denovo_motifs.R @@ -25,12 +25,15 @@ #' (default = 6) #' @param out_dir A \code{character} vector of output directory to save STREME #' results to. (default = \code{tempdir()}) +#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object +#' specifying run parameters. (default = SerialParam(), single core run) +#' @param debug A logical indicating whether to print debug messages while +#' running the function. (default = FALSE) #' @param ... Additional arguments to pass to \code{STREME}. For more #' information, refer to the official MEME Suite documentation on #' \href{https://meme-suite.org/meme/doc/streme.html}{STREME}. #' @inheritParams bpapply #' @inheritParams motif_enrichment -#' @inheritParams MotifPeeker #' #' @returns A list of \code{\link[universalmotif]{universalmotif}} objects and #' associated metadata. 
@@ -60,7 +63,7 @@ denovo_motifs <- function(seqs, filter_n = 6, out_dir = tempdir(), meme_path = NULL, - workers = 1, + BPPARAM = BiocParallel::SerialParam(), verbose = FALSE, debug = FALSE, ...) { @@ -96,7 +99,7 @@ denovo_motifs <- function(seqs, ### Filter motifs ### out <- filter_repeats(streme_out, filter_n) return(out) - }, workers = workers, verbose = verbose + }, BPPARAM = BPPARAM, verbose = verbose ) messager("STREME run complete.", v = verbose) return(res) diff --git a/R/find_motifs.R b/R/find_motifs.R index 50b7ccc..dcd8dc7 100644 --- a/R/find_motifs.R +++ b/R/find_motifs.R @@ -13,6 +13,7 @@ #' @param ... Additional arguments to pass to \code{TOMTOM}. For more #' information, refer to the official MEME Suite documentation on #' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}. +#' @inheritParams bpapply #' @inheritParams denovo_motifs #' #' @importFrom memes runTomTom @@ -46,7 +47,7 @@ find_motifs <- function(streme_out, motif_db, out_dir = tempdir(), meme_path = NULL, - workers = 1, + BPPARAM = BiocParallel::bpparam(), verbose = FALSE, debug = FALSE, ...) { @@ -66,7 +67,7 @@ find_motifs <- function(streme_out, ) return(res_x) }) - }, workers = workers, verbose = verbose + }, BPPARAM = BPPARAM, verbose = verbose ) return(res) } diff --git a/R/get_bpparam.R b/R/get_bpparam.R deleted file mode 100644 index 097dbc6..0000000 --- a/R/get_bpparam.R +++ /dev/null @@ -1,47 +0,0 @@ -#' Get parameters for \link[BiocParallel]{BiocParallel} -#' -#' Get appropriate parameters for \code{BiocParallel} based on the -#' number of workers specified. For less than 10 workers, the function returns a -#' \code{MulticoreParam} object. For 10 or more cores, the function -#' returns a \code{SnowParam} object. Since Windows supports -#' neither, the function returns a \code{SerialParam} object. As a -#' result, Windows users do not benefit from parallel processing. -#' -#' @param workers The number of workers to use for parallel processing. 
-#' @param force_snowparam A logical indicating whether to force the use of -#' \link[BiocParallel]{SnowParam} object. -#' @param verbose A logical indicating whether to print verbose messages while -#' running the function. (default = FALSE) -#' @inheritParams BiocParallel::SnowParam -#' -#' @import BiocParallel -#' -#' @returns A \code{BPPARAM} object. -#' -#' @seealso \link[BiocParallel]{BiocParallelParam} -#' -#' @keywords internal -get_bpparam <- function(workers, - progressbar = workers > 1, - force_snowparam = FALSE, - verbose = FALSE) { - if (.Platform$OS.type == "windows") { - custom_bpparam <- BiocParallel::SerialParam() - messager("Windows does not support parallel processing.", - "Returning SerialParam object for BiocParallel.", - v = verbose) - } else if (workers < 10 && !force_snowparam) { - custom_bpparam <- - BiocParallel::MulticoreParam(workers = workers, - progressbar = progressbar) - messager("Using MulticoreParam object for BiocParallel (workers =", - paste0(workers, ")."), v = verbose) - } else { - custom_bpparam <- BiocParallel::SnowParam(workers = workers, - progressbar = progressbar) - messager("Using SnowParam object for BiocParallel (workers =", - paste0(workers, ")."), v = verbose) - } - - return(custom_bpparam) -} diff --git a/R/get_df_distances.R b/R/get_df_distances.R index e399b31..459f437 100644 --- a/R/get_df_distances.R +++ b/R/get_df_distances.R @@ -53,8 +53,7 @@ #' #' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { #' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 -#' distances_df <- get_df_distances(input, motifs, genome_build, -#' workers = 1) +#' distances_df <- get_df_distances(input, motifs, genome_build) #' print(distances_df) #' } #' } @@ -66,7 +65,7 @@ get_df_distances <- function(result, user_motifs, genome_build, out_dir = tempdir(), - workers = 1, + BPPARAM = BiocParallel::bpparam(), meme_path = NULL, verbose = FALSE) { if (!is.list(result$peaks)) result$peaks <- list(result$peaks) @@ 
-95,7 +94,7 @@ get_df_distances <- function(result, )$distance_to_summit ) }, - workers = workers, verbose = verbose) %>% + BPPARAM = BPPARAM, verbose = verbose) %>% purrr::map_df(as.data.frame) ## Output: Peak 1 - Motif 1, 2... diff --git a/R/get_df_enrichment.R b/R/get_df_enrichment.R index d753e0c..14cd77b 100644 --- a/R/get_df_enrichment.R +++ b/R/get_df_enrichment.R @@ -52,7 +52,7 @@ #' #' enrichment_df <- get_df_enrichment( #' input, segregated_input, motifs, genome_build, -#' reference_index = 1, workers = 1 +#' reference_index = 1 #' ) #' } #' } @@ -67,7 +67,7 @@ get_df_enrichment <- function(result, genome_build, reference_index = 1, out_dir = tempdir(), - workers = 1, + BPPARAM = BiocParallel::bpparam(), meme_path = NULL, verbose = FALSE) { if (!is.list(result$peaks)) result$peaks <- list(result$peaks) @@ -113,7 +113,7 @@ get_df_enrichment <- function(result, run_index = i ) }, - workers = workers, verbose = verbose) %>% + BPPARAM = BPPARAM, verbose = verbose) %>% purrr::map_df(as.data.frame) ## 2. 
Segregated peaks @@ -165,7 +165,7 @@ get_df_enrichment <- function(result, run_index = i ) }, - workers = workers, verbose = verbose) %>% + BPPARAM = BPPARAM, verbose = verbose) %>% purrr::map_df(as.data.frame) enrichment_df <- rbind(enrichment_df_all, enrichment_df_seg) diff --git a/R/motif_similarity.R b/R/motif_similarity.R index 1b5ee40..5516d04 100644 --- a/R/motif_similarity.R +++ b/R/motif_similarity.R @@ -10,6 +10,7 @@ #' @inheritDotParams universalmotif::compare_motifs #' #' @importFrom universalmotif compare_motifs +#' @importFrom BiocParallel bpnworkers #' #' @inherit universalmotif::compare_motifs details #' @@ -44,8 +45,7 @@ #' genome_build = genome_build, #' denovo_motifs = 2, #' filter_n = 6, -#' out_dir = tempdir(), -#' workers = 1) +#' out_dir = tempdir()) #' similarity_matrices <- motif_similarity(denovo_motifs) #' print(similarity_matrices) #' } @@ -56,7 +56,7 @@ motif_similarity <- function(streme_out, method = "PCC", normalise.scores = TRUE, - workers = 1, + BPPARAM = BiocParallel::bpparam(), ...) { ## Motif group sequence - #1 Common seqs - Reference (1) ## (4 Groups per #2 Common seqs - Comparison (2) @@ -81,7 +81,7 @@ motif_similarity <- function(streme_out, list(m1, m2), method = method, normalise.scores = normalise.scores, - nthreads = workers, + nthreads = BiocParallel::bpnworkers(BPPARAM), ... 
) row_indices <- seq(1, length(m1)) diff --git a/R/plot_enrichment_individual.R b/R/plot_enrichment_individual.R index 7445fc1..cdf555f 100644 --- a/R/plot_enrichment_individual.R +++ b/R/plot_enrichment_individual.R @@ -48,7 +48,7 @@ #' BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 #' enrichment_df <- get_df_enrichment( #' input, segregated_input, motifs, genome_build, -#' reference_index = 1, workers = 1 +#' reference_index = 1 #' ) #' label_colours <- c("red", "cyan") #' diff --git a/R/plot_enrichment_overall.R b/R/plot_enrichment_overall.R index c12e11a..f2e832d 100644 --- a/R/plot_enrichment_overall.R +++ b/R/plot_enrichment_overall.R @@ -46,7 +46,7 @@ #' BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 #' enrichment_df <- get_df_enrichment( #' input, segregated_input, motifs, genome_build, -#' reference_index = 1, workers = 1 +#' reference_index = 1 #' ) #' label_colours <- c("red", "cyan") #' diff --git a/README.md b/README.md index 3e8db7b..e6c7370 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ status](https://github.com/neurogenomics/MotifPeeker/workflows/rworkflows/badge. 
**Authors:** ***Hiranyamaya (Hiru) Dash, Thomas Roberts, Nathan Skene*** -**Updated:** ***Nov-04-2024*** +**Updated:** ***Nov-05-2024*** ## Introduction diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd index 276097b..70e4a18 100644 --- a/inst/markdown/MotifPeeker.Rmd +++ b/inst/markdown/MotifPeeker.Rmd @@ -45,7 +45,7 @@ params: value: NULL save_runfiles: value: NULL - workers: + BPPARAM: value: NULL debug: value: FALSE @@ -153,7 +153,7 @@ peak_width_df <- ## Motif-Summit Distances motif_summit_dist_df <- get_df_distances( - result, user_motifs, genome_build, out_dir_extra, params$workers, + result, user_motifs, genome_build, out_dir_extra, params$BPPARAM, params$meme_path, params$debug ) @@ -170,7 +170,7 @@ if (comparison_metrics) { if (user_motif_metrics) { enrichment_df <- get_df_enrichment( result, segregated_peaks, user_motifs, genome_build, - params$reference_index, out_dir_extra, params$workers, + params$reference_index, out_dir_extra, params$BPPARAM, params$meme_path, params$verbose ) } @@ -184,17 +184,17 @@ if (denovo_metrics) { unlist(segregated_peaks), params$trim_seq_width, genome_build, params$denovo_motifs, filter_n = params$filter_n, out_dir = out_dir_extra, meme_path = params$meme_path, - workers = params$workers, verbose = params$verbose, debug = params$debug + BPPARAM = params$BPPARAM, verbose = params$verbose, debug = params$debug ) ## Run TOMTOM denovo_res$similar_motifs <- find_motifs( denovo_res$streme, motif_db, out_dir = out_dir_extra, - meme_path = params$meme_path, workers = params$workers, + meme_path = params$meme_path, BPPARAM = params$BPPARAM, verbose = params$verbose, debug = params$debug ) ## Compare motifs denovo_res$comparisons <- motif_similarity( - denovo_res$streme, workers = params$workers + denovo_res$streme, BPPARAM = params$BPPARAM ) } ``` diff --git a/man/MotifPeeker.Rd b/man/MotifPeeker.Rd index f88967e..4896bb6 100644 --- a/man/MotifPeeker.Rd +++ b/man/MotifPeeker.Rd @@ -24,7 +24,7 @@ MotifPeeker( 
out_dir = tempdir(), save_runfiles = FALSE, display = if (interactive()) "browser", - workers = 2, + BPPARAM = BiocParallel::SerialParam(), quiet = TRUE, debug = FALSE, verbose = FALSE @@ -35,8 +35,8 @@ MotifPeeker( GRanges objects generated using \code{\link{read_peak_file}}. Currently, peak files from the following peak-calling tools are supported: \itemize{ - \item MACS2: \code{.narrowPeak} files - \item SEACR: \code{.bed} files + \item MACS2: \code{.narrowPeak} files + \item SEACR: \code{.bed} files } ENCODE file IDs can also be provided to automatically fetch peak file(s) from the ENCODE database.} @@ -126,14 +126,23 @@ generated during the run, such as those from FIMO and AME. (default = FALSE)} \item{display}{A character vector specifying the display mode for the HTML report once it is generated. (default = NULL) Options are: \itemize{ - \item \code{"browser"}: Open the report in the default web browser. - \item \code{"rstudio"}: Open the report in the RStudio Viewer. - \item \code{NULL}: Do not open the report. + \item \code{"browser"}: Open the report in the default web browser. + \item \code{"rstudio"}: Open the report in the RStudio Viewer. + \item \code{NULL}: Do not open the report. }} -\item{workers}{An integer specifying the number of threads to use for -parallel processing. (default = 1)\cr -\strong{IMPORTANT:} For each worker, please ensure a minimum of 6GB of +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +enabling parallel execution. (default = SerialParam(), single-CPU run)\cr\cr +Following are two examples of how to set up parallel processing: +\itemize{ + \item \code{BPPARAM = BiocParallel::MulticoreParam(4)}: Uses 4 + CPU cores for parallel processing. + \item \code{library("BiocParallel")} followed by + \code{register(MulticoreParam(4))} sets all subsequent BiocParallel + functions to use 4 CPU cores. \code{MotifPeeker()} must be run + with \code{BPPARAM = BiocParallel::bpparam()}. 
+} +\strong{IMPORTANT:} For each worker, please ensure a minimum of 8GB of memory (RAM) is available as \code{denovo_motif_discovery} is memory-intensive.} @@ -162,20 +171,20 @@ denovo_motif_discovery disabled. However, de-novo motif discovery can take hours to complete. To make computation faster, we highly recommend tuning the following arguments: \describe{ - \item{\code{workers}}{Running motif discovery in parallel can - significantly reduce runtime, but it is very memory-intensive, consuming - upwards of 10GB of RAM per thread. Memory starvation can greatly slow the - process, so set \code{workers} with caution.} - \item{\code{denovo_motifs}}{The number of motifs to discover per sequence - group exponentially increases runtime. We recommend no more than 5 - motifs to make a meaningful inference.} - \item{\code{trim_seq_width}}{Trimming sequences before running de-novo - motif discovery can significantly reduce the search space. Sequence - length can exponentially increase runtime. We recommend running the - script with \code{denovo_motif_discovery = FALSE} and studying the - motif-summit distance distribution under general metrics to find the - sequence length that captures most motifs. A good starting point is 150 - but it can be reduced further if appropriate.} + \item{\code{BPPARAM=MulticoreParam(x)}}{Running motif discovery in + parallel can significantly reduce runtime, but it is very + memory-intensive, consuming 10+GB of RAM per thread. Memory starvation can + greatly slow the process, so set the number of cores with caution.} + \item{\code{denovo_motifs}}{The number of motifs to discover per sequence + group exponentially increases runtime. We recommend no more than 5 + motifs to make a meaningful inference.} + \item{\code{trim_seq_width}}{Trimming sequences before running de-novo + motif discovery can significantly reduce the search space. Sequence + length can exponentially increase runtime. 
We recommend running the + script with \code{denovo_motif_discovery = FALSE} and studying the + motif-summit distance distribution under general metrics to find the + sequence length that captures most motifs. A good starting point is 150 + but it can be reduced further if appropriate.} } } \note{ @@ -224,7 +233,6 @@ motifs <- list( motif_db = NULL, download_buttons = TRUE, out_dir = tempdir(), - workers = 1, debug = FALSE, quiet = TRUE, verbose = FALSE diff --git a/man/bpapply.Rd b/man/bpapply.Rd index 5b2301e..5012377 100644 --- a/man/bpapply.Rd +++ b/man/bpapply.Rd @@ -8,7 +8,7 @@ bpapply( X, FUN, apply_fun = BiocParallel::bplapply, - workers = 1, + BPPARAM = BiocParallel::bpparam(), progressbar = FALSE, force_snowparam = FALSE, verbose = FALSE, @@ -28,27 +28,12 @@ bpapply( \item{apply_fun}{A \code{\link[BiocParallel]{BiocParallel}} function to use for parallel processing. (default = \code{BiocParallel::bplapply})} -\item{workers}{The number of workers to use for parallel processing.} - -\item{progressbar}{ - \code{logical(1)} Enable progress bar (based on plyr:::progress_text). - } - -\item{force_snowparam}{A logical indicating whether to force the use of -\link[BiocParallel]{SnowParam} object.} - -\item{verbose}{A logical indicating whether to print verbose messages while -running the function. (default = FALSE)} +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +specifying run parameters. (default = bpparam())} \item{...}{ Arguments passed on to \code{\link[BiocParallel:bplapply]{BiocParallel::bplapply}}, \code{\link[BiocParallel:bpmapply]{BiocParallel::bpmapply}} \describe{ - \item{\code{BPPARAM}}{ - An optional \code{\link[BiocParallel]{BiocParallelParam}} instance - determining the parallel back-end to be used during evaluation, or a - \code{list} of \code{BiocParallelParam} instances, to be applied in - sequence for nested calls to \pkg{BiocParallel} functions. 
- } \item{\code{BPREDO}}{A \code{list} of output from \code{bplapply} with one or more failed elements. When a list is given in \code{BPREDO}, \code{bpok} is used to identify errors, tasks are rerun and inserted @@ -72,14 +57,13 @@ Output relevant to the \code{apply_fun} specified. } \description{ Light wrapper around \code{\link[BiocParallel]{BiocParallel}} functions that -automatically sets the appropriate parameters based on the number of workers -specified. +automatically applies the appropriate parallel function. } \examples{ half_it <- function(arg1) return(arg1 / 2) x <- seq_len(10) -res <- MotifPeeker:::bpapply(x, half_it, workers = 2) +res <- MotifPeeker:::bpapply(x, half_it) print(res) } diff --git a/man/denovo_motifs.Rd b/man/denovo_motifs.Rd index 1690cb2..3c448b2 100644 --- a/man/denovo_motifs.Rd +++ b/man/denovo_motifs.Rd @@ -14,7 +14,7 @@ denovo_motifs( filter_n = 6, out_dir = tempdir(), meme_path = NULL, - workers = 1, + BPPARAM = BiocParallel::SerialParam(), verbose = FALSE, debug = FALSE, ... @@ -54,13 +54,14 @@ results to. (default = \code{tempdir()})} \item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default search behavior as described in \code{check_meme_install()} if unset.} -\item{workers}{The number of workers to use for parallel processing.} +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +specifying run parameters. (default = SerialParam(), single core run)} \item{verbose}{A logical indicating whether to print verbose messages while running the function. (default = FALSE)} -\item{debug}{A logical indicating whether to print debug/error messages in -the HTML report. (default = FALSE)} +\item{debug}{A logical indicating whether to print debug messages while +running the function. (default = FALSE)} \item{...}{Additional arguments to pass to \code{STREME}. 
For more information, refer to the official MEME Suite documentation on diff --git a/man/find_motifs.Rd b/man/find_motifs.Rd index 434bf98..ed43901 100644 --- a/man/find_motifs.Rd +++ b/man/find_motifs.Rd @@ -9,7 +9,7 @@ find_motifs( motif_db, out_dir = tempdir(), meme_path = NULL, - workers = 1, + BPPARAM = BiocParallel::bpparam(), verbose = FALSE, debug = FALSE, ... @@ -31,13 +31,14 @@ results to. (default = \code{tempdir()})} \item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default search behavior as described in \code{check_meme_install()} if unset.} -\item{workers}{The number of workers to use for parallel processing.} +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +specifying run parameters. (default = bpparam())} \item{verbose}{A logical indicating whether to print verbose messages while running the function. (default = FALSE)} -\item{debug}{A logical indicating whether to print debug/error messages in -the HTML report. (default = FALSE)} +\item{debug}{A logical indicating whether to print debug messages while +running the function. (default = FALSE)} \item{...}{Additional arguments to pass to \code{TOMTOM}. For more information, refer to the official MEME Suite documentation on diff --git a/man/get_bpparam.Rd b/man/get_bpparam.Rd deleted file mode 100644 index 2a57b59..0000000 --- a/man/get_bpparam.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_bpparam.R -\name{get_bpparam} -\alias{get_bpparam} -\title{Get parameters for \link[BiocParallel]{BiocParallel}} -\usage{ -get_bpparam( - workers, - progressbar = workers > 1, - force_snowparam = FALSE, - verbose = FALSE -) -} -\arguments{ -\item{workers}{The number of workers to use for parallel processing.} - -\item{progressbar}{ - \code{logical(1)} Enable progress bar (based on plyr:::progress_text). 
- } - -\item{force_snowparam}{A logical indicating whether to force the use of -\link[BiocParallel]{SnowParam} object.} - -\item{verbose}{A logical indicating whether to print verbose messages while -running the function. (default = FALSE)} -} -\value{ -A \code{BPPARAM} object. -} -\description{ -Get appropriate parameters for \code{BiocParallel} based on the -number of workers specified. For less than 10 workers, the function returns a -\code{MulticoreParam} object. For 10 or more cores, the function -returns a \code{SnowParam} object. Since Windows supports -neither, the function returns a \code{SerialParam} object. As a -result, Windows users do not benefit from parallel processing. -} -\seealso{ -\link[BiocParallel]{BiocParallelParam} -} -\keyword{internal} diff --git a/man/get_df_distances.Rd b/man/get_df_distances.Rd index f73af2c..b2e1afb 100644 --- a/man/get_df_distances.Rd +++ b/man/get_df_distances.Rd @@ -9,7 +9,7 @@ get_df_distances( user_motifs, genome_build, out_dir = tempdir(), - workers = 1, + BPPARAM = BiocParallel::bpparam(), meme_path = NULL, verbose = FALSE ) @@ -38,9 +38,18 @@ are supported as abbreviated input.} \item{out_dir}{A \code{character} vector of output directory.} -\item{workers}{An integer specifying the number of threads to use for -parallel processing. (default = 1)\cr -\strong{IMPORTANT:} For each worker, please ensure a minimum of 6GB of +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +enabling parallel execution. (default = bpparam())\cr\cr +Following are two examples of how to set up parallel processing: +\itemize{ + \item \code{BPPARAM = BiocParallel::MulticoreParam(4)}: Uses 4 + CPU cores for parallel processing. + \item \code{library("BiocParallel")} followed by + \code{register(MulticoreParam(4))} sets all subsequent BiocParallel + functions to use 4 CPU cores. \code{MotifPeeker()} must then be run + with \code{BPPARAM = BiocParallel::bpparam()}.
+} +\strong{IMPORTANT:} For each worker, please ensure a minimum of 8GB of memory (RAM) is available as \code{denovo_motif_discovery} is memory-intensive.} @@ -83,8 +92,7 @@ motifs <- list( if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 - distances_df <- get_df_distances(input, motifs, genome_build, - workers = 1) + distances_df <- get_df_distances(input, motifs, genome_build) print(distances_df) } } diff --git a/man/get_df_enrichment.Rd b/man/get_df_enrichment.Rd index e9d67be..10704da 100644 --- a/man/get_df_enrichment.Rd +++ b/man/get_df_enrichment.Rd @@ -11,7 +11,7 @@ get_df_enrichment( genome_build, reference_index = 1, out_dir = tempdir(), - workers = 1, + BPPARAM = BiocParallel::bpparam(), meme_path = NULL, verbose = FALSE ) @@ -47,9 +47,18 @@ use as the reference dataset for comparison. Indexing starts from 1. \item{out_dir}{A \code{character} vector of output directory.} -\item{workers}{An integer specifying the number of threads to use for -parallel processing. (default = 1)\cr -\strong{IMPORTANT:} For each worker, please ensure a minimum of 6GB of +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +enabling parallel execution. (default = bpparam())\cr\cr +Following are two examples of how to set up parallel processing: +\itemize{ + \item \code{BPPARAM = BiocParallel::MulticoreParam(4)}: Uses 4 + CPU cores for parallel processing. + \item \code{library("BiocParallel")} followed by + \code{register(MulticoreParam(4))} sets all subsequent BiocParallel + functions to use 4 CPU cores. \code{MotifPeeker()} must then be run + with \code{BPPARAM = BiocParallel::bpparam()}.
+} +\strong{IMPORTANT:} For each worker, please ensure a minimum of 8GB of memory (RAM) is available as \code{denovo_motif_discovery} is memory-intensive.} @@ -106,7 +115,7 @@ reference_index <- 1 enrichment_df <- get_df_enrichment( input, segregated_input, motifs, genome_build, - reference_index = 1, workers = 1 + reference_index = 1 ) } } diff --git a/man/motif_similarity.Rd b/man/motif_similarity.Rd index a928ee1..929929c 100644 --- a/man/motif_similarity.Rd +++ b/man/motif_similarity.Rd @@ -8,7 +8,7 @@ motif_similarity( streme_out, method = "PCC", normalise.scores = TRUE, - workers = 1, + BPPARAM = BiocParallel::bpparam(), ... ) } @@ -24,7 +24,8 @@ Similarity scores are multiplied by the ratio of aligned positions to the total number of positions in the larger motif, and the inverse for distance scores.} -\item{workers}{The number of workers to use for parallel processing.} +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam-class}} object +specifying run parameters. (default = bpparam())} \item{...}{ Arguments passed on to \code{\link[universalmotif:compare_motifs]{universalmotif::compare_motifs}} @@ -166,8 +167,7 @@ data("CTCF_ChIP_peaks", package = "MotifPeeker") genome_build = genome_build, denovo_motifs = 2, filter_n = 6, - out_dir = tempdir(), - workers = 1) + out_dir = tempdir()) similarity_matrices <- motif_similarity(denovo_motifs) print(similarity_matrices) } diff --git a/man/plot_enrichment_individual.Rd b/man/plot_enrichment_individual.Rd index 5c7e09a..d189a4c 100644 --- a/man/plot_enrichment_individual.Rd +++ b/man/plot_enrichment_individual.Rd @@ -78,7 +78,7 @@ motifs <- list( BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 enrichment_df <- get_df_enrichment( input, segregated_input, motifs, genome_build, - reference_index = 1, workers = 1 + reference_index = 1 ) label_colours <- c("red", "cyan") diff --git a/man/plot_enrichment_overall.Rd b/man/plot_enrichment_overall.Rd index d961211..3acc797 100644 --- 
a/man/plot_enrichment_overall.Rd +++ b/man/plot_enrichment_overall.Rd @@ -66,7 +66,7 @@ motifs <- list( BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 enrichment_df <- get_df_enrichment( input, segregated_input, motifs, genome_build, - reference_index = 1, workers = 1 + reference_index = 1 ) label_colours <- c("red", "cyan") diff --git a/tests/testthat/test-MotifPeeker.R b/tests/testthat/test-MotifPeeker.R index a4e9a4e..4645235 100644 --- a/tests/testthat/test-MotifPeeker.R +++ b/tests/testthat/test-MotifPeeker.R @@ -47,7 +47,6 @@ test_that("MotifPeeker produces output files", { denovo_motifs = 2, motif_db = NULL, download_buttons = TRUE, - workers = 1, out_dir = tempdir(), display = NULL, debug = FALSE, @@ -72,7 +71,6 @@ test_that("MotifPeeker produces output files", { motif_db = NULL, download_buttons = TRUE, out_dir = tempdir(), - workers = 1, display = NULL, debug = FALSE, verbose = FALSE @@ -99,7 +97,6 @@ test_that("MotifPeeker produces output files", { motif_db = NULL, download_buttons = TRUE, out_dir = tempdir(), - workers = 1, display = NULL, debug = FALSE, verbose = FALSE diff --git a/tests/testthat/test-bpapply.R b/tests/testthat/test-bpapply.R index 850dc2f..ddc4310 100644 --- a/tests/testthat/test-bpapply.R +++ b/tests/testthat/test-bpapply.R @@ -11,17 +11,12 @@ test_that("bpapply works", { apply_fun = "does_not_exist")) ### bplapply ### - res <- MotifPeeker:::bpapply(x, test_func, workers = 2) - expect_equal(unlist(res), x) - - ### SnowParam ### - res <- MotifPeeker:::bpapply(x, test_func, workers = 1, - force_snowparam = TRUE, progressbar = FALSE) + res <- MotifPeeker:::bpapply(x, test_func) expect_equal(unlist(res), x) ### bpmapply ### res <- MotifPeeker:::bpapply(x, test_func, - apply_fun = BiocParallel::bpmapply, workers = 2, + apply_fun = BiocParallel::bpmapply, MoreArgs = list(arg2 = y), progressbar = FALSE) expect_equal(res[1,2], 3) }) diff --git a/tests/testthat/test-denovo_motif_funcs.R b/tests/testthat/test-denovo_motif_funcs.R 
index 3e5add4..1babd39 100644 --- a/tests/testthat/test-denovo_motif_funcs.R +++ b/tests/testthat/test-denovo_motif_funcs.R @@ -14,7 +14,6 @@ test_that("De-novo motif enrichment functions works", { denovo_motifs = 2, filter_n = 6, out_dir = tempdir(), - workers = 1, verbose = FALSE, debug = FALSE)) @@ -29,14 +28,13 @@ test_that("De-novo motif enrichment functions works", { motif_db <- get_JASPARCORE() res2 <- find_motifs(res, motif_db = motif_db, - workers = 1, verbose = TRUE, debug = TRUE) expect_length(res2, 4) expect_equal(res2[[1]][[1]]$motif[[1]]@alphabet, "DNA") ## motif_similarity ### - res3 <- motif_similarity(res, workers = 1) + res3 <- motif_similarity(res) expect_true(all(vapply(res3, is.matrix, logical(1)))) ### plot_motif_comparison ### diff --git a/tests/testthat/test-enrichment_funcs.R b/tests/testthat/test-enrichment_funcs.R index e98b360..55c2384 100644 --- a/tests/testthat/test-enrichment_funcs.R +++ b/tests/testthat/test-enrichment_funcs.R @@ -22,8 +22,7 @@ test_that("enrichment plotting and datatable functions works", { genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 enrichment_df <- get_df_enrichment( - input, segregated_input, motifs, genome_build, reference_index = 1, - workers = 1 + input, segregated_input, motifs, genome_build, reference_index = 1 ) label_colours <- c("red", "cyan") diff --git a/tests/testthat/test-get_df_distances.R b/tests/testthat/test-get_df_distances.R index b90b46e..6615432 100644 --- a/tests/testthat/test-get_df_distances.R +++ b/tests/testthat/test-get_df_distances.R @@ -17,7 +17,7 @@ test_that("get_df_distances works", { ) genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 - distances_df <- get_df_distances(input, motifs, genome_build, workers = 1, + distances_df <- get_df_distances(input, motifs, genome_build, verbose = FALSE) expect_true(is.data.frame(distances_df)) diff --git a/vignettes/MotifPeeker.Rmd b/vignettes/MotifPeeker.Rmd index 710c2af..9653e58 100644 --- 
a/vignettes/MotifPeeker.Rmd +++ b/vignettes/MotifPeeker.Rmd @@ -201,7 +201,7 @@ if (MotifPeeker:::confirm_meme_install(continue = TRUE)) { motif_db = NULL, # Use default motif database (JASPAR) download_buttons = TRUE, out_dir = tempdir(), # Save output in a temporary directory - workers = 2, # Use two CPU cores on a 16GB RAM machine + BPPARAM = BiocParallel::SerialParam(), # Run on a single CPU core (no parallelism) debug = FALSE, quiet = TRUE, verbose = TRUE @@ -289,10 +289,10 @@ To make computation faster, we highly recommend tuning the following arguments:
Details -- `workers`: Running motif discovery in parallel can significantly reduce - runtime, but it is very memory-intensive, consuming upwards of 10GB of RAM - per thread. Memory starvation can greatly slow the process, so set `workers` - with caution. +- `BPPARAM = MulticoreParam(x)`: Running motif discovery in parallel can + significantly reduce runtime, but it is very memory-intensive, consuming + upwards of 10GB of RAM per thread. Memory starvation can greatly slow the + process, so set the number of workers (`x`) with caution. - `denovo_motifs`: The number of motifs to discover per sequence group exponentially increases runtime. We recommend no more than 5 motifs to make a meaningful inference. diff --git a/vignettes/troubleshooting.Rmd b/vignettes/troubleshooting.Rmd index cb3f8d6..391cb10 100644 --- a/vignettes/troubleshooting.Rmd +++ b/vignettes/troubleshooting.Rmd @@ -37,7 +37,7 @@ If you encounter an issue that is not covered, please open an issue on the 1. **Function takes too long to run** It is likely de-novo motif discovery is what is taking too long to run. Try - reducing the number of `workers` if you are running out of memory while + reducing the number of workers if you are running out of memory while running the `MotifPeeker()` function. Additionally, follow the [runtime guidance](https://neurogenomics.github.io/MotifPeeker/articles/MotifPeeker.html#runtime) for `MotifPeeker()`.