diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 653aa6e..777f092 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,7 +29,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -51,3 +51,4 @@ jobs: MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }} with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index f4c4ef2..f60d047 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -14,7 +14,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index ed7650c..a7276e8 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -22,7 +22,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,7 +41,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ce99f80..709a124 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -15,7 +15,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -43,12 +43,12 @@ jobs: if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; 
|| true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 76625f9..ababa6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tarflow.iquizoo Title: Setup "targets" Workflows for "iquizoo" Data Processing -Version: 3.9.3 +Version: 3.9.3.9000 Authors@R: c( person("Liang", "Zhang", , "psychelzh@outlook.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9041-1150")), @@ -22,14 +22,11 @@ Imports: cli, data.iquizoo (>= 2023.10.22), DBI, - dplyr, memoise, - purrr, rlang (>= 1.0.0), stringr, tarchetypes, - targets, - tidyr + targets Suggests: bit64, covr, @@ -37,11 +34,11 @@ Suggests: lifecycle, odbc, preproc.iquizoo (>= 2.6.0), + purrr, RMariaDB (>= 1.3.1), roxygen2, testthat (>= 3.0.0), tibble, - tidytable, withr Remotes: psychelzh/data.iquizoo, diff --git a/NAMESPACE b/NAMESPACE index d137ab8..c1939fc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,8 +7,9 @@ export(fetch_iquizoo_mem) export(setup_option_file) export(setup_source) export(setup_templates) +export(tar_fetch_data) export(tar_prep_iquizoo) +export(tar_prep_proj) +export(tar_prep_raw) export(use_targets_pipeline) -import(dplyr) import(rlang) -import(tidyr) diff --git a/NEWS.md b/NEWS.md index 321ae48..49371ee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +# tarflow.iquizoo (development version) + +## Breaking Changes + +* Now `query` argument of `fetch_data()` is optional. If not specified, the default query stored in the package will be used. + +## Enhancements + +* Added `suffix_format` argument to `fetch_data()` to specify the format of suffix in the query file. This is useful when you want to use a different format of suffix in the query file. +* Enhanced the documentation of `fetch_data()`. 
+* Let package not depend on dplyr, tidyr and purrr packages (#84). +* Exported more targets factory functions: `tar_prep_proj()`, `tar_fetch_data()`, `tar_prep_raw()`. +* Do not add `progress_hash` objects when `check_progress` is set to `FALSE` in `tar_prep_iquizoo()`. + # tarflow.iquizoo 3.9.3 ## Breaking Changes diff --git a/R/database.R b/R/database.R index ef540bb..e31669c 100644 --- a/R/database.R +++ b/R/database.R @@ -70,25 +70,46 @@ fetch_iquizoo_mem <- function(cache = NULL) { #' Fetch data from iQuizoo database #' -#' @param query A parameterized SQL query. Note the query should also contain -#' a `glue` expression to inject the table name, i.e., `"{ table_name }"`. +#' This function is a wrapper of [fetch_iquizoo()], which is used as a helper +#' function to fetch data from the iQuizoo database. +#' +#' The data essentially means one of the two types of data: raw data or scores. +#' The raw data is the original data collected from the game, while the scores +#' are the scores calculated by the iQuizoo system. While scores can also be +#' calculated from the raw data, the pre-calculated scores are used for some +#' quick analysis. +#' +#' The data is separated by project date, so the table name is suffixed by the +#' project date, which is automatically fetched from the database by this +#' function. You could set the format of the date suffix by `suffix_format`, +#' although currently you should not need to change it because it probably will +#' not change in the future. Finally, this suffix should be substituted into the +#' query, which should contain an expression to inject the table name, i.e., +#' `"{table_name}"`. +#' #' @param project_id The project id to be bound to the query. #' @param game_id The game id to be bound to the query. #' @param ... Further arguments passed to [fetch_iquizoo()]. #' @param what What to fetch. Can be either "raw_data" or "scores". +#' @param query A parameterized SQL query.
A default query file is stored in the +#' package, which is often enough for most cases. You can also specify your +#' own query file by this argument. See details for more information. +#' @param suffix_format The format of the date suffix. See details for more +#' information. #' @return A [data.frame] contains the fetched data. #' @export -fetch_data <- function(query, project_id, game_id, ..., - what = c("raw_data", "scores")) { +fetch_data <- function(project_id, game_id, ..., + what = c("raw_data", "scores"), + query = NULL, + suffix_format = "%Y0101") { check_dots_used() what <- match.arg(what) - # the database stores data from each year into a separate table with the - # suffix of course date with the format "0101" + # data separated by project date, so we need to get the project date first suffix <- package_file("sql", "project_date.sql") |> read_file() |> fetch_iquizoo(params = project_id) |> .subset2("project_date") |> - format("%Y0101") + format(suffix_format) table_name <- paste0( switch(what, raw_data = "content_orginal_data_", @@ -96,6 +117,7 @@ fetch_data <- function(query, project_id, game_id, ..., ), suffix ) + query <- query %||% read_file(package_file("sql", paste0(what, ".sql"))) fetch_iquizoo( stringr::str_glue( query, @@ -105,96 +127,3 @@ fetch_data <- function(query, project_id, game_id, ..., params = list(project_id, game_id) ) } - -#' Set data source -#' -#' @param driver The driver used. Set as an option of `"tarflow.driver"`. -#' Options are [odbc::odbc()] and [RMariaDB::MariaDB()], both of which need -#' pre-configurations. Default to first available one. -#' @param dsn The data source name of an **ODBC** database connector. See -#' [odbc::dbConnect()] for more information. Used when `driver` is set as -#' [odbc::odbc()]. -#' @param groups Section identifier in the `default.file`. See -#' [RMariaDB::MariaDB()] for more information. Used when `driver` is set as -#' [RMariaDB::MariaDB()]. 
-#' @return An S3 class of `tarflow.source` with the options. -#' @export -setup_source <- function(driver = getOption("tarflow.driver"), - dsn = getOption("tarflow.dsn"), - groups = getOption("tarflow.groups")) { - structure( - list( - driver = driver, - dsn = dsn, - groups = groups - ), - class = "tarflow.source" - ) -} - -#' Check if the database based on the given data source is ready -#' -#' @param source The data source from which data is fetched. See -#' [setup_source()] for details. -#' @return TRUE if the database is ready, FALSE otherwise. -#' @export -check_source <- function(source = setup_source()) { - if (!inherits(source, "tarflow.source")) { - cli::cli_abort( - "{.arg source} must be created by {.fun setup_source}.", - class = "tarflow_bad_source" - ) - } - # nocov start - if (inherits(source$driver, "OdbcDriver")) { - return(DBI::dbCanConnect(source$driver, dsn = source$dsn)) - } - # nocov end - if (inherits(source$driver, "MariaDBDriver")) { - return(DBI::dbCanConnect(source$driver, groups = source$groups)) - } - return(FALSE) -} - -# nocov start - -#' Setup MySQL database connection option file -#' -#' This function will create a MySQL option file at the given path. To ensure it -#' works, set these environment variables before calling this function: -#' - `MYSQL_HOST`: The host name of the MySQL server. -#' - `MYSQL_USER`: The user name of the MySQL server. -#' - `MYSQL_PASSWORD`: The password of the MySQL server. -#' -#' @param path The path to the option file. Default location is operating system -#' dependent. On Windows, it is `C:/my.cnf`. On other systems, it is -#' `~/.my.cnf`. -#' @param overwrite Whether to overwrite the existing option file. -#' @param quietly A logical indicates whether message should be suppressed. -#' @return NULL (invisible). 
-#' @export -setup_option_file <- function(path = NULL, overwrite = FALSE, quietly = FALSE) { - my_cnf_tmpl <- read_file(package_file("database", "my.cnf.tmpl")) - path <- path %||% default_file() - if (file.exists(path) && !overwrite) { - if (!quietly) { - cli::cli_alert_warning( - "Option file already exists. Use {.arg overwrite = TRUE} to overwrite.", - class = "tarflow_option_file_exists" - ) - } - return(invisible()) - } - writeLines(stringr::str_glue(my_cnf_tmpl), path) -} - -# helper functions -default_file <- function() { - if (Sys.info()["sysname"] == "Windows") { - return("C:/my.cnf") - } else { - return("~/.my.cnf") - } -} - -# nocov end diff --git a/R/setup.R b/R/setup.R new file mode 100644 index 0000000..0c184a6 --- /dev/null +++ b/R/setup.R @@ -0,0 +1,131 @@ +#' Set up templates used to fetch data +#' +#' If you want to extract data based on your own parameters, you should use this +#' function to set up your own SQL templates. Note that the SQL queries should +#' be parameterized. +#' +#' @param contents The SQL template file used to fetch contents. At least +#' `project_id` and `game_id` columns should be included in the fetched data +#' based on the template. `project_id` will be used as the only parameter in +#' `users` and `project` templates, while all three will be used in `raw_data` +#' and `scores` templates. +#' @param users The SQL template file used to fetch users. Usually you don't +#' need to change this. +#' @param raw_data The SQL template file used to fetch raw data. See +#' [fetch_data()] for details. Usually you don't need to change this. +#' @param scores The SQL template file used to fetch scores. See [fetch_data()] +#' for details. Usually you don't need to change this. +#' @param progress_hash The SQL template file used to fetch progress hash. +#' Usually you don't need to change this. +#' @return A S3 object of class `tarflow.template` with the options. 
+#' @export +setup_templates <- function(contents = NULL, + users = NULL, + raw_data = NULL, + scores = NULL, + progress_hash = NULL) { + structure( + list( + contents = contents %||% package_file("sql", "contents.sql"), + users = users %||% package_file("sql", "users.sql"), + raw_data = raw_data %||% package_file("sql", "raw_data.sql"), + scores = scores %||% package_file("sql", "scores.sql"), + progress_hash = progress_hash %||% + package_file("sql", "progress_hash.sql") + ), + class = "tarflow.template" + ) +} + +#' Set data source +#' +#' @param driver The driver used. Set as an option of `"tarflow.driver"`. +#' Options are [odbc::odbc()] and [RMariaDB::MariaDB()], both of which need +#' pre-configurations. Default to first available one. +#' @param dsn The data source name of an **ODBC** database connector. See +#' [odbc::dbConnect()] for more information. Used when `driver` is set as +#' [odbc::odbc()]. +#' @param groups Section identifier in the `default.file`. See +#' [RMariaDB::MariaDB()] for more information. Used when `driver` is set as +#' [RMariaDB::MariaDB()]. +#' @return An S3 class of `tarflow.source` with the options. +#' @export +setup_source <- function(driver = getOption("tarflow.driver"), + dsn = getOption("tarflow.dsn"), + groups = getOption("tarflow.groups")) { + structure( + list( + driver = driver, + dsn = dsn, + groups = groups + ), + class = "tarflow.source" + ) +} + +#' Check if the database based on the given data source is ready +#' +#' @param source The data source from which data is fetched. See +#' [setup_source()] for details. +#' @return TRUE if the database is ready, FALSE otherwise. 
+#' @export +check_source <- function(source = setup_source()) { + if (!inherits(source, "tarflow.source")) { + cli::cli_abort( + "{.arg source} must be created by {.fun setup_source}.", + class = "tarflow_bad_source" + ) + } + # nocov start + if (inherits(source$driver, "OdbcDriver")) { + return(DBI::dbCanConnect(source$driver, dsn = source$dsn)) + } + # nocov end + if (inherits(source$driver, "MariaDBDriver")) { + return(DBI::dbCanConnect(source$driver, groups = source$groups)) + } + return(FALSE) +} + +# nocov start + +#' Setup MySQL database connection option file +#' +#' This function will create a MySQL option file at the given path. To ensure it +#' works, set these environment variables before calling this function: +#' - `MYSQL_HOST`: The host name of the MySQL server. +#' - `MYSQL_USER`: The user name of the MySQL server. +#' - `MYSQL_PASSWORD`: The password of the MySQL server. +#' +#' @param path The path to the option file. Default location is operating system +#' dependent. On Windows, it is `C:/my.cnf`. On other systems, it is +#' `~/.my.cnf`. +#' @param overwrite Whether to overwrite the existing option file. +#' @param quietly A logical indicates whether message should be suppressed. +#' @return NULL (invisible). +#' @export +setup_option_file <- function(path = NULL, overwrite = FALSE, quietly = FALSE) { + my_cnf_tmpl <- read_file(package_file("database", "my.cnf.tmpl")) + path <- path %||% default_file() + if (file.exists(path) && !overwrite) { + if (!quietly) { + cli::cli_alert_warning( + "Option file already exists. 
Use {.arg overwrite = TRUE} to overwrite.", + class = "tarflow_option_file_exists" + ) + } + return(invisible()) + } + writeLines(stringr::str_glue(my_cnf_tmpl), path) +} + +# helper functions +default_file <- function() { + if (Sys.info()["sysname"] == "Windows") { + return("C:/my.cnf") + } else { + return("~/.my.cnf") + } +} + +# nocov end diff --git a/R/targets.R b/R/targets.R index e82f1f5..3636270 100644 --- a/R/targets.R +++ b/R/targets.R @@ -78,14 +78,14 @@ tar_prep_iquizoo <- function(params, ..., "contents_origin", expr(unserialize(!!serialize(contents, NULL))) ), - tar_projects_info(contents, templates, check_progress), - purrr::map( + tar_prep_proj(contents, templates, check_progress), + sapply( what, - \(what) tar_fetch_data(contents, templates, what) - ) |> - purrr::list_flatten(), + \(what) tar_fetch_data(contents, templates, what, check_progress), + simplify = FALSE + ), if ("raw_data" %in% what && action_raw_data != "none") { - tar_action_raw_data(contents, action_raw_data) + tar_prep_raw(contents, action_raw_data) } ) c( @@ -102,70 +102,49 @@ tar_prep_iquizoo <- function(params, ..., ) } -#' Set up templates used to fetch data +#' Generate a set of targets for preparing project-level data #' -#' If you want to extract data based on your own parameters, you should use this -#' function to set up your own SQL templates. Note that the SQL queries should -#' be parameterized. +#' There are mainly two types of data to be fetched, i.e., the progress hash and +#' the user information. The former is used to check the progress of the +#' project, while the latter is used to identify the users involved in the +#' project. #' -#' @param contents The SQL template file used to fetch contents. At least -#' `project_id` and `game_id` columns should be included in the fetched data -#' based on the template. `project_id` will be used as the only parameter in -#' `users` and `project` templates, while all three will be used in `raw_data` -#' and `scores` templates. 
-#' @param users The SQL template file used to fetch users. Usually you don't -#' need to change this. -#' @param raw_data The SQL template file used to fetch raw data. See -#' [fetch_data()] for details. Usually you don't need to change this. -#' @param scores The SQL template file used to fetch scores. See [fetch_data()] -#' for details. Usually you don't need to change this. -#' @param progress_hash The SQL template file used to fetch progress hash. -#' Usually you don't need to change this. -#' @return A S3 object of class `tarflow.template` with the options. +#' @param contents The contents structure used as the configuration of data +#' fetching. +#' @param templates The SQL template files used to fetch data. See +#' [setup_templates()] for details. +#' @param check_progress Whether to check the progress hash. When set as `TRUE`, +#' a progress hash object named as `progress_hash_{project_id}` for each +#' project will be added into the target list. Set it as `FALSE` if the +#' projects are finalized. +#' @return A list of target objects.
#' @export -setup_templates <- function(contents = NULL, - users = NULL, - raw_data = NULL, - scores = NULL, - progress_hash = NULL) { - structure( - list( - contents = contents %||% package_file("sql", "contents.sql"), - users = users %||% package_file("sql", "users.sql"), - raw_data = raw_data %||% package_file("sql", "raw_data.sql"), - scores = scores %||% package_file("sql", "scores.sql"), - progress_hash = progress_hash %||% - package_file("sql", "progress_hash.sql") - ), - class = "tarflow.template" - ) -} - -# helper functions -tar_projects_info <- function(contents, templates, check_progress) { +tar_prep_proj <- function(contents, + templates = setup_templates(), + check_progress = TRUE) { c( - tarchetypes::tar_map( - contents |> - distinct(.data$project_id) |> - mutate(project_id = as.character(.data$project_id)), - targets::tar_target_raw( - "progress_hash", - expr( - fetch_iquizoo( - !!read_file(templates[["progress_hash"]]), - params = list(project_id) - ) - ), - packages = "tarflow.iquizoo", - cue = targets::tar_cue(if (check_progress) "always") + if (check_progress) { + tarchetypes::tar_map( + data.frame(project_id = as.character(unique(contents$project_id))), + targets::tar_target_raw( + "progress_hash", + bquote( + fetch_iquizoo( + .(read_file(templates[["progress_hash"]])), + params = list(project_id) + ) + ), + packages = "tarflow.iquizoo", + cue = targets::tar_cue("always") + ) ) - ), + }, targets::tar_target_raw( "users", - expr( + bquote( fetch_iquizoo( - !!read_file(templates[["users"]]), - params = list(!!unique(contents$project_id)) + .(read_file(templates[["users"]])), + params = list(.(unique(contents$project_id))) ) |> unique() ), @@ -174,57 +153,92 @@ tar_projects_info <- function(contents, templates, check_progress) { ) } -tar_fetch_data <- function(contents, templates, what) { - tarchetypes::tar_map( - contents |> - distinct(.data$project_id, .data$game_id) |> - mutate( - across(c("project_id", "game_id"), as.character) - ) |> - 
summarise( - progress_hash = list( - syms( - stringr::str_glue("progress_hash_{project_id}") +#' Generate a set of targets for fetching data +#' +#' This target factory is the main part of the `tar_prep_iquizoo` function. It +#' fetches the raw data and scores for each project and task/game combination. +#' +#' @param contents The contents structure used as the configuration of data +#' fetching. +#' @param templates The SQL template files used to fetch data. See +#' [setup_templates()] for details. +#' @param what What to fetch. +#' @param check_progress Whether to check the progress hash. If set as `TRUE`, +#' before fetching the data, the progress hash objects named as +#' `progress_hash_{project_id}` will be depended on, which are typically +#' generated by [tar_prep_proj()]. If the projects are finalized, set this +#' argument as `FALSE`. +#' @return A list of target objects. +#' @export +tar_fetch_data <- function(contents, + templates = setup_templates(), + what = c("raw_data", "scores"), + check_progress = TRUE) { + what <- match.arg(what) + by( + contents, + contents$game_id, + \(contents) { + project_ids <- as.character(unique(contents$project_id)) + game_id <- as.character(unique(contents$game_id)) + targets::tar_target_raw( + paste0(what, "_", game_id), + as.call(c( + quote(`{`), + if (check_progress) { + bquote( + list(..(syms(paste0("progress_hash_", project_ids)))), + splice = TRUE + ) + }, + bquote( + do.call( + rbind, + .mapply( + fetch_data, + list(.(project_ids), .(game_id)), + MoreArgs = list( + what = .(what), + query = .(read_file(templates[[what]])) + ) + ) + ) ) - ), - project_id = list(.data$project_id), - .by = "game_id" - ), - names = "game_id", - targets::tar_target_raw( - what, - expr({ - progress_hash - purrr::pmap( - list( - query = !!read_file(templates[[what]]), - project_id = project_id, - game_id = game_id, - what = !!what - ), - fetch_data - ) |> - purrr::list_rbind() - }), - packages = "tarflow.iquizoo" - ) + )), + packages =
"tarflow.iquizoo" + ) + } ) } -tar_action_raw_data <- function(contents, - action_raw_data, - name_data = "raw_data", - name_parsed = "raw_data_parsed", - name_indices = "indices") { +#' Generate a set of targets for wrangling and pre-processing raw data +#' +#' This target factory is the main part of the `tar_prep_iquizoo` function. It +#' wrangles the raw data into a tidy format and calculates indices based on the +#' parsed data. +#' +#' @param contents The contents structure used as the configuration of data +#' fetching. +#' @param action_raw_data The action to be taken on the fetched raw data. +#' @param name_data The name of the raw data target. +#' @param name_parsed The name of the parsed data target. +#' @param name_indices The name of the indices target. +#' @return A list of target objects. +#' @export +tar_prep_raw <- function(contents, + action_raw_data = c("all", "parse", "none"), + name_data = "raw_data", + name_parsed = "raw_data_parsed", + name_indices = "indices") { + action_raw_data <- match.arg(action_raw_data) if (action_raw_data == "all") action_raw_data <- c("parse", "preproc") - contents <- distinct(contents, .data$game_id) |> - mutate( - tar_data = syms(sprintf("%s_%s", name_data, game_id)), - tar_parsed = syms(sprintf("%s_%s", name_parsed, game_id)), - tar_indices = syms(sprintf("%s_%s", name_indices, game_id)) - ) + contents <- unique(contents["game_id"]) + contents$tar_data <- syms(sprintf("%s_%s", name_data, contents$game_id)) + contents$tar_parsed <- syms(sprintf("%s_%s", name_parsed, contents$game_id)) + contents$tar_indices <- syms(sprintf("%s_%s", name_indices, contents$game_id)) list( raw_data_parsed = if ("parse" %in% action_raw_data) { + check_installed("preproc.iquizoo", "becasue required in wrangling.") tarchetypes::tar_eval( targets::tar_target( tar_parsed, @@ -235,14 +249,14 @@ tar_action_raw_data <- function(contents, ) }, indices = if ("preproc" %in% action_raw_data) { + check_installed("preproc.iquizoo", "becasue required 
in pre-processing.") tarchetypes::tar_eval( targets::tar_target( tar_indices, preproc_data(tar_parsed, prep_fun, .input = input, .extra = extra), packages = "preproc.iquizoo" ), - contents |> - data.iquizoo::match_preproc(type = "inner") + data.iquizoo::match_preproc(contents, type = "inner") ) } ) @@ -254,7 +268,6 @@ objects <- function() { utils::globalVariables( c( - "progress_hash", "project_id", "game_id", "tar_data", "tar_parsed", "tar_indices", "wrangle_data", "preproc_data", "prep_fun", "input", "extra" diff --git a/R/zzz.R b/R/zzz.R index 754642d..73175b0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,6 +1,4 @@ #' @import rlang -#' @import dplyr -#' @import tidyr NULL # nocov start diff --git a/_pkgdown.yml b/_pkgdown.yml index 5606e05..4ff8f0e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,16 +7,19 @@ reference: contents: - use_targets_pipeline - tar_prep_iquizoo + - tar_prep_proj + - tar_fetch_data + - tar_prep_raw - title: "Low-level Database operations" desc: Functions to help you interact with database. contents: - fetch_iquizoo - fetch_iquizoo_mem - fetch_data - - check_source - - setup_option_file - title: "Miscellaneous Option Functions" desc: Functions to help you set up options. 
contents: - setup_templates - setup_source + - setup_option_file + - check_source diff --git a/man/check_source.Rd b/man/check_source.Rd index 9180d8c..0114107 100644 --- a/man/check_source.Rd +++ b/man/check_source.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/database.R +% Please edit documentation in R/setup.R \name{check_source} \alias{check_source} \title{Check if the database based on the given data source is ready} diff --git a/man/fetch_data.Rd b/man/fetch_data.Rd index a05fed2..2e65bad 100644 --- a/man/fetch_data.Rd +++ b/man/fetch_data.Rd @@ -4,12 +4,16 @@ \alias{fetch_data} \title{Fetch data from iQuizoo database} \usage{ -fetch_data(query, project_id, game_id, ..., what = c("raw_data", "scores")) +fetch_data( + project_id, + game_id, + ..., + what = c("raw_data", "scores"), + query = NULL, + suffix_format = "\%Y0101" +) } \arguments{ -\item{query}{A parameterized SQL query. Note the query should also contain -a \code{glue} expression to inject the table name, i.e., \code{"{ table_name }"}.} - \item{project_id}{The project id to be bound to the query.} \item{game_id}{The game id to be bound to the query.} @@ -17,10 +21,33 @@ a \code{glue} expression to inject the table name, i.e., \code{"{ table_name }"} \item{...}{Further arguments passed to \code{\link[=fetch_iquizoo]{fetch_iquizoo()}}.} \item{what}{What to fetch. Can be either "raw_data" or "scores".} + +\item{query}{A parameterized SQL query. A default query file is stored in the +package, which is often enough for most cases. You can also specify your +own query file by this argument. See details for more information.} + +\item{suffix_format}{The format of the date suffix. See details for more +information.} } \value{ A \link{data.frame} contains the fetched data. 
} \description{ -Fetch data from iQuizoo database +This function is a wrapper of \code{\link[=fetch_iquizoo]{fetch_iquizoo()}}, which is used as a helper +function to fetch data from the iQuizoo database. +} +\details{ +The data essentially means one of the two types of data: raw data or scores. +The raw data is the original data collected from the game, while the scores +are the scores calculated by the iQuizoo system. While scores can also be +calculated from the raw data, the pre-calculated scores are used for some +quick analysis. + +The data is separated by project date, so the table name is suffixed by the +project date, which is automatically fetched from the database by this +function. You could set the format of the date suffix by \code{suffix_format}, +although currently you should not need to change it because it probably will +not change in the future. Finally, this suffix should be substituted into the +query, which should contain an expression to inject the table name, i.e., +\code{"{table_name}"}.
} diff --git a/man/setup_option_file.Rd b/man/setup_option_file.Rd index e76ddbf..af9abdd 100644 --- a/man/setup_option_file.Rd +++ b/man/setup_option_file.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/database.R +% Please edit documentation in R/setup.R \name{setup_option_file} \alias{setup_option_file} \title{Setup MySQL database connection option file} diff --git a/man/setup_source.Rd b/man/setup_source.Rd index 7a47d67..f082d09 100644 --- a/man/setup_source.Rd +++ b/man/setup_source.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/database.R +% Please edit documentation in R/setup.R \name{setup_source} \alias{setup_source} \title{Set data source} diff --git a/man/setup_templates.Rd b/man/setup_templates.Rd index 16d708a..a66961e 100644 --- a/man/setup_templates.Rd +++ b/man/setup_templates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/targets.R +% Please edit documentation in R/setup.R \name{setup_templates} \alias{setup_templates} \title{Set up templates used to fetch data} diff --git a/man/tar_fetch_data.Rd b/man/tar_fetch_data.Rd new file mode 100644 index 0000000..8a52434 --- /dev/null +++ b/man/tar_fetch_data.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/targets.R +\name{tar_fetch_data} +\alias{tar_fetch_data} +\title{Generate a set of targets for fetching data} +\usage{ +tar_fetch_data( + contents, + templates = setup_templates(), + what = c("raw_data", "scores"), + check_progress = TRUE +) +} +\arguments{ +\item{contents}{The contents structure used as the configuration of data +fetching.} + +\item{templates}{The SQL template files used to fetch data. See +\code{\link[=setup_templates]{setup_templates()}} for details.} + +\item{what}{What to fetch.} + +\item{check_progress}{Whether to check the progress hash. 
If set as \code{TRUE}, +before fetching the data, the progress hash objects named as +\verb{progress_hash_\{project_id\}} will be depended on, which are typically +generated by \code{\link[=tar_prep_proj]{tar_prep_proj()}}. If the projects are finalized, set this +argument as \code{FALSE}.} +} +\value{ +A list of target objects. +} +\description{ +This target factory is the main part of the \code{tar_prep_iquizoo} function. It +fetches the raw data and scores for each project and task/game combination. +} diff --git a/man/tar_prep_proj.Rd b/man/tar_prep_proj.Rd new file mode 100644 index 0000000..17b2101 --- /dev/null +++ b/man/tar_prep_proj.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/targets.R +\name{tar_prep_proj} +\alias{tar_prep_proj} +\title{Generate a set of targets for preparing project-level data} +\usage{ +tar_prep_proj(contents, templates = setup_templates(), check_progress = TRUE) +} +\arguments{ +\item{contents}{The contents structure used as the configuration of data +fetching.} + +\item{templates}{The SQL template files used to fetch data. See +\code{\link[=setup_templates]{setup_templates()}} for details.} + +\item{check_progress}{Whether to check the progress hash. When set as \code{TRUE}, +a progress hash object named as \verb{progress_hash_\{project_id\}} for each +project will be added into the target list. Set it as \code{FALSE} if the +projects are finalized.} +} +\value{ +A list of target objects. +} +\description{ +There are mainly two types of data to be fetched, i.e., the progress hash and +the user information. The former is used to check the progress of the +project, while the latter is used to identify the users involved in the +project.
+} diff --git a/man/tar_prep_raw.Rd b/man/tar_prep_raw.Rd new file mode 100644 index 0000000..bbf5214 --- /dev/null +++ b/man/tar_prep_raw.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/targets.R +\name{tar_prep_raw} +\alias{tar_prep_raw} +\title{Generate a set of targets for wrangling and pre-processing raw data} +\usage{ +tar_prep_raw( + contents, + action_raw_data = c("all", "parse", "none"), + name_data = "raw_data", + name_parsed = "raw_data_parsed", + name_indices = "indices" +) +} +\arguments{ +\item{contents}{The contents structure used as the configuration of data +fetching.} + +\item{action_raw_data}{The action to be taken on the fetched raw data.} + +\item{name_data}{The name of the raw data target.} + +\item{name_parsed}{The name of the parsed data target.} + +\item{name_indices}{The name of the indices target.} +} +\value{ +A list of target objects. +} +\description{ +This target factory is the main part of the \code{tar_prep_iquizoo} function. It +wrangles the raw data into a tidy format and calculates indices based on the +parsed data. 
+} diff --git a/tests/testthat/_snaps/targets.md b/tests/testthat/_snaps/targets.md index 6cc92cc..2c60a50 100644 --- a/tests/testthat/_snaps/targets.md +++ b/tests/testthat/_snaps/targets.md @@ -3,6 +3,188 @@ { "type": "character", "attributes": {}, - "value": ["contents_origin", "indices_383674715747205", "indices_383679060169477", "indices_383791726646021", "indices_383795602420485", "progress_hash_555452035072389", "raw_data_383674715747205", "raw_data_383679060169477", "raw_data_383791726646021", "raw_data_383795602420485", "raw_data_388200929063813", "raw_data_388594665001861", "raw_data_391556354638725", "raw_data_parsed_383674715747205", "raw_data_parsed_383679060169477", "raw_data_parsed_383791726646021", "raw_data_parsed_383795602420485", "raw_data_parsed_388200929063813", "raw_data_parsed_388594665001861", "raw_data_parsed_391556354638725", "scores_383674715747205", "scores_383679060169477", "scores_383791726646021", "scores_383795602420485", "scores_388200929063813", "scores_388594665001861", "scores_391556354638725", "users"] + "value": ["contents_origin", "indices_383674715747205", "indices_383679060169477", "indices_383791726646021", "indices_383795602420485", "progress_hash_555452035072389", "raw_data_383674715747205", "raw_data_383679060169477", "raw_data_383791726646021", "raw_data_383795602420485", "raw_data_388200929063813", "raw_data_388594665001861", "raw_data_391556354638725", "raw_data_parsed_383674715747205", "raw_data_parsed_383679060169477", "raw_data_parsed_383791726646021", "raw_data_parsed_383795602420485", "raw_data_parsed_388200929063813", "raw_data_parsed_388594665001861", "raw_data_parsed_391556354638725", "scores", "scores_383674715747205", "scores_383679060169477", "scores_383791726646021", "scores_383795602420485", "scores_388200929063813", "scores_388594665001861", "scores_391556354638725", "users"] + } + +--- + + { + "type": "list", + "attributes": { + "names": { + "type": "character", + "attributes": {}, + "value": 
["user_id", "user_name", "user_sex", "user_dob", "organization_name", "grade_name", "class_name"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7] + }, + "class": { + "type": "character", + "attributes": {}, + "value": ["data.frame"] + } + }, + "value": [ + { + "type": "double", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["integer64"] + } + }, + "value": [2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309] + }, + { + "type": "character", + "attributes": {}, + "value": ["陈佳洋", "茹祎", "于胜蓝", "李荟镕", "张忱涵", "连宗鸿", "徐小琳"] + }, + { + "type": "integer", + "attributes": {}, + "value": [2, 2, 2, 2, 2, 2, 2] + }, + { + "type": "double", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["Date"] + } + }, + "value": [-25202, -25201, -25200, -25199, -25198, -25197, -25196] + }, + { + "type": "character", + "attributes": {}, + "value": ["北京师范大学", "北京师范大学", "北京师范大学", "北京师范大学", "北京师范大学", "北京师范大学", "北京师范大学"] + }, + { + "type": "character", + "attributes": {}, + "value": ["睡眠实验", "睡眠实验", "睡眠实验", "睡眠实验", "睡眠实验", "睡眠实验", "睡眠实验"] + }, + { + "type": "character", + "attributes": {}, + "value": ["行为实验1April", "行为实验1April", "行为实验1April", "行为实验1April", "行为实验1April", "行为实验1April", "行为实验1April"] + } + ] + } + +--- + + { + "type": "list", + "attributes": { + "names": { + "type": "character", + "attributes": {}, + "value": ["project_id", "user_id", "game_id", "game_version", "game_time", "ability_id", "game_stage", "game_star", "game_score_raw", "game_score_std"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] + }, + "class": { + "type": "character", + "attributes": {}, + "value": ["data.frame"] + } + }, 
+ "value": [ + { + "type": "double", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["integer64"] + } + }, + "value": [2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309, 2.74429768e-309] + }, + { + "type": "double", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["integer64"] + } + }, + "value": [2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309, 2.0185087e-309] + }, + { + "type": "double", + "attributes": { + "class": { + "type": 
"character", + "attributes": {}, + "value": ["integer64"] + } + }, + "value": [1.89560496e-309, 1.89560496e-309, 1.89560496e-309, 1.89560496e-309, 1.89560496e-309, 1.89560496e-309, 1.89560496e-309, 1.89562643e-309, 1.89562643e-309, 1.89562643e-309, 1.89562643e-309, 1.89562643e-309, 1.89562643e-309, 1.89562643e-309, 1.89618307e-309, 1.89618307e-309, 1.89618307e-309, 1.89618307e-309, 1.89618307e-309, 1.89618307e-309, 1.89618307e-309, 1.89620222e-309, 1.89620222e-309, 1.89620222e-309, 1.89620222e-309, 1.89620222e-309, 1.89620222e-309, 1.89620222e-309, 1.91796743e-309, 1.91796743e-309, 1.91796743e-309, 1.91796743e-309, 1.91796743e-309, 1.91796743e-309, 1.91796743e-309, 1.91991274e-309, 1.91991274e-309, 1.91991274e-309, 1.91991274e-309, 1.91991274e-309, 1.91991274e-309, 1.91991274e-309, 1.93454543e-309, 1.93454543e-309, 1.93454543e-309, 1.93454543e-309, 1.93454543e-309, 1.93454543e-309, 1.93454543e-309] + }, + { + "type": "character", + "attributes": {}, + "value": ["1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "1.0.0", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + }, + { + "type": "double", + "attributes": { + "tzone": { + "type": "character", + "attributes": {}, + "value": ["UTC"] + }, + "class": { + "type": "character", + "attributes": {}, + "value": ["POSIXct", "POSIXt"] + } + }, + "value": [1681982990, 1681983549, 1681985473, 1681985928, 1681987439, 1681988555, 1681988878, 1681983054, 1681983633, 1681985540, 1681986023, 1681987536, 1681988613, 1681988956, 1681983245, 1681983843, 1681985675, 1681986218, 1681987764, 1681988767, 1681989104, 1681983341, 1681983950, 1681985742, 1681986338, 1681987893, 1681988820, 1681989201, 1681982923, 1681983470, 1681985408, 1681985868, 1681987354, 1681988489, 
1681988797, 1681982784, 1681983237, 1681985329, 1681985755, 1681987218, 1681988416, 1681988719, 1681983351, 1681983969, 1681985754, 1681986356, 1681987913, 1681988834, 1681989212] + }, + { + "type": "double", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["integer64"] + } + }, + "value": [1.89560665e-309, 1.89560665e-309, 1.89560665e-309, 1.89560665e-309, 1.89560665e-309, 1.89560665e-309, 1.89560665e-309, 1.89563061e-309, 1.89563061e-309, 1.89563061e-309, 1.89563061e-309, 1.89563061e-309, 1.89563061e-309, 1.89563061e-309, 1.89618415e-309, 1.89618415e-309, 1.89618415e-309, 1.89618415e-309, 1.89618415e-309, 1.89618415e-309, 1.89618415e-309, 1.89620278e-309, 1.89620278e-309, 1.89620278e-309, 1.89620278e-309, 1.89620278e-309, 1.89620278e-309, 1.89620278e-309, 1.9179678e-309, 1.9179678e-309, 1.9179678e-309, 1.9179678e-309, 1.9179678e-309, 1.9179678e-309, 1.9179678e-309, 1.91991319e-309, 1.91991319e-309, 1.91991319e-309, 1.91991319e-309, 1.91991319e-309, 1.91991319e-309, 1.91991319e-309, 1.93454615e-309, 1.93454615e-309, 1.93454615e-309, 1.93454615e-309, 1.93454615e-309, 1.93454615e-309, 1.93454615e-309] + }, + { + "type": "integer", + "attributes": {}, + "value": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + }, + { + "type": "integer", + "attributes": {}, + "value": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + }, + { + "type": "double", + "attributes": {}, + "value": [-0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + }, + { + "type": "double", + "attributes": {}, + "value": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + } + ] } diff --git a/tests/testthat/test-targets.R b/tests/testthat/test-targets.R index 74fa362..d04473c 100644 --- a/tests/testthat/test-targets.R +++ b/tests/testthat/test-targets.R @@ -52,10 +52,12 @@ test_that("Workflow works", { ~organization_name, ~project_name, "北京师范大学", "4.19-4.20夜晚睡眠test" ) - tar_prep_iquizoo(params) + tar_prep_iquizoo(params, combine = "scores") }) targets::tar_make(reporter = "silent", callr_function = NULL) expect_snapshot_value(targets::tar_objects(), style = "json2") + expect_snapshot_value(targets::tar_read(users), style = "json2") + expect_snapshot_value(targets::tar_read(scores), style = "json2") }) }) @@ -149,3 +151,23 @@ test_that("Ensure project date is used", { nrow(targets::tar_read(scores)) |> expect_gt(0) }) }) + +test_that("Ensure `check_progress = FALSE` work", { + targets::tar_dir({ + targets::tar_script({ + library(targets) + tar_prep_iquizoo( + contents = data.frame( + project_id = bit64::as.integer64(132121231360389), + game_id = bit64::as.integer64(268008982646879) + ), + what = "scores", + check_progress = FALSE + ) + }) + targets::tar_make(reporter = "silent", callr_function = NULL) + expect_true( + all(!startsWith(targets::tar_objects(), "progress_hash")) + ) + }) +})