Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development #22

Merged
merged 6 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
^inst/hex.R$
README.Rmd
^doc$
^\.github$
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
49 changes: 49 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
release:
types: [published]
workflow_dispatch:

name: pkgdown.yaml

permissions: read-all

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.5.0
with:
clean: false
branch: gh-pages
folder: docs
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Depends:
R (>= 4.3.0)
Imports:
cli,
corroboree,
corella,
curl,
dplyr,
elm,
Expand All @@ -46,6 +46,7 @@ Suggests:
testthat (>= 3.0.0),
xml2
License: MPL-2.0
URL: https://galaxias.ala.org.au
BugReports: https://github.com/AtlasOfLivingAustralia/galaxias/issues
Maintainer: Martin Westgate <martin.westgate@csiro.au>
Encoding: UTF-8
Expand Down
14 changes: 9 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ export(galaxias_project)
export(get_validator_report)
export(validate_archive)
importFrom(cli,cat_line)
importFrom(cli,cli_abort)
importFrom(cli,cli_h2)
importFrom(cli,cli_h3)
importFrom(corroboree,check_occurrences)
importFrom(cli,cli_inform)
importFrom(cli,cli_progress_step)
importFrom(cli,cli_progress_update)
importFrom(corella,check_occurrences)
importFrom(curl,form_data)
importFrom(curl,form_file)
importFrom(dplyr,bind_rows)
Expand All @@ -24,11 +28,11 @@ importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,slice_head)
importFrom(elm,add_elm_header)
importFrom(elm,check_elm)
importFrom(elm,read_elm)
importFrom(elm,add_eml_header)
importFrom(elm,check_eml)
importFrom(elm,read_md)
importFrom(elm,use_metadata)
importFrom(elm,write_elm)
importFrom(elm,write_eml)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(httr2,req_body_multipart)
Expand Down
51 changes: 33 additions & 18 deletions R/build_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#'
#' * One or more `csv` files such as `occurrences.csv` &/or `events.csv`.
#' These will be manipulated versions of the raw dataset, which have been
#' altered to use Darwin Core terms as column headers. See the `corroboree`
#' altered to use Darwin Core terms as column headers. See the `corella`
#' package for details.
#' * A metadata statement, stored in xml using the filename `eml.xml`. The
#' function `use_metadata()` from the `elm` package is a good starting point
Expand All @@ -34,12 +34,24 @@
#' @export
build_archive <- function(x = "data", file) {
x <- get_default_directory(x)

progress_update("Retrieving metadata...")
files_in <- find_data(x)

progress_update("Creating zip folder...")
file_out <- get_default_file(file)

progress_update("Building Darwin Core Archive...")
zip::zip(zipfile = file_out,
files = files_in,
mode = "cherry-pick")
invisible(return(file_out))

cli::cli_alert_success("Darwin Core Archive successfully built. \nSaved as {.file {file_out}}.")
cli::cli_progress_done()

# invisible(return(file_out)) # might need this to save


}

#' Simple function to specify a zip file if no arg given
Expand All @@ -52,7 +64,7 @@ get_default_file <- function(file){
glue("{getwd()}.zip")
}else{
if(!grepl(".zip$", file)){
abort("file must end in `.zip`")
abort("File must end in `.zip`.")
}else{
file
}
Expand All @@ -62,17 +74,18 @@ get_default_file <- function(file){
#' Simple function to check that a `data` directory exists if no arg given
#' @importFrom rlang abort
#' @importFrom rlang inform
#' @importFrom cli cli_inform
#' @importFrom glue glue
#' @noRd
#' @keywords Internal
get_default_directory <- function(x){
if(missing(x)){
if(dir.exists("data")){
inform("`x` is missing; defaulting to `data` folder")
cli_inform("Missing `directory`. Defaulting to {.file data} folder.")
x <- "data"
}else{
abort(c("`x` is missing, and `data` folder is missing",
i = "please supply a folder containing required data"))
abort(c("Missing `directory` and missing `data` folder.",
i = "Please specify a folder containing required data."))
}
}else{
if(!dir.exists(x)){
Expand All @@ -86,15 +99,17 @@ get_default_directory <- function(x){
#' Find metadata info in a repository
#' @importFrom glue glue_collapse
#' @importFrom rlang abort
#' @importFrom cli cli_abort
#' @importFrom rlang caller_env
#' @noRd
#' @keywords Internal
find_data <- function(directory,
call = caller_env()){
if(!file.exists(directory)){
bullets <- c(glue("`{directory}` directory is required, but missing."),
i = "use `usethis::use_data()` to add data to your project.")
abort(bullets,
bullets <- c(glue("Missing `directory`."),
i = "Use `usethis::use_data()` to add data to your project.",
x = "Can't find directory `{directory}`.")
cli_abort(bullets,
call = call)
}
accepted_names <- c("occurrences",
Expand All @@ -105,24 +120,24 @@ find_data <- function(directory,
pattern = glue("^{accepted_names}.csv$"))
if(length(file_list) < 1){
bullets <- c("No data meeting Darwin Core requirements is given in `data`.",
i = "use `add_bd_data_raw()` for examples of how to add raw data to your package",
i = "use `usethis::use_data()` to add data to your package")
i = "Use `add_bd_data_raw()` for examples of how to add raw data to your package.",
i = "Use `usethis::use_data()` to add data to your package.")
abort(bullets,
call = call)
}

if(!file.exists(glue("{directory}/meta.xml"))){
bullets <- c("No schema file (`meta.xml`) is present in the specified directory.",
i = "use `build_schema()` to create one")
abort(bullets,
bullets <- c("No schema file ({.file meta.xml}) is present in the specified directory.",
i = "Use `build_schema()` to create a schema file.")
cli_abort(bullets,
call = call)
}

if(!file.exists(glue("{directory}/eml.xml"))){
bullets <- c("No metadata statement (`eml.xml`) is present in the specified directory.",
i = "See `elm::use_metadata()` for an example metadata statement,",
i = "then `build_metadata()` to convert to `eml.xml`.")
abort(bullets,
bullets <- c("No metadata statement ({.file eml.xml}) is present in the specified directory.",
i = "See `elm::use_metadata()` for an example metadata statement.",
i = "Use `build_metadata()` to convert to {.file eml.xml}.")
cli_abort(bullets,
call = call)
}

Expand Down
26 changes: 17 additions & 9 deletions R/build_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,33 @@
#' specified using the `directory` argument.
#'
#' This function is a fairly shallow wrapper on top of functionality build
#' in the `elm` package, particularly `read_elm()` and `write_elm()`. You can
#' in the `elm` package, particularly `read_md()` and `write_eml()`. You can
#' use that package to gain greater control, or to debug problems, should you
#' wish.
#' @param x Path to a metadata statement stored in markdown format (.md).
#' @param path Path to a metadata statement stored in markdown format (.md).
#' @param file A file where the result should be saved. Defaults to
#' `data/eml.xml`.
#' @returns Does not return an object to the workspace; called for the side
#' effect of building a file named `meta.xml` in the `data` directory.
#' @importFrom elm add_elm_header
#' @importFrom elm read_elm
#' @importFrom elm write_elm
#' @importFrom elm add_eml_header
#' @importFrom elm read_md
#' @importFrom elm write_eml
#' @export
build_metadata <- function(x = "data",
file = "./data/eml.xml"){
if(!file.exists(x)){
abort("`x` doesn't exist in specified location.")
cli::cli_abort("{.file {x}} doesn't exist in specified location.")
}
# import file, ensure EML metadata is added, convert to XML
read_elm(x) |>
add_elm_header() |>
write_elm(file = file)
progress_update("Reading file...")
metadata_file <- read_md(x)

progress_update("Building xml components...")
built_file <- add_eml_header(metadata_file)

progress_update("Writing file...")
write_eml(built_file, file = file)

cli::cli_alert_success("Metadata successfully built. Saved as {.file /data/eml.xml}.")
cli::cli_progress_done()
}
56 changes: 48 additions & 8 deletions R/build_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,55 @@
#' @param file (string) A file name for the resulting schema document.
#' @returns Does not return an object to the workspace; called for the side
#' effect of building a file named `meta.xml` in the specified directory.
#' @importFrom elm write_elm
#' @importFrom elm write_eml
#' @importFrom glue glue
#' @importFrom rlang abort
#' @export
build_schema <- function(x = "data",
file = "./data/meta.xml") {
x <- get_default_directory(x)
x |>
detect_dwc_files() |>
detect_dwc_fields() |>
add_front_matter() |>
write_elm(file = file)

files <- detect_dwc_files(x)
fields <- detect_dwc_fields(files)
result <- add_front_matter(fields)

progress_update("Writing file...")
write_eml(result, file = file)

cli::cli_alert_success("Schema successfully built. Saved as {.file /data/meta.xml}.")
cli::cli_progress_done()
}

#' Wait time
#' @noRd
#' @keywords Internal
wait <- function(seconds = 1) {
Sys.sleep(seconds)
}


#' Function progress message
#'
#' @description
#' Informs users about the progress of their ongoing function steps.
#'
#' @importFrom cli cli_progress_step
#' @importFrom cli cli_progress_update
#' @noRd
#' @keywords Internal
progress_update <- function(message) {
cli::cli_progress_step(
paste0(
message
),
spinner = TRUE
)

for (i in 1:100) {
wait(0.0001) # remove zeroes to make messages slower
cli::cli_progress_update()
}

}

#' Internal function to create core/extension framework for files
Expand All @@ -35,6 +72,7 @@ build_schema <- function(x = "data",
#' @noRd
#' @keywords Internal
detect_dwc_files <- function(directory){
progress_update("Detecting Darwin Core files...")
available_exts <- dwc_extensions()
supported_files <- available_exts |>
pull("file")
Expand All @@ -47,8 +85,8 @@ detect_dwc_files <- function(directory){
sep = ", ",
last = " or ")
bullets <- c(
glue("Specified directory (\"{directory}\") does not contain any dwc-compliant csv files."),
i = glue("Accepted names are {file_names}"))
glue("Specified directory (\"{directory}\") does not contain any Darwin Core-compliant csv files."),
i = glue("Accepted names are {file_names}."))
abort(bullets)
}
available_exts |>
Expand Down Expand Up @@ -100,6 +138,7 @@ dwc_extensions <- function(){
#' @noRd
#' @keywords Internal
detect_dwc_fields <- function(df){
progress_update("Detecting Darwin Core fields in dataset...")
split(df, seq_len(nrow(df))) |>
map(\(x){
bind_rows(create_schema_row(x),
Expand Down Expand Up @@ -181,6 +220,7 @@ get_field_names <- function(file){
#' @noRd
#' @keywords Internal
add_front_matter <- function(df){
progress_update("Building xml components...")
front_row <- tibble(
level = 1,
label = "archive",
Expand Down
10 changes: 5 additions & 5 deletions R/check_archive.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Check an archive against Darwin Core standards
#'
#' This is a wrapper to two other packages; schema and EML files (i.e. xml) are
#' checked with the `elm` package; csv files are checked with the `corroboree`
#' checked with the `elm` package; csv files are checked with the `corella`
#' package.
#' @param x (string) A directory containing the files to be published, or
#' optionally a `.zip` file built from the same (i.e. with `build_archive()`).
Expand All @@ -26,8 +26,8 @@ check_archive <- function(x = "data"){
}

#' Internal function to check all files
#' @importFrom corroboree check_occurrences
#' @importFrom elm check_elm
#' @importFrom corella check_occurrences
#' @importFrom elm check_eml
#' @importFrom purrr map
#' @importFrom readr read_csv
#' @noRd
Expand All @@ -38,8 +38,8 @@ check_files <- function(filenames){
switch(a,
"occurrences.csv" = {read_csv(a) |>
check_occurrences()},
"meta.xml" = {check_elm(a)},
"eml.xml" = {check_elm(a)}
"meta.xml" = {check_eml(a)},
"eml.xml" = {check_eml(a)}
)
}) |>
invisible()
Expand Down
Loading
Loading