Skip to content

Commit c87682f

Browse files
authored
Merge pull request nationalparkservice#62 from RobLBaker/master
fix load_pkg_metadata; add metanalysis functions
2 parents cded127 + dc23032 commit c87682f

16 files changed

+398
-133
lines changed

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export(get_park_taxon_citations)
1111
export(get_park_taxon_refs)
1212
export(get_park_taxon_url)
1313
export(get_ref_info)
14+
export(get_ref_list)
15+
export(get_refs_info)
1416
export(get_unit_code)
1517
export(get_unit_code_info)
1618
export(get_unit_info)
@@ -22,6 +24,7 @@ export(load_domains)
2224
export(load_pkg_metadata)
2325
export(map_wkt)
2426
export(rm_local_packages)
27+
export(summarize_packages)
2528
export(validate_data_package)
2629
importFrom(lifecycle,deprecated)
2730
importFrom(magrittr,"%>%")

NEWS.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# NPSutils 0.3.3 (under development)
22

3+
## 2024-12-19
4+
* updated `load_pkg_metadata` to be simpler and essentially call `DPchecker::load_metadata` but with a preset default directory structure that works well with the default settings for `get_data_package`.
5+
* Added meta-analysis functions for finding data packages and producing summary statistics across multiple data packages, including `get_ref_list()`, `get_refs_info()`, and `summarize_packages()`.
36
## 2024-10-24
47
* fix how `get_data_package` aliases `get_data_packages`, specifically now allows users to adjust parameters to non-default settings.
58
## 2024-10-21

R/load_data_package.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#'
1212
#' @examples
1313
#' \dontrun{
14-
#' load_data_package(2272461)
14+
#' load_data_package_deprecated(2272461)
1515
#' }
1616
load_data_package_deprecated <- function(reference_id) {
1717
data_package_directory <- paste("data/", reference_id, sep = "")

R/load_data_packages.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,4 @@ extract_tbl <- function(x) {
197197
if (!is.list(x))
198198
return(NULL)
199199
unlist(lapply(x, extract_tbl), FALSE)
200-
}
200+
}

R/load_pgk_metadata.R

+10-108
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
1-
#' Read contents of data package file and construct a data frame based on the
2-
#' metadata file summarizing the fields and their types/definitions.
1+
#' Loads EML-formatted metadata into R for inspection and/or editing
2+
#'
3+
#' @description `load_pkg_metadata()` is essentially a wrapper around `DPchecker::load_metadata()` with the directory structure pre-set to work well with the default location where `get_data_package()` stores downloaded data packages. If you did not use the default settings for `get_data_package()` (or downloaded a data package manually), you may find it easier to adjust the directory structure pointing to your data package and load the metadata using `DPchecker::load_metadata()`. Much like `load_metadata()`, `load_pkg_metadata()` requires that there be a single .xml file in the data package directory, that the metadata file name end in *_metadata.xml, and that the file contain schema-valid EML metadata.
34
#'
4-
#' @description `load_pkg_metadata()` reads the metadata file from a previously
5-
#' downloaded package and loads a list of fields and their attributes into a
6-
#' dataframe.
7-
#'
8-
#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file.
9-
#' @param directory String. Path to the data package
5+
#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file. Your data should be in a directory that has the holding ID as its name.
6+
#' @param directory String. Path to the data package directory, defaults to "data".
107
#'
118
#' @return invisibly returns the metadata object loaded by `DPchecker::load_metadata()`.
129
#'
@@ -16,105 +13,10 @@
1613
#' \dontrun{
1714
#' load_pkg_metadata(2266200)
1815
#' }
19-
load_pkg_metadata <- function(holding_id, directory = here::here("data")) {
20-
data_package_directory <- paste(directory, "/", holding_id, sep = "")
21-
22-
metadata_file <- list.files(
23-
path = data_package_directory,
24-
pattern = "metadata.xml"
25-
)
26-
27-
# Look for a metadatafile and let the user know about the results of the search.
28-
if (length(metadata_file) == 0) {
29-
cli::cli_abort(c(
30-
"No metadata file found in: {.path {data_package_directory}}.",
31-
"i" = "The filename must end in _metadata.xml"))
32-
return(invisible())
33-
}
34-
if (length(metadata_file) > 1) {
35-
cli::cli_abort(c(
36-
"Multiple metadata files found.",
37-
"i" = "{.path {data_package_directory}} can contain only one
38-
{.file *_metadata.xml}."))
39-
return(invisible())
40-
}
16+
load_pkg_metadata <- function(holding_id, directory = "data") {
4117

42-
meta_location <- paste0(data_package_directory, "/", metadata_file)
43-
if (!file.exists(meta_location)) {
44-
cli::cli_abort(c(
45-
"The data package for: {.var {holding_id}} was not found.",
46-
"i" = "Make sure {.path {data_package_directory}} is the correct location",
47-
"i" = "Make sure you downloaded the correct data package using {.fn get_data_package}."
48-
))
49-
return(invisible())
50-
}
51-
52-
#load metadata
53-
eml_object <- EML::read_eml(meta_location, from = "xml")
54-
#attributeList <- EML::get_attributes(eml_object)
55-
attribute_list <- eml_object$dataset$dataTable$attributeList
56-
attributes <- attribute_list$attributes
57-
factors <- attribute_list$factors
58-
59-
# Figure out column classes based on attribute table (character, numeric, integer, logical, or complex)
60-
attributes$columnclass <- "character"
61-
if (!"numberType" %in% colnames(attributes)) {
62-
attributes$numberType <- as.character(NA)
63-
}
64-
if (!"formatString" %in% colnames(attributes)) {
65-
attributes$formatString <- as.character(NA)
66-
}
67-
attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "natural", "integer", attributes$columnclass)
68-
attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "whole", "integer", attributes$columnclass)
69-
attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "integer", "integer", attributes$columnclass)
70-
attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "real", "numeric", attributes$columnclass)
71-
attributes$columnclass <- ifelse(attributes$storageType == "date" & attributes$formatString == "YYYY-MM-DD", "Date", attributes$columnclass)
72-
73-
# return the field table to the workspace.
74-
return(attributes)
75-
76-
if (metaformat == "fgdc") {
77-
# xmlFilename <- metalocation
78-
workingXMLfile <- EML::read_eml(metalocation, from = "xml")
79-
80-
# Build attributes table from the xml file
81-
attributes <- data.frame(
82-
id = numeric(),
83-
attribute = character(),
84-
attributeDefinition = character(),
85-
attributeType = character(),
86-
attributeFactors = numeric(),
87-
stringsAsFactors = FALSE
88-
)
89-
for (i in 1:length(workingXMLfile$ea$detailed$attr)) {
90-
attributes <- rbind(
91-
attributes,
92-
cbind(
93-
id = i,
94-
attribute = workingXMLfile$ea$detailed$attr[[i]]$attrlabl,
95-
attributeDefinition = workingXMLfile$ea$detailed$attr[[i]]$attrdef,
96-
attributeType = workingXMLfile$ea$detailed$attr[[i]]$attrtype,
97-
attributeFactors = length(workingXMLfile$ea$detailed$attr[[i]]$attrdomv)
98-
)
99-
)
100-
}
101-
102-
attributes$id <- as.integer(as.character(attributes$id))
103-
attributes$attribute <- as.character(attributes$attribute)
104-
attributes$attributeDefinition <- as.character(attributes$attributeDefinition)
105-
# attributes$attributeType<-as.character(attributes$attributeType)
106-
attributes$attributeFactors <- as.integer(as.character(attributes$attributeFactors))
107-
108-
attributes$columnclass <- "character"
109-
# attributes$columnclass<-ifelse(attributes$attributeType=="OID","integer",attributes$columnclass)
110-
# attributes$columnclass<-ifelse(attributes$attributeType=="Date","Date",attributes$columnclass)
111-
# attributes$columnclass<-ifelse(attributes$attributeType=="Double","numeric",attributes$columnclass)
112-
113-
cat("Found ", crayon::blue$bold(nrow(attributes)), " fields.", sep = "")
18+
meta <- DPchecker::load_metadata(directory = here::here("data", holding_id))
19+
20+
return(invisible(meta))
21+
}
11422

115-
# return the field table to the workspace.
116-
return(attributes)
117-
} else {
118-
print("data/metadata format combination not supported")
119-
}
120-
}

0 commit comments

Comments
 (0)