|
1 |
| -#' Read contents of data package file and construct a data frame based on the |
2 |
| -#' metadata file summarizing the fields and their types/definitions. |
| 1 | +#' Loads EML-formatted metadata into R for inspection and/or editing |
| 2 | +#' |
| 3 | +#' @description `load_pkg_metadata()` is essentially a wrapper around `DPchecker::load_metadata` with the directory structure pre-set to work well with the default location where `get_data_package` stores downloaded data packages. If you did not use the default settings for `get_data_package` (or downloaded a data package manually) you may find it easier to adjust the directory structure pointing to your data package and load the metadata using `DPchecker::load_metadata()`. Much like `load_metadata`, `load_pkg_metadata` requires that there be a single .xml file in the data package directory, that the metadata file name end in *_metadata.xml, and that the file contain schema-valid EML metadata. |
3 | 4 | #'
|
4 |
| -#' @description `load_pkg_metadata()` reads the metadata file from a previously |
5 |
| -#' downloaded package and loads a list of fields and their attributes into a |
6 |
| -#' dataframe. |
7 |
| -#' |
8 |
| -#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file. |
9 |
| -#' @param directory String. Path to the data package |
| 5 | +#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file. Your data should be in a directory that has the holding ID as its name. |
| 6 | +#' @param directory String. Path to the data package directory, defaults to "data". |
10 | 7 | #'
|
11 | 8 | #' @return the metadata object returned by `DPchecker::load_metadata()`, invisibly.
|
12 | 9 | #'
|
|
16 | 13 | #' \dontrun{
|
17 | 14 | #' load_pkg_metadata(2266200)
|
18 | 15 | #' }
|
19 |
load_pkg_metadata <- function(holding_id, directory = "data") {
  # Build the folder name from the holding ID without scientific notation:
  # coercing a round number such as 2000000 to character can yield "2e+06",
  # which would not match a directory named after the holding ID.
  if (is.numeric(holding_id)) {
    holding_id <- format(holding_id, scientific = FALSE, trim = TRUE)
  }

  # Resolve the data package folder from the `directory` argument instead of
  # a hard-coded "data" folder. With the default value this is the same
  # location as here::here("data", holding_id).
  pkg_dir <- here::here(directory, holding_id)

  # Delegate reading of the metadata file to DPchecker.
  meta <- DPchecker::load_metadata(directory = pkg_dir)

  # Returned invisibly so the object is not auto-printed at the console when
  # the result is not assigned.
  invisible(meta)
}
0 commit comments