Skip to content

Commit

Permalink
update to new version for new filter requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
btskinner committed Jul 12, 2021
1 parent 9c5881d commit 924e861
Show file tree
Hide file tree
Showing 33 changed files with 328 additions and 103 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rscorecard
Title: A Method to Download Department of Education College Scorecard Data
Version: 0.18.0
Version: 0.19.0
Authors@R: person("Benjamin", "Skinner",
email = "btskinner@coe.ufl.edu",
role = c("aut", "cre"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# v0.19.0

- update to only allow filtering on variables allowed due to API
changes on 4 April 2021

# v0.18.0

- update dictionary for 12 January 2021 release of data
Expand Down
60 changes: 53 additions & 7 deletions R/dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
#' @param return_df Return a tibble of the subset data dictionary.
#' @param print_off Do not print to console; useful if you only want
#' to return a tibble of dictionary values.
#' @param can_filter Use to confirm that a variable can be used as a
#' filtering variable. Returns \code{TRUE} or \code{FALSE}
#' @param filter_vars Use to print variables that can be used to
#' filter calls. Use with argument \code{return_df = TRUE} to
#' return a tibble of these variables in addition to console
#' output.
#'
#' @examples
#' ## simple search for 'state' in any part of the dictionary
Expand All @@ -35,6 +41,9 @@
#' ## return full dictionary (only recommended if not printing and
#' ## storing in object)
#' df <- sc_dict('.', limit = Inf, print_off = TRUE, return_df = TRUE)
#'
#' ## print list of variables that can be used to filter
#' df <- sc_dict('.', filter_vars = TRUE, return_df = TRUE)

#' @export
sc_dict <- function(search_string,
Expand All @@ -47,13 +56,43 @@ sc_dict <- function(search_string,
'source'),
ignore_case = TRUE, limit = 10, confirm = FALSE,
print_dev = FALSE, print_notes = FALSE,
return_df = FALSE, print_off = FALSE) {
return_df = FALSE, print_off = FALSE,
can_filter = FALSE, filter_vars = FALSE) {

## only for confirm
if (confirm) {
return(!is.null(sc_hash[[search_string]]))
}

## only for can_filter
if (can_filter) {
## NB: using any() to be TRUE if any TRUE
return(any(dict[["can_filter"]][dict[["varname"]] == search_string] == 1))
}

## print filter variables
if (filter_vars) {
out <- dict[dict[["can_filter"]] == 1,]
uniqv <- sort(unique(out[["varname"]]))
## console table
cat('\n' %+% paste(rep('', 70), collapse = '-') %+% '\n')
cat('The following variables can be used in sc_filter():')
cat('\n' %+% paste(rep('', 70), collapse = '-') %+% '\n\n')
for (i in 1:length(uniqv)) {
cat(' - ' %+% uniqv[i] %+% '\n')
}
if (return_df) {
cat('\n')
var_order <- c('varname', 'value', 'label', 'description', 'source',
'dev_friendly_name', 'dev_category', 'notes', 'can_filter')
out <- tidyr::as_tibble(out) %>%
dplyr::select(dplyr::one_of(var_order))
return(out)
} else {
return(cat('\n'))
}
}

## get values
if (match.arg(search_col) == 'all') {
rows <- rep(FALSE, nrow(dict))
Expand Down Expand Up @@ -126,7 +165,12 @@ sc_dict <- function(search_string,
}
cat('\n')
}

cat('CAN FILTER? ')
if (d[['can_filter']][[1]] == 1) {
cat('Yes\n\n')
} else {
cat('No\n\n')
}
}

cat(paste(rep('', 70), collapse = '-') %+% '\n')
Expand All @@ -138,10 +182,12 @@ sc_dict <- function(search_string,

## return_df ? return (out) : <>
if (return_df) {
var_order <- c('varname', 'value', 'label', 'description', 'source',
'dev_friendly_name', 'dev_category', 'notes')
out <- tidyr::as_tibble(out) %>%
dplyr::select(dplyr::one_of(var_order))
return(out)
var_order <- c('varname', 'value', 'label', 'description', 'source',
'dev_friendly_name', 'dev_category', 'notes', 'can_filter')
out <- tidyr::as_tibble(out) %>%
dplyr::select(dplyr::one_of(var_order))
return(out)
}


}
7 changes: 7 additions & 0 deletions R/filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ sc_filter_ <- function(sccall, filter_string) {
stop('Must use either \"==\", \"!=\", or \"%in%\" in sc_filter.',
call. = FALSE)
}
if (!sc_dict(tolower(as.character(filter[[i]][[2]])), can_filter = TRUE)) {
stop('The variable \"' %+% filter[[i]][[2]]
%+% '\" cannot be used as filter. '
%+% 'Revise your call to remove this filtering variable.\n'
%+% 'Use sc_dict(filter_vars = TRUE) to see available filters.',
call. = FALSE)
}
if (!sc_dict(tolower(as.character(filter[[i]][[2]])), confirm = TRUE)) {
stop('Variable \"' %+% filter[[i]][[2]]
%+% '\" not found in dictionary. '
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ key in your R environment at the start of your R session using
## # A tibble: 6 x 4
## unitid instnm stabbr year
## <int> <chr> <chr> <chr>
## 1 214625 Pennsylvania State University-Penn State New Kensington PA latest
## 2 196051 SUNY Morrisville NY latest
## 3 194392 Paul Smiths College of Arts and Science NY latest
## 4 214643 Pennsylvania State University-Penn State Wilkes-Barre PA latest
## 5 191676 Houghton College NY latest
## 6 197230 Wells College NY latest
## 1 191676 Houghton College NY latest
## 2 194392 Paul Smiths College of Arts and Science NY latest
## 3 196051 SUNY Morrisville NY latest
## 4 197230 Wells College NY latest
## 5 214625 Pennsylvania State University-Penn State New Kensington PA latest
## 6 214643 Pennsylvania State University-Penn State Wilkes-Barre PA latest

For more example calls and information about other package commands, see
the [extended
Expand Down Expand Up @@ -91,6 +91,8 @@ function.
## 3 = Private for-profit
## 4 = Foreign
##
## CAN FILTER? Yes
##
##
## ---------------------------------------------------------------------
## varname: schtype source: FSA
Expand All @@ -105,6 +107,8 @@ function.
## 2 = Private, Nonprofit
## 3 = Proprietary
##
## CAN FILTER? No
##
## ---------------------------------------------------------------------
## Printed information for 2 of out 2 variables.

Expand Down
40 changes: 26 additions & 14 deletions data-raw/make_dict_hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ sheets <- c("institution_data_dictionary",

## read in each sheet, munge, and bind
df <- purrr::map(sheets,
~ read_excel(file, sheet = .x) %>%
~ read_excel(file, sheet = .x) |>
## lower names
setNames(tolower(names(.))) %>%
rename_all(tolower) |>
## subset/rename
select(description = `name of data element`,
dev_category = `dev-category`,
Expand All @@ -25,27 +25,39 @@ df <- purrr::map(sheets,
value,
label,
source,
notes) %>%
notes,
can_filter = index) |>
## lower name values for varname column
mutate(varname = tolower(varname)) %>%
mutate(varname = tolower(varname)) |>
## remove extra \r\n from description column
mutate(description = gsub("^(.+)\r\n 0.*$", "\\1", description)) %>%
mutate(description = gsub("^(.+)\r\n 0.*$", "\\1", description)) |>
## remove trailing characters from dev_friendly_name
mutate(dev_friendly_name = gsub("^(.+):$", "\\1",
dev_friendly_name)) %>%
dev_friendly_name)) |>
## convert can_filter column to 1 if there's text there
mutate(can_filter = ifelse(!is.na(can_filter), 1, can_filter),
## fix unitid can_filter to 1 since it works for filtering
can_filter = ifelse(varname == "unitid", 1, can_filter),
## convert to integer
can_filter = as.integer(can_filter)) |>
## roll values forward to fill NA
mutate(description = na.locf(description),
dev_category = na.locf(dev_category),
dev_friendly_name = na.locf(dev_friendly_name),
varname = na.locf(varname),
source = na.locf(source),
notes = na.locf(notes))
) %>%
notes = na.locf(notes)) |>
## roll values forward in can_filter, grouped by variable name
group_by(varname) |>
mutate(can_filter = na.locf(can_filter, na.rm = FALSE)) |>
ungroup() |>
mutate(can_filter = ifelse(is.na(can_filter), 0, can_filter))
) |>
## bind together
bind_rows
bind_rows()

## make dictionary data frame
dict <- df %>% data.frame(.)
dict <- df |> data.frame()

## create hash environment for quick conversion between varnames
## and developer-friendly names
Expand All @@ -55,7 +67,7 @@ sc_hash <- new.env(parent = emptyenv())
## (1) varname <- dev_friendly
## (2) varname_c <- root

tmp <- df %>% distinct(varname, .keep_all = TRUE)
tmp <- df |> distinct(varname, .keep_all = TRUE)

for (i in 1:nrow(tmp)) {
## key/value pair (1)
Expand All @@ -72,7 +84,7 @@ for (i in 1:nrow(tmp)) {
## (3) dev_friendly <- varname
## (4) dev_friendly_c <- root

tmp <- df %>% distinct(dev_friendly_name, .keep_all = TRUE)
tmp <- df |> distinct(dev_friendly_name, .keep_all = TRUE)

for (i in 1:nrow(tmp)) {
## key/value pair (3)
Expand All @@ -96,8 +108,8 @@ for (i in 1:nrow(tmp)) {
## the information will continue to come from the institution data
both_sources <- c("unitid", "opeid6", "control", "main", "instnm")

tmp <- df %>%
filter(varname %in% both_sources) %>%
tmp <- df |>
filter(varname %in% both_sources) |>
distinct(dev_category, varname, .keep_all = TRUE)

for (i in 1:nrow(tmp)) {
Expand Down
4 changes: 2 additions & 2 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/CODE_OF_CONDUCT.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 15 additions & 9 deletions docs/articles/commands.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 924e861

Please sign in to comment.