Skip to content

Commit

Permalink
Update for the 20 July 2021 data dictionary release
Browse files Browse the repository at this point in the history
  • Loading branch information
btskinner committed Jul 28, 2021
1 parent 64b277a commit 7669009
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rscorecard
Title: A Method to Download Department of Education College Scorecard Data
Version: 0.19.1
Version: 0.20.0
Authors@R: person("Benjamin", "Skinner",
email = "btskinner@coe.ufl.edu",
role = c("aut", "cre"),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# v0.20.0

- Update dictionary for the 20 July 2021 release of data

# v0.19.1

## Bug fix
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
22 changes: 16 additions & 6 deletions data-raw/make_dict_hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ link <- paste0("https://collegescorecard.ed.gov/assets/", file)
download.file(link, file)

## sheet names
sheets <- c("institution_data_dictionary",
"FieldOfStudy_data_dictionary")
sheets <- c("Institution_Data_Dictionary",
"FieldOfStudy_Data_Dictionary")

## read in each sheet, munge, and bind
df <- purrr::map(sheets,
Expand Down Expand Up @@ -45,13 +45,23 @@ df <- purrr::map(sheets,
dev_category = na.locf(dev_category),
dev_friendly_name = na.locf(dev_friendly_name),
varname = na.locf(varname),
source = na.locf(source),
notes = na.locf(notes)) |>
source = na.locf(source)) |>
## roll values forward in can_filter (and notes), grouped by variable name
group_by(varname) |>
mutate(can_filter = na.locf(can_filter, na.rm = FALSE)) |>
mutate(can_filter = na.locf(can_filter, na.rm = FALSE),
notes = na.locf(notes, na.rm = FALSE)) |>
ungroup() |>
mutate(can_filter = ifelse(is.na(can_filter), 0, can_filter))
mutate(can_filter = ifelse(is.na(can_filter), 0, can_filter),
notes_ibid = ifelse(grepl("ibid", notes), 1, 0),
notes_disc = ifelse(grepl("^Discontinued;", notes), 1, 0),
notes = ifelse(grepl("ibid", notes), NA, notes),
notes = ifelse(is.na(notes) & notes_ibid == 1,
na.locf(notes),
notes),
notes = ifelse(notes_disc == 1,
paste("DISCONTINUED:", notes),
notes)) |>
select(-notes_ibid, -notes_disc)
) |>
## bind together
bind_rows()
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ test_that('Dictionary does not return correct object', {
source = 'IPEDS',
dev_friendly_name = 'state',
dev_category = 'school',
notes = 'Shown/used on consumer website.',
notes = NA_character_,
can_filter = 1)

df2 <- sc_dict('stabbr', return_df = TRUE, print_off = TRUE)
Expand Down Expand Up @@ -46,7 +46,7 @@ test_that('Dictionary does not return correct object', {
source = 'IPEDS',
dev_friendly_name = 'state_fips',
dev_category = 'school',
notes = 'Shown/used on consumer website.',
notes = NA_character_,
can_filter = 1)

df2 <- sc_dict('st_fips', return_df = TRUE, print_off = TRUE)
Expand Down

0 comments on commit 7669009

Please sign in to comment.