From f6bc6e09df3002e33df6c1d10d04d2e8bc2473b4 Mon Sep 17 00:00:00 2001 From: jangevaare Date: Tue, 23 Apr 2024 13:48:05 +0000 Subject: [PATCH 1/2] Fix erroneous collection dates --- processing/process.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/processing/process.R b/processing/process.R index 7390740..6594718 100644 --- a/processing/process.R +++ b/processing/process.R @@ -160,14 +160,25 @@ wtisen_data = read_csv( "MUNICIPALITY", "COUNTY"), .fns = \(x) str_replace(x, "_", " ")), + across( .cols = starts_with("DATE_"), .fns = \(x) {x |> as_datetime(format = c("%m/%d/%Y %I:%M:%S %p", "%Y-%m-%d %H:%M:%S")) |> force_tz(tz = "America/Toronto")}), + + # Convert erroneous collection dates to NAs + DATE_COLLECTED = case_when( + # Collected >6 months before receipt (%m-% rolls back to month end instead of yielding NA) + DATE_COLLECTED < (DATE_RECEIVED %m-% months(6)) ~ NA_POSIXct_, + # Collected after lab receipt + DATE_COLLECTED > DATE_RECEIVED ~ NA_POSIXct_, + .default = DATE_COLLECTED), + across( .cols = where(is.character), .fns = \(x) str_trim(x)), + POSTAL = postalcode_cleaner(POSTAL), ENTRY = as.integer(ENTRY), REQ_LEGIBLE = str_detect(REQ_LEGIBLE, "^y|Y$")) From 64bd978be37b1659f6db84282c58db918492696b Mon Sep 17 00:00:00 2001 From: jangevaare Date: Tue, 23 Apr 2024 13:53:11 +0000 Subject: [PATCH 2/2] Correct CLI help to parquet file --- processing/process.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/process.R b/processing/process.R index 6594718..f4279ee 100644 --- a/processing/process.R +++ b/processing/process.R @@ -18,7 +18,7 @@ parser = OptionParser( make_option( opt_str = c("-o", "--output"), - help = "Output file, in CSV format.", + help = "Output file, in parquet format.", type = "character", default = ""),