diff --git a/R/DataModel.R b/R/DataModel.R index abe01f4..97c8625 100644 --- a/R/DataModel.R +++ b/R/DataModel.R @@ -49,7 +49,7 @@ checkAndFixColumnNames <- expectedNames <- tableSpecs %>% dplyr::select("columnName") %>% dplyr::anti_join(dplyr::filter(optionalNames, !.data$columnName %in% observeredNames), - by = "columnName" + by = "columnName" ) %>% dplyr::arrange("columnName") %>% dplyr::pull() @@ -181,7 +181,7 @@ checkAndFixDuplicateRows <- specifications) { primaryKeys <- specifications %>% dplyr::filter(.data$tableName == !!tableName & - tolower(.data$primaryKey) == "yes") %>% + tolower(.data$primaryKey) == "yes") %>% dplyr::select("columnName") %>% dplyr::pull() duplicatedRows <- duplicated(table[, primaryKeys]) @@ -194,7 +194,7 @@ checkAndFixDuplicateRows <- sum(duplicatedRows) ) ) - return(table[!duplicatedRows, ]) + return(table[!duplicatedRows,]) } else { return(table) } @@ -220,7 +220,7 @@ appendNewRows <- if (nrow(data) > 0) { primaryKeys <- specifications %>% dplyr::filter(.data$tableName == !!tableName & - tolower(.data$primaryKey) == "yes") %>% + tolower(.data$primaryKey) == "yes") %>% dplyr::select("columnName") %>% dplyr::pull() newData <- newData %>% @@ -250,10 +250,10 @@ formatDouble <- function(x) { .truncateTable <- function(tableName, connection, schema, tablePrefix) { DatabaseConnector::renderTranslateExecuteSql(connection, - "TRUNCATE TABLE @schema.@table_prefix@table;", - table_prefix = tablePrefix, - schema = schema, - table = tableName + "TRUNCATE TABLE @schema.@table_prefix@table;", + table_prefix = tablePrefix, + schema = schema, + table = tableName ) invisible(NULL) } @@ -354,8 +354,8 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti primaryKeyValuesInChunk <- unique(chunk[env$primaryKey]) duplicates <- dplyr::inner_join(env$primaryKeyValuesInDb, - primaryKeyValuesInChunk, - by = env$primaryKey + primaryKeyValuesInChunk, + by = env$primaryKey ) if (nrow(duplicates) != 0) { @@ -386,7 +386,7 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti # Remove duplicates we already dealt with: env$primaryKeyValuesInDb <- env$primaryKeyValuesInDb %>% - dplyr::anti_join(duplicates, by = env$primaryKey) + dplyr::anti_join(duplicates, by = env$primaryKey) } } if (nrow(chunk) == 0) { @@ -428,7 +428,7 @@ uploadTable <- function(tableName, primaryKey <- specifications %>% dplyr::filter(.data$tableName == !!tableName & - tolower(.data$primaryKey) == "yes") %>% + tolower(.data$primaryKey) == "yes") %>% dplyr::select("columnName") %>% dplyr::pull() @@ -444,7 +444,7 @@ uploadTable <- function(tableName, if (purgeSiteDataBeforeUploading && "database_id" %in% primaryKey) { type <- specifications %>% dplyr::filter(.data$tableName == !!tableName & - .data$columnName == "database_id") %>% + .data$columnName == "database_id") %>% dplyr::select("dataType") %>% dplyr::pull() # Remove the existing data for the databaseId @@ -572,10 +572,10 @@ uploadResults <- function(connection = NULL, ParallelLogger::logInfo("Removing all records for tables within specification") invisible(lapply(unique(specifications$tableName), - .truncateTable, - connection = connection, - schema = schema, - tablePrefix = tablePrefix + .truncateTable, + connection = connection, + schema = schema, + tablePrefix = tablePrefix )) } @@ -639,6 +639,7 @@ uploadResults <- function(connection = NULL, #' @export deleteAllRowsForPrimaryKey <- function(connection, schema, tableName, keyValues) { + createSqlStatement <- function(i) { sql <- paste0( "DELETE FROM ", @@ -647,7 +648,7 @@ deleteAllRowsForPrimaryKey <- tableName, "\nWHERE ", paste(paste0( - colnames(keyValues), " = '", keyValues[i, ], "'" + colnames(keyValues), " = '", keyValues[i,], "'" ), collapse = " AND "), ";" ) @@ -722,9 +723,9 @@ deleteAllRowsForDatabaseId <- database_id = databaseId ) DatabaseConnector::executeSql(connection, - sql, - progressBar = FALSE, - reportOverallTime = FALSE + sql, + progressBar = FALSE, + reportOverallTime = FALSE ) } } @@ -810,22 +811,32 @@ loadResultsDataModelSpecifications <- function(filePath) { #' recast to a character data type and not try to handle different type #' conversions. formatChunk <- function(pkValuesInDb, chunk) { - if (nrow(pkValuesInDb) > 0) { - for (columnName in names(pkValuesInDb)) { - if (class(pkValuesInDb[[columnName]]) != class(chunk[[columnName]])) { - if (class(pkValuesInDb[[columnName]]) == "character") { - chunk <- chunk |> dplyr::mutate_at(columnName, as.character) - } else { - errorMsg <- paste0( - columnName, - " is of type ", - class(pkValuesInDb[[columnName]]), - " which cannot be converted between data frames pkValuesInDb and chunk" - ) - stop(errorMsg) - } + + for (columnName in names(pkValuesInDb)) { + if (class(pkValuesInDb[[columnName]]) == "integer") { + pkValuesInDb[[columnName]] <- as.numeric(pkValuesInDb[[columnName]]) + } + + if (class(chunk[[columnName]]) == "integer") { + chunk[[columnName]] <- as.numeric(chunk[[columnName]]) + } + + + if (class(pkValuesInDb[[columnName]]) != class(chunk[[columnName]])) { + if (class(pkValuesInDb[[columnName]]) == "character") { + chunk <- chunk |> dplyr::mutate_at(columnName, as.character) + + } else { + errorMsg <- paste0( + columnName, + " is of type ", + class(pkValuesInDb[[columnName]]), + " which cannot be converted between data frames pkValuesInDb and chunk" + ) + stop(errorMsg) } } } + return(chunk) } diff --git a/tests/testthat/test-DataModelFunctions.R b/tests/testthat/test-DataModelFunctions.R index dc69771..e7c3dc1 100644 --- a/tests/testthat/test-DataModelFunctions.R +++ b/tests/testthat/test-DataModelFunctions.R @@ -79,3 +79,12 @@ test_that("formatChunk throws error for incompatible types", { "id is of type numeric which cannot be converted between data frames pkValuesInDb and chunk" ) }) + +test_that("format chunk handles int/numeric type conversions ok", { + class(pkValuesInDb$id) <- "numeric" + class(chunk$id) <- "integer" + chunk <- formatChunk(pkValuesInDb, chunk) + checkmate::expect_data_frame(chunk) + checkmate::expect_numeric(chunk$id) +}) +