Skip to content

Commit

Permalink
handle integer as numeric to prevent type problems pulling out column…
Browse files Browse the repository at this point in the history
…s from db
  • Loading branch information
azimov committed Jun 12, 2024
1 parent e1db266 commit 530fd82
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 35 deletions.
81 changes: 46 additions & 35 deletions R/DataModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ checkAndFixColumnNames <-
expectedNames <- tableSpecs %>%
dplyr::select("columnName") %>%
dplyr::anti_join(dplyr::filter(optionalNames, !.data$columnName %in% observeredNames),
by = "columnName"
by = "columnName"
) %>%
dplyr::arrange("columnName") %>%
dplyr::pull()
Expand Down Expand Up @@ -181,7 +181,7 @@ checkAndFixDuplicateRows <-
specifications) {
primaryKeys <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()
duplicatedRows <- duplicated(table[, primaryKeys])
Expand All @@ -194,7 +194,7 @@ checkAndFixDuplicateRows <-
sum(duplicatedRows)
)
)
return(table[!duplicatedRows, ])
return(table[!duplicatedRows,])
} else {
return(table)
}
Expand All @@ -220,7 +220,7 @@ appendNewRows <-
if (nrow(data) > 0) {
primaryKeys <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()
newData <- newData %>%
Expand Down Expand Up @@ -250,10 +250,10 @@ formatDouble <- function(x) {

.truncateTable <- function(tableName, connection, schema, tablePrefix) {
DatabaseConnector::renderTranslateExecuteSql(connection,
"TRUNCATE TABLE @schema.@table_prefix@table;",
table_prefix = tablePrefix,
schema = schema,
table = tableName
"TRUNCATE TABLE @schema.@table_prefix@table;",
table_prefix = tablePrefix,
schema = schema,
table = tableName
)
invisible(NULL)
}
Expand Down Expand Up @@ -354,8 +354,8 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti
primaryKeyValuesInChunk <- unique(chunk[env$primaryKey])
duplicates <-
dplyr::inner_join(env$primaryKeyValuesInDb,
primaryKeyValuesInChunk,
by = env$primaryKey
primaryKeyValuesInChunk,
by = env$primaryKey
)

if (nrow(duplicates) != 0) {
Expand Down Expand Up @@ -386,7 +386,7 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti
# Remove duplicates we already dealt with:
env$primaryKeyValuesInDb <-
env$primaryKeyValuesInDb %>%
dplyr::anti_join(duplicates, by = env$primaryKey)
dplyr::anti_join(duplicates, by = env$primaryKey)
}
}
if (nrow(chunk) == 0) {
Expand Down Expand Up @@ -428,7 +428,7 @@ uploadTable <- function(tableName,

primaryKey <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()

Expand All @@ -444,7 +444,7 @@ uploadTable <- function(tableName,
if (purgeSiteDataBeforeUploading && "database_id" %in% primaryKey) {
type <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
.data$columnName == "database_id") %>%
.data$columnName == "database_id") %>%
dplyr::select("dataType") %>%
dplyr::pull()
# Remove the existing data for the databaseId
Expand Down Expand Up @@ -572,10 +572,10 @@ uploadResults <- function(connection = NULL,
ParallelLogger::logInfo("Removing all records for tables within specification")

invisible(lapply(unique(specifications$tableName),
.truncateTable,
connection = connection,
schema = schema,
tablePrefix = tablePrefix
.truncateTable,
connection = connection,
schema = schema,
tablePrefix = tablePrefix
))
}

Expand Down Expand Up @@ -639,6 +639,7 @@ uploadResults <- function(connection = NULL,
#' @export
deleteAllRowsForPrimaryKey <-
function(connection, schema, tableName, keyValues) {

createSqlStatement <- function(i) {
sql <- paste0(
"DELETE FROM ",
Expand All @@ -647,7 +648,7 @@ deleteAllRowsForPrimaryKey <-
tableName,
"\nWHERE ",
paste(paste0(
colnames(keyValues), " = '", keyValues[i, ], "'"
colnames(keyValues), " = '", keyValues[i,], "'"
), collapse = " AND "),
";"
)
Expand Down Expand Up @@ -722,9 +723,9 @@ deleteAllRowsForDatabaseId <-
database_id = databaseId
)
DatabaseConnector::executeSql(connection,
sql,
progressBar = FALSE,
reportOverallTime = FALSE
sql,
progressBar = FALSE,
reportOverallTime = FALSE
)
}
}
Expand Down Expand Up @@ -810,22 +811,32 @@ loadResultsDataModelSpecifications <- function(filePath) {
#' recast to a character data type and not try to handle different type
#' conversions.
formatChunk <- function(pkValuesInDb, chunk) {
if (nrow(pkValuesInDb) > 0) {
for (columnName in names(pkValuesInDb)) {
if (class(pkValuesInDb[[columnName]]) != class(chunk[[columnName]])) {
if (class(pkValuesInDb[[columnName]]) == "character") {
chunk <- chunk |> dplyr::mutate_at(columnName, as.character)
} else {
errorMsg <- paste0(
columnName,
" is of type ",
class(pkValuesInDb[[columnName]]),
" which cannot be converted between data frames pkValuesInDb and chunk"
)
stop(errorMsg)
}

for (columnName in names(pkValuesInDb)) {
if (class(pkValuesInDb[[columnName]]) == "integer") {
pkValuesInDb[[columnName]] <- as.numeric(pkValuesInDb[[columnName]])
}

if (class(chunk[[columnName]]) == "integer") {
chunk[[columnName]] <- as.numeric(chunk[[columnName]])
}


if (class(pkValuesInDb[[columnName]]) != class(chunk[[columnName]])) {
if (class(pkValuesInDb[[columnName]]) == "character") {
chunk <- chunk |> dplyr::mutate_at(columnName, as.character)

} else {
errorMsg <- paste0(
columnName,
" is of type ",
class(pkValuesInDb[[columnName]]),
" which cannot be converted between data frames pkValuesInDb and chunk"
)
stop(errorMsg)
}
}
}

return(chunk)
}
9 changes: 9 additions & 0 deletions tests/testthat/test-DataModelFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,12 @@ test_that("formatChunk throws error for incompatible types", {
"id is of type numeric which cannot be converted between data frames pkValuesInDb and chunk"
)
})

test_that("format chunk handles int/numeric type conversions ok", {
class(pkValuesInDb$id) <- "numeric"
class(chunk$id) <- "integer"
chunk <- formatChunk(pkValuesInDb, chunk)
checkmate::expect_data_frame(chunk)
checkmate::expect_numeric(chunk$id)
})

0 comments on commit 530fd82

Please sign in to comment.