Skip to content

Commit

Permalink
Merge pull request #56 from OHDSI/pk-type-bug
Browse files Browse the repository at this point in the history
attempt to fix bug of bad primary key col check
  • Loading branch information
azimov authored Jun 12, 2024
2 parents 665b71a + 530fd82 commit 6c0023c
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 22 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ResultModelManager
Title: Result Model Manager
Version: 0.5.7
Version: 0.5.8
Authors@R:
person("Jamie", "Gilbert", , "gilbert@ohdsi.org", role = c("aut", "cre"))
Description: Database data model management utilities for OHDSI packages.
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# ResultModelManager 0.5.8

Bug fixes:

1. Fixed bug in uploads where empty tables are used to determine uploads (and can fail)


# ResultModelManager 0.5.7

Bug fixes:
Expand Down
55 changes: 34 additions & 21 deletions R/DataModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ checkAndFixColumnNames <-
expectedNames <- tableSpecs %>%
dplyr::select("columnName") %>%
dplyr::anti_join(dplyr::filter(optionalNames, !.data$columnName %in% observeredNames),
by = "columnName"
by = "columnName"
) %>%
dplyr::arrange("columnName") %>%
dplyr::pull()
Expand Down Expand Up @@ -181,7 +181,7 @@ checkAndFixDuplicateRows <-
specifications) {
primaryKeys <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()
duplicatedRows <- duplicated(table[, primaryKeys])
Expand All @@ -194,7 +194,7 @@ checkAndFixDuplicateRows <-
sum(duplicatedRows)
)
)
return(table[!duplicatedRows, ])
return(table[!duplicatedRows,])
} else {
return(table)
}
Expand All @@ -220,7 +220,7 @@ appendNewRows <-
if (nrow(data) > 0) {
primaryKeys <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()
newData <- newData %>%
Expand Down Expand Up @@ -250,10 +250,10 @@ formatDouble <- function(x) {

.truncateTable <- function(tableName, connection, schema, tablePrefix) {
DatabaseConnector::renderTranslateExecuteSql(connection,
"TRUNCATE TABLE @schema.@table_prefix@table;",
table_prefix = tablePrefix,
schema = schema,
table = tableName
"TRUNCATE TABLE @schema.@table_prefix@table;",
table_prefix = tablePrefix,
schema = schema,
table = tableName
)
invisible(NULL)
}
Expand Down Expand Up @@ -354,8 +354,8 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti
primaryKeyValuesInChunk <- unique(chunk[env$primaryKey])
duplicates <-
dplyr::inner_join(env$primaryKeyValuesInDb,
primaryKeyValuesInChunk,
by = env$primaryKey
primaryKeyValuesInChunk,
by = env$primaryKey
)

if (nrow(duplicates) != 0) {
Expand Down Expand Up @@ -386,7 +386,7 @@ uploadChunk <- function(chunk, pos, env, specifications, resultsFolder, connecti
# Remove duplicates we already dealt with:
env$primaryKeyValuesInDb <-
env$primaryKeyValuesInDb %>%
dplyr::anti_join(duplicates, by = env$primaryKey)
dplyr::anti_join(duplicates, by = env$primaryKey)
}
}
if (nrow(chunk) == 0) {
Expand Down Expand Up @@ -428,7 +428,7 @@ uploadTable <- function(tableName,

primaryKey <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
tolower(.data$primaryKey) == "yes") %>%
tolower(.data$primaryKey) == "yes") %>%
dplyr::select("columnName") %>%
dplyr::pull()

Expand All @@ -444,7 +444,7 @@ uploadTable <- function(tableName,
if (purgeSiteDataBeforeUploading && "database_id" %in% primaryKey) {
type <- specifications %>%
dplyr::filter(.data$tableName == !!tableName &
.data$columnName == "database_id") %>%
.data$columnName == "database_id") %>%
dplyr::select("dataType") %>%
dplyr::pull()
# Remove the existing data for the databaseId
Expand Down Expand Up @@ -572,10 +572,10 @@ uploadResults <- function(connection = NULL,
ParallelLogger::logInfo("Removing all records for tables within specification")

invisible(lapply(unique(specifications$tableName),
.truncateTable,
connection = connection,
schema = schema,
tablePrefix = tablePrefix
.truncateTable,
connection = connection,
schema = schema,
tablePrefix = tablePrefix
))
}

Expand Down Expand Up @@ -639,6 +639,7 @@ uploadResults <- function(connection = NULL,
#' @export
deleteAllRowsForPrimaryKey <-
function(connection, schema, tableName, keyValues) {

createSqlStatement <- function(i) {
sql <- paste0(
"DELETE FROM ",
Expand All @@ -647,7 +648,7 @@ deleteAllRowsForPrimaryKey <-
tableName,
"\nWHERE ",
paste(paste0(
colnames(keyValues), " = '", keyValues[i, ], "'"
colnames(keyValues), " = '", keyValues[i,], "'"
), collapse = " AND "),
";"
)
Expand Down Expand Up @@ -722,9 +723,9 @@ deleteAllRowsForDatabaseId <-
database_id = databaseId
)
DatabaseConnector::executeSql(connection,
sql,
progressBar = FALSE,
reportOverallTime = FALSE
sql,
progressBar = FALSE,
reportOverallTime = FALSE
)
}
}
Expand Down Expand Up @@ -810,10 +811,21 @@ loadResultsDataModelSpecifications <- function(filePath) {
#' recast to a character data type and not try to handle different type
#' conversions.
formatChunk <- function(pkValuesInDb, chunk) {

for (columnName in names(pkValuesInDb)) {
if (class(pkValuesInDb[[columnName]]) == "integer") {
pkValuesInDb[[columnName]] <- as.numeric(pkValuesInDb[[columnName]])
}

if (class(chunk[[columnName]]) == "integer") {
chunk[[columnName]] <- as.numeric(chunk[[columnName]])
}


if (class(pkValuesInDb[[columnName]]) != class(chunk[[columnName]])) {
if (class(pkValuesInDb[[columnName]]) == "character") {
chunk <- chunk |> dplyr::mutate_at(columnName, as.character)

} else {
errorMsg <- paste0(
columnName,
Expand All @@ -825,5 +837,6 @@ formatChunk <- function(pkValuesInDb, chunk) {
}
}
}

return(chunk)
}
9 changes: 9 additions & 0 deletions tests/testthat/test-DataModelFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,12 @@ test_that("formatChunk throws error for incompatible types", {
"id is of type numeric which cannot be converted between data frames pkValuesInDb and chunk"
)
})

test_that("format chunk handles int/numeric type conversions ok", {
class(pkValuesInDb$id) <- "numeric"
class(chunk$id) <- "integer"
chunk <- formatChunk(pkValuesInDb, chunk)
checkmate::expect_data_frame(chunk)
checkmate::expect_numeric(chunk$id)
})

0 comments on commit 6c0023c

Please sign in to comment.