Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(getSubnetworkFromIndra): Fix bugs related to multiple matches for an HGNC ID #28

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions R/utils_annotateProteinInfoFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,27 @@
stop("Input must be a list.")
}

if (any(!sapply(uniprotMnemonicIds, is.character))) {
stop("All elements in the list must be character strings representing UniProt mnemonic IDs.")
}

if (length(uniprotMnemonicIds) == 0) {
stop("Input list must not be empty.")
}

tryCatch({
# Attempt to convert all elements to character if not already character
uniprotMnemonicIds <- lapply(uniprotMnemonicIds, function(x) {
if (!is.character(x)) {
as.character(x)
} else {
x
}
})

# Check if conversion was successful
if (any(!sapply(uniprotMnemonicIds, is.character))) {
stop("All elements in the list must be character strings representing UniProt mnemonic IDs.")
}
}, error = function(e) {
stop("An error occurred converting uniprot mnemonic IDs to character strings: ", e$message)
})

apiUrl <- file.path(Sys.getenv("INDRA_API_URL"), "api/get_uniprot_ids_from_uniprot_mnemonic_ids")

Expand Down
22 changes: 19 additions & 3 deletions R/utils_getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
stop("Invalid Input Error: Input must contain a column named 'HgncId'.")
}
if (nrow(input) >= 400) {
stop("Invalid Input Error: INDRA query must contain less than 400 proteins. Consider adding a p-value cutoff")
stop("Invalid Input Error: INDRA query must contain less than 400 proteins. Consider lowering your p-value cutoff")
}
}

Expand Down Expand Up @@ -64,8 +64,24 @@
edge$target_id, "@HGNC&format=html",
sep = ""
)
edge$source_uniprot_id <- input[input$HgncId == edge$source_id, ]$Protein
edge$target_uniprot_id <- input[input$HgncId == edge$target_id, ]$Protein

# Map the edge's HGNC IDs back to UniProt IDs using the input table (rows with a missing Protein are ignored)
matched_rows_source <- input[input$HgncId == edge$source_id & !is.na(input$Protein), ]
matched_rows_target <- input[input$HgncId == edge$target_id & !is.na(input$Protein), ]

if (nrow(matched_rows_source) != 1 || nrow(matched_rows_target) != 1) {
stop(paste0(
"INDRA Exception: Unexpected number of matches for the following HGNC IDs in the input data: ",
edge$source_id,
" or ",
edge$target_id,
". Each ID must match exactly one entry in the input data, but 0 or multiple matches were found. Please check the input data for duplicates or missing entries."
))
}

edge$source_uniprot_id <- matched_rows_source$Protein
edge$target_uniprot_id <- matched_rows_target$Protein

return(edge)
}

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("Exception is thrown for 400+ proteins in dataframe", {
)
expect_error(
getSubnetworkFromIndra(input_400),
"Invalid Input Error: INDRA query must contain less than 400 proteins. Consider adding a p-value cutoff"
"Invalid Input Error: INDRA query must contain less than 400 proteins. Consider lowering your p-value cutoff"
)
})

Expand Down
Loading