Skip to content

Commit 701ec48

Browse files
committed
tried to reset get_citation_counts.R
1 parent 4ffb471 commit 701ec48

File tree

2 files changed

+45
-100
lines changed

2 files changed

+45
-100
lines changed

.github/workflows/github-actions.yml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,18 @@ jobs:
7171
ls -l _site
7272
7373
# 8. Setup MinIO client
74-
- name: Setup MinIO client
75-
uses: lovellfelix/minio-deploy-action@v1
76-
77-
with:
78-
endpoint: https://s3.embl.de/
79-
80-
access_key: 4kRFKz352s8UbsA6Pycq
81-
82-
secret_key: ${{ secrets.MINIO_SECRET_KEY }}
83-
84-
bucket: 'www-huber'
85-
86-
target_dir: '/group/'
87-
88-
source_dir: '_site'
74+
# - name: Setup MinIO client
75+
# uses: lovellfelix/minio-deploy-action@v1
76+
#
77+
# with:
78+
# endpoint: https://s3.embl.de/
79+
#
80+
# access_key: 4kRFKz352s8UbsA6Pycq
81+
#
82+
# secret_key: ${{ secrets.MINIO_SECRET_KEY }}
83+
#
84+
# bucket: 'www-huber'
85+
#
86+
# target_dir: '/group/'
87+
#
88+
# source_dir: '_site'

rscripts/get_citation_counts.R

Lines changed: 30 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -29,116 +29,62 @@ getSematicScholar <- function(doi) {
2929
}
3030

3131
getOpenCitations <- function(doi) {
32-
url_base <- 'https://w3id.org/oc/index/api/v1/citation-count/'
3332

34-
if (is.na(doi) || doi == "NA" || nchar(doi) == 0) {
35-
return(NA_integer_)
36-
}
33+
url_base <- 'https://w3id.org/oc/index/api/v1/citation-count/'
3734

38-
message("OpenCitations: ", doi)
39-
40-
max_tries <- 5
41-
for (i in seq_len(max_tries)) {
42-
# Add jitter (random pause)
43-
delay <- runif(1, min = 1, max = 2^i)
44-
Sys.sleep(delay)
45-
46-
response <- tryCatch({
47-
httr::GET(
48-
url = paste0(url_base, doi),
49-
add_headers(Authorization = "efaa452b-43c9-42a1-8721-76ec51863458"),
50-
httr::timeout(5) # <- timeout after 5 seconds
51-
)
52-
}, error = function(e) {
53-
warning(paste("Attempt", i, "- Connection error for", doi, ":", e$message))
54-
return(NULL)
55-
})
56-
57-
# If response is successful, parse and return
58-
if (!is.null(response) && !httr::http_error(response)) {
59-
content <- httr::content(response)
60-
if (length(content)) {
61-
return(as.integer(content[[1]]))
62-
} else {
63-
return(NA_integer_)
64-
}
35+
if( is.na(doi) | doi == "NA" | nchar(doi) == 0 ) {
36+
n_citations <- NA
37+
} else {
38+
message("OpenCitations: ", doi)
39+
## We use an API token generated for the email address mike.smith@embl.de
40+
result <- httr::GET( url = paste0(url_base, doi),
41+
add_headers(Authorization = "efaa452b-43c9-42a1-8721-76ec51863458")) %>%
42+
httr::content()
43+
44+
if(length(result)) {
45+
n_citations <- result %>%
46+
magrittr::extract2(1) %>%
47+
as.integer()
48+
} else {
49+
n_citations <- NA
6550
}
66-
67-
warning(paste("Attempt", i, "- Failed for", doi, "-", if (!is.null(response)) httr::status_code(response)))
6851
}
6952

70-
warning(paste("All attempts failed for DOI:", doi))
71-
return(NA_integer_)
53+
return(n_citations)
7254
}
7355

7456
## this requires a perfect match between the title in the bibtex entry
7557
## and the result from Google Scholar. Not ideal, but we only use it for
7658
## references without a DOI
77-
getGoogleScholar <- function(title, max_retries = 3, wait_seconds = 2) {
78-
bibtex_title <- tolower(title) %>%
79-
stringr::str_remove_all("\\{|\\}")
80-
81-
# Retry logic
82-
attempt <- 1
83-
wh_gscholar <- NULL
84-
85-
# Check Google Scholar connection
86-
check_connection <- function() {
87-
res <- tryCatch(
88-
GET("https://scholar.google.com"),
89-
error = function(e) NULL
90-
)
91-
return(!is.null(res) && status_code(res) == 200)
92-
}
93-
94-
if (!check_connection()) {
95-
message("❌ Unable to reach Google Scholar. Please check your internet connection.")
96-
return(as.integer(NA))
97-
}
59+
getGoogleScholar <- function(title) {
60+
## this gets Wolfgang's Google Scholar data
61+
wh_gscholar <- scholar::get_publications('gI8o6x8AAAAJ')
9862

99-
while (attempt <= max_retries) {
100-
message(sprintf("Attempt %d to fetch Google Scholar data...", attempt))
101-
102-
wh_gscholar <- tryCatch(
103-
scholar::get_publications('gI8o6x8AAAAJ'),
104-
error = function(e) NULL,
105-
warning = function(w) NULL
106-
)
107-
108-
if (is.data.frame(wh_gscholar)) {
109-
break # success
110-
} else {
111-
Sys.sleep(wait_seconds) # wait before retrying
112-
attempt <- attempt + 1
113-
}
114-
}
115-
116-
if (!is.data.frame(wh_gscholar)) {
117-
message("❌ Failed to fetch Google Scholar data. Possible connection issue.")
118-
return(as.integer(NA))
119-
}
63+
bibtex_title <- tolower(title) %>%
64+
str_remove_all("\\{|\\}")
12065

12166
res <- wh_gscholar %>%
122-
dplyr::mutate(t2 = tolower(title)) %>%
123-
dplyr::filter(t2 == bibtex_title)
67+
mutate(t2 = tolower(title)) %>%
68+
filter(t2 == bibtex_title)
12469

125-
n_citations <- if (nrow(res) > 0) as.integer(res$cites[1]) else as.integer(NA)
70+
n_citations <- ifelse(nrow(res), as.integer(res$cites), NA)
12671
return(n_citations)
12772
}
12873

74+
12975
cleandoi <- function(doi) {
13076
cdoi =
13177
stringr::str_remove(doi, '(https|http)?(://)?(dx.)?doi.org/') |>
13278
stringr::str_remove('doi:') |>
13379
stringr::str_remove('^"') |>
13480
stringr::str_remove('"$') |>
13581
stringr::str_remove('\\},$')
136-
82+
13783
stopifnot(identical(cdoi, doi))
13884
cdoi
13985
}
14086

141-
## remove "comment" lines from bibtex. bib2df tries to include these as
87+
## remove "comment" lines from bibtex. bib2df tries to include these as
14288
## fields in the table later
14389
tmp <- readLines('lop.bib')
14490
idx_to_remove <- grep(pattern = "^%", x = tmp)
@@ -150,13 +96,12 @@ writeLines(tmp, tf)
15096

15197
citation_counts <- bib2df(tf) %>%
15298
mutate(DOI = cleandoi(DOI)) %>%
153-
mutate(search_id = if_else(DOI == "" | DOI == "NA", "", DOI)) %>%
99+
mutate(search_id = if_else(DOI == "" | DOI == "NA", CORPUS, DOI)) %>%
154100
mutate(
155101
open_citations = vapply(DOI, getOpenCitations, FUN.VALUE = integer(1)),
156102
semantic_scholar = vapply(search_id, getSematicScholar, FUN.VALUE = integer(1)),
157103
google_scholar = vapply(TITLE, getGoogleScholar, FUN.VALUE = integer(1))
158104
) %>%
159105
select(BIBTEXKEY, open_citations, semantic_scholar, google_scholar)
160106

161-
saveRDS(citation_counts, file = "citation_counts.rds")
162-
107+
saveRDS(citation_counts, file = "citation_counts.rds")

0 commit comments

Comments
 (0)