@@ -29,116 +29,62 @@ getSematicScholar <- function(doi) {
29
29
}
30
30
31
31
## Look up the citation count of a single DOI in the OpenCitations index.
##
## Arguments:
##   doi  A single DOI string. NA, the literal string "NA" and "" are all
##        treated as "no DOI available".
##
## Returns an integer(1): the citation count reported by the API, or
## NA_integer_ when no DOI was supplied, the request failed, the server
## answered with an HTTP error, or the response body was empty.
getOpenCitations <- function(doi) {
  url_base <- "https://w3id.org/oc/index/api/v1/citation-count/"

  ## Scalar, short-circuiting checks: nothing to look up without a usable DOI.
  if (length(doi) != 1L || is.na(doi) || doi == "NA" || nchar(doi) == 0) {
    return(NA_integer_)
  }

  message("OpenCitations: ", doi)

  ## We use an API token generated for the email address mike.smith@embl.de
  ## NOTE(review): the token is committed in the source. It can be overridden
  ## through the OPENCITATIONS_TOKEN environment variable; consider dropping
  ## the hard-coded fallback.
  token <- Sys.getenv(
    "OPENCITATIONS_TOKEN",
    unset = "efaa452b-43c9-42a1-8721-76ec51863458"
  )

  ## A connection problem for one DOI should not abort the whole run.
  response <- tryCatch(
    httr::GET(
      url = paste0(url_base, doi),
      httr::add_headers(Authorization = token)
    ),
    error = function(e) {
      warning(
        "OpenCitations request failed for ", doi, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(response)) {
    return(NA_integer_)
  }

  ## Do not try to parse an error page as a citation count.
  if (httr::http_error(response)) {
    warning(
      "OpenCitations returned HTTP ", httr::status_code(response),
      " for ", doi,
      call. = FALSE
    )
    return(NA_integer_)
  }

  result <- httr::content(response)
  if (length(result) == 0) {
    return(NA_integer_)
  }

  ## The first element of the parsed response carries the count; flatten it
  ## so that exactly one integer is returned.
  n_citations <- as.integer(unlist(result[[1]], use.names = FALSE))
  if (length(n_citations) == 0) {
    return(NA_integer_)
  }
  n_citations[1]
}
73
55
74
56
## this requires a perfect match between the title in the bibtex entry
## and the result from Google Scholar. Not ideal, but we only use it for
## references without a DOI
##
## Arguments:
##   title         Title of the bibtex entry; curly braces are stripped and
##                 the comparison is case-insensitive.
##   publications  Optional data frame with columns `title` and `cites`.
##                 When NULL (the default) the publication list is fetched
##                 from Google Scholar. Passing a pre-fetched table avoids
##                 one download per title.
##
## Returns an integer(1): the citation count of the first exactly matching
## publication, or NA_integer_ when there is no match or the publication
## list could not be obtained.
getGoogleScholar <- function(title, publications = NULL) {
  if (is.null(publications)) {
    ## this gets Wolfgang's Google Scholar data
    publications <- tryCatch(
      scholar::get_publications("gI8o6x8AAAAJ"),
      error = function(e) {
        warning(
          "Failed to fetch Google Scholar data: ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  }

  if (!is.data.frame(publications) || nrow(publications) == 0) {
    return(NA_integer_)
  }

  ## bibtex protects capitalisation with braces; Google Scholar has none.
  bibtex_title <- gsub("\\{|\\}", "", tolower(title))

  ## which() drops NA comparisons, so untitled rows never count as a match.
  hits <- which(tolower(publications[["title"]]) == bibtex_title)
  if (length(hits) == 0) {
    return(NA_integer_)
  }

  as.integer(publications[["cites"]][hits[1]])
}
128
73
74
+
129
75
## Strip URL prefixes, a "doi:" tag, surrounding quotes and a trailing "},"
## from DOI strings, then insist that nothing actually had to be removed:
## the function stops with an error unless the input was already clean.
## Returns the (unchanged) DOI vector.
cleandoi <- function(doi) {
  ## Patterns are applied one after the other, first match only.
  junk_patterns <- c(
    '(https|http)?(://)?(dx.)?doi.org/',
    'doi:',
    '^"',
    '"$',
    '\\},$'
  )

  cdoi <- doi
  for (pattern in junk_patterns) {
    cdoi <- stringr::str_remove(cdoi, pattern)
  }

  stopifnot(identical(cdoi, doi))
  cdoi
}
140
86
141
- # # remove "comment" lines from bibtex. bib2df tries to include these as
87
+ # # remove "comment" lines from bibtex. bib2df tries to include these as
142
88
# # fields in the table later
143
89
tmp <- readLines(' lop.bib' )
144
90
idx_to_remove <- grep(pattern = " ^%" , x = tmp )
@@ -150,13 +96,12 @@ writeLines(tmp, tf)
150
96
151
97
## Query the three citation sources for every entry of the cleaned bibtex
## file and keep one row per BIBTEXKEY with the three counts.
citation_counts <- bib2df(tf) %>%
  mutate(
    DOI = cleandoi(DOI),
    ## entries without a usable DOI are searched by their CORPUS id instead
    search_id = if_else(DOI %in% c("", "NA"), CORPUS, DOI)
  ) %>%
  mutate(
    open_citations = vapply(
      DOI, getOpenCitations,
      FUN.VALUE = integer(1)
    ),
    semantic_scholar = vapply(
      search_id, getSematicScholar,
      FUN.VALUE = integer(1)
    ),
    google_scholar = vapply(
      TITLE, getGoogleScholar,
      FUN.VALUE = integer(1)
    )
  ) %>%
  select(BIBTEXKEY, open_citations, semantic_scholar, google_scholar)

saveRDS(citation_counts, file = "citation_counts.rds")
0 commit comments