-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcitation.parser.plus.0.2.php
183 lines (159 loc) · 6.3 KB
/
citation.parser.plus.0.2.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
<?php
/* MADE 2019, JOSHUA NEDS-FOX,
CONSIDER THIS COMMENT
YOUR EXPLICIT, NON-EXCLUSIVE LICENSE
TO USE AND ADAPT AS NECESSARY
AS LONG AS YOU CREDIT THE SOURCE */
//ONLY PROCESS WHEN THE FORM WAS SUBMITTED AND THE SHARED PASSWORD MATCHES.
//empty()/isset() KEEP A PLAIN PAGE LOAD (NO POST DATA) FROM RAISING UNDEFINED-INDEX NOTICES,
//is_string() REJECTS ARRAY-VALUED INPUT, AND hash_equals() COMPARES IN CONSTANT TIME
if (!empty($_POST['submit']) && isset($_POST['crunk']) && is_string($_POST['crunk'])
&& hash_equals('change_this_password', $_POST['crunk'])) {
//#################################//
//########## FUNCTIONS ############//
//#################################//
//receiveCitations CREATES AN ARRAY OF ALL REFERENCES PASTED INTO THE WEBFORM
function receiveCitations($citationdata) {
    //TAKES THE RAW TEXT PASTED INTO THE WEBFORM (ONE REFERENCE PER LINE),
    //RETURNS A ZERO-INDEXED ARRAY WITH ONE STRING PER NON-BLANK LINE
    //(AN EMPTY ARRAY WHEN NOTHING USABLE WAS PASTED)

    //EVERY & IS REPLACED WITH THE WORD "and"; THE CAST TOLERATES A MISSING (NULL) FIELD
    $citationdata = str_replace('&', 'and', (string) $citationdata);

    //SPLIT ON ANY LINE ENDING (\r\n, \r OR \n) SO NO STRAY \r IS LEFT ON A REFERENCE
    $lines = preg_split('/\r\n|\r|\n/', $citationdata);

    //DROP BLANK AND WHITESPACE-ONLY LINES, INCLUDING LEADING AND TRAILING ONES
    $citations = array();
    foreach ($lines as $line) {
        if (trim($line) !== '') {
            $citations[] = $line;
        }
    }
    return $citations;
}
//queryCrossref QUERIES CROSSREF FOR ONE REFERENCE, RETURNS
//AN ARRAY OF THE TOP TWO RESULTS AS PHP OBJECTS FROM THEIR DATABASE
function queryCrossref($single_reference) {
    //TAKES ONE REFERENCE STRING, RETURNS AN ARRAY WITH EXACTLY TWO ENTRIES (INDEX 0 AND 1):
    //THE FIRST TWO ITEMS OF THE CROSSREF RESPONSE AS PHP OBJECTS,
    //OR null IN A SLOT WHEN THE REQUEST FAILED OR RETURNED FEWER ITEMS

    //BASED ON THE CROSSREF REST API https://github.com/CrossRef/rest-api-doc
    $url = "https://api.crossref.org/works?query.bibliographic=".urlencode((string) $single_reference)."&rows=5&sort=score&order=desc";
    $user_agent = "User-Agent: citation-parser/0.2 (https://github.com/WSULib/citation-parser; mailto:[your_email])";

    //THE URL MUST BE QUOTED: UNQUOTED, EACH & IS A SHELL CONTROL OPERATOR, SO THE
    //rows/sort/order PARAMETERS AND THE -H HEADER WOULD NEVER REACH curl.
    //NOTE(REVIEW): ASSUMES A POSIX SHELL, AS THE ORIGINAL SINGLE-QUOTED HEADER DID
    $query = "curl -X GET ".escapeshellarg($url)." -H ".escapeshellarg($user_agent);
    $response = shell_exec($query);

    //TURNS JSON INTO PHP OBJECT; ANYTHING UNPARSEABLE BECOMES null
    $crossref_results = is_string($response) ? json_decode($response) : null;

    $items = array();
    if (isset($crossref_results->message->items) && is_array($crossref_results->message->items)) {
        $items = $crossref_results->message->items;
    }

    $top_two_results = array();
    $top_two_results[] = isset($items[0]) ? $items[0] : null;
    $top_two_results[] = isset($items[1]) ? $items[1] : null;
    return $top_two_results;
}
//queryAnyStyle QUERIES ANYSTYLE FOR ALL REFERENCES, RETURNS
//A PHP ARRAY WITH CITATION PARTS FOR ALL REFERENCES
function queryAnyStyle($citations) {
    //TAKES THE ARRAY RETURNED BY receiveCitations, POSTS IT TO anystyle.io IN ONE REQUEST,
    //RETURNS THE DECODED RESPONSE AS A PHP ARRAY
    //(AN EMPTY ARRAY WHEN THERE IS NOTHING TO SEND OR THE RESPONSE IS NOT VALID JSON)
    $reference_list = array_values((array) $citations);
    if (count($reference_list) === 0) {
        return array();
    }

    //json_encode ESCAPES QUOTES AND BACKSLASHES INSIDE THE REFERENCES,
    //WHICH THE HAND-BUILT STRING DID NOT
    $payload = json_encode(array(
        "access_token" => "[YOU'LL NEED AN ACCESS TOKEN FROM ANYSTYLE.IO]",
        "references" => $reference_list,
        "format" => "json",
    ));
    if ($payload === false) {
        //json_encode FAILS ON INPUT IT CANNOT REPRESENT (E.G. INVALID UTF-8)
        return array();
    }

    //QUERY anystyle.io WITH THE LIST; escapeshellarg KEEPS AN APOSTROPHE IN A PASTED
    //REFERENCE FROM ENDING THE QUOTED ARGUMENT AND INJECTING SHELL COMMANDS
    $query = "curl -d ".escapeshellarg($payload)." ";
    $query .= "-H \"Content-Type: application/json;charset=UTF-8\" ";
    $query .= "-X POST https://anystyle.io/parse/references.json";
    $response = shell_exec($query);

    //RETURN A PHP ARRAY OF THE RESULTS
    $anystyle_results = is_string($response) ? json_decode($response, TRUE) : null; //JSON -> PHP ARRAY
    return is_array($anystyle_results) ? $anystyle_results : array();
}
//processReferences GETS TWO POTENTIAL DOI MATCHES FOR EACH REFERENCE PASTED INTO THE WEBFORM
//AND MERGES THEM WITH THE REFERENCE PARTS OF EACH REFERENCE
function processReferences($citations) {
    //TAKES THE ARRAY OF REFERENCE STRINGS, RETURNS ONE ENTRY PER REFERENCE SHAPED AS
    //array( array(match 1, match 2, original reference string), reference parts )
    $references = array();
    if (empty($citations)) {
        //NOTHING PASTED: SKIP BOTH REMOTE SERVICES
        return $references;
    }

    //ONE REQUEST TO ANYSTYLE COVERS ALL REFERENCES
    $reference_parts = queryAnyStyle($citations);

    $i = 0;
    foreach ($citations as $citation) {
        //SLOTS 0 AND 1 HOLD THE CROSSREF MATCHES, SLOT 2 THE ORIGINAL REFERENCE
        $matches = queryCrossref($citation);
        $matches[2] = $citation;

        //ANYSTYLE MAY FAIL OR RETURN FEWER ROWS THAN WERE SENT; USE AN EMPTY PART LIST THEN
        $parts = (is_array($reference_parts) && isset($reference_parts[$i])) ? $reference_parts[$i] : array();

        $references[$i] = array($matches, $parts);
        $i++;

        //ONE SECOND PAUSE AFTER EACH CROSSREF QUERY (PRESUMABLY TO LIMIT THE REQUEST RATE)
        sleep(1);
    }
    return $references;
}
//buildXML TAKES OUTPUT OF processReferences,
//MAKES A HUMAN READABLE DOI CHECKER FOR EACH REFERENCE,
//REQUIRES A DOI OR A DUMMY DOI FOR THE PARENT ARTICLE
function buildXML($dois, $article_doi) {
    //TAKES THE OUTPUT OF processReferences AND THE PARENT ARTICLE DOI (USED IN EACH CITATION KEY),
    //RETURNS ONE <citations> XML STRING. PER REFERENCE IT HOLDS THE RAW STRING, TWO REBUILT
    //REFERENCES, TWO <result> METADATA BLOCKS AND THE PARSED REFERENCE PARTS.
    //ALL TEXT IS XML-ESCAPED; MISSING FIELDS ARE WRITTEN AS EMPTY STRINGS

    //COLLAPSE A SCALAR, LIST, MAP OR OBJECT INTO ONE STRING (LEAVES JOINED BY ", ")
    //SO AN ARRAY VALUE IS NEVER PRINTED AS THE WORD "Array"
    $flatten = function ($value) use (&$flatten) {
        if (is_array($value) || is_object($value)) {
            $pieces = array();
            foreach ((array) $value as $child) {
                $text = $flatten($child);
                if ($text !== '') {
                    $pieces[] = $text;
                }
            }
            return implode(', ', $pieces);
        }
        return (string) $value;
    };
    //ESCAPE A VALUE FOR USE AS XML ELEMENT TEXT OR INSIDE A QUOTED ATTRIBUTE
    $esc = function ($value) use ($flatten) {
        return htmlspecialchars($flatten($value), ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8');
    };
    //READ A PROPERTY FROM AN OBJECT, OR null WHEN THE OBJECT OR PROPERTY IS MISSING
    $field = function ($record, $name) {
        return (is_object($record) && isset($record->{$name})) ? $record->{$name} : null;
    };
    //FIRST ELEMENT OF A LIST, OR null WHEN THERE IS NONE
    $first = function ($list) {
        return (is_array($list) && isset($list[0])) ? $list[0] : null;
    };
    //TURN AN ARRAY KEY INTO A USABLE XML ELEMENT NAME (KEYS LIKE container-title PASS UNCHANGED)
    $tag_name = function ($key) {
        $name = preg_replace('/[^A-Za-z0-9_.-]/', '_', (string) $key);
        if ($name === '' || !preg_match('/^[A-Za-z_]/', $name)) {
            $name = '_'.$name;
        }
        return $name;
    };

    $xml = "<citations>\r\n";
    $i = 1;
    foreach ($dois as $item) {
        //$item[0] = THE TWO MATCHES PLUS THE RAW STRING, $item[1] = THE REFERENCE PARTS
        $candidates = (is_array($item) && isset($item[0]) && is_array($item[0])) ? $item[0] : array();
        $parts = (is_array($item) && isset($item[1]) && is_array($item[1])) ? $item[1] : array();

        $xml .= "<citation key=\"key-".$esc($article_doi)."-".$i."\">\r\n";
        //THE ORIGINAL REFERENCE
        $xml .= "<raw_string>".$esc(isset($candidates[2]) ? $candidates[2] : '')."</raw_string>\r\n";

        //BOTH MATCHES ARE WALKED ONCE; THE <referenceN> LINES ARE STILL EMITTED
        //BEFORE THE <result> BLOCKS, AS BEFORE
        $reference_xml = '';
        $result_xml = '';
        for ($j = 0; $j <= 1; $j++) {
            $number = $j + 1;
            $record = isset($candidates[$j]) ? $candidates[$j] : null;
            $author = $first($field($record, 'author'));
            $family = $field($author, 'family');
            $title = $first($field($record, 'title'));
            $type = $field($record, 'type');
            $score = $field($record, 'score');
            $year = $first($first($field($field($record, 'issued'), 'date-parts')));

            //CONSTRUCT A REFERENCE FOR THIS POSSIBLE MATCH
            $reconstructed_reference = $flatten($family)." ".$flatten($field($author, 'given')).". ("
                .$flatten($year)."). ".$flatten($title).". ";
            if ($type != "journal-article") {
                $reconstructed_reference .= $flatten($field($record, 'publisher'));
            } else {
                $reconstructed_reference .= $flatten($first($field($record, 'container-title'))).", "
                    .$flatten($field($record, 'volume'))."(".$flatten($field($record, 'issue'))."), "
                    .$flatten($field($record, 'page'));
            }
            $reference_xml .= "<reference{$number}>".$esc($reconstructed_reference)."</reference{$number}>\r\n";

            //CONSTRUCT METADATA FOR THIS POSSIBLE MATCH
            //SCORES OF 100 OR GREATER SEEM TO INDICATE A SURE MATCH
            $flag = ($score >= 100) ? "TRUE" : "FALSE";
            $result_xml .= "<result number=\"{$number}\" flag=\"{$flag}\">\r\n";
            $result_xml .= "<type>".$esc($type)."</type>\r\n";
            $result_xml .= "<doi>".$esc($field($record, 'DOI'))."</doi>\r\n";
            $result_xml .= "<title>".$esc($title)."</title>\r\n";
            $result_xml .= "<author>".$esc($family)."</author>\r\n";
            $result_xml .= "<score>".$esc($score)."</score>\r\n";
            $result_xml .= "</result>\r\n";
        }
        $xml .= $reference_xml.$result_xml;

        //CONSTRUCT THE FIELDS FROM THE ORIGINAL REFERENCE
        //NOTE(REVIEW): VALUES MAY BE NESTED ARRAYS DEPENDING ON THE PARSER OUTPUT - THEY ARE FLATTENED
        $xml .= "<reference_parts>\r\n";
        foreach ($parts as $key => $value) {
            $name = $tag_name($key);
            $xml .= "<".$name.">".$esc($value)."</".$name.">\r\n";
        }
        $xml .= "</reference_parts>\r\n";
        $xml .= "</citation>\r\n";
        //INCREASE THE CITATION LIST COUNT BY 1
        $i = $i + 1;
    }
    $xml .= "</citations>";
    return $xml;
}
//#################################//
//########## PROCESSING ###########//
//#################################//
//THE GATE ABOVE ONLY CHECKS submit AND crunk, SO THESE TWO FIELDS MAY BE ABSENT
//(OR ARRAY-VALUED); FALL BACK TO AN EMPTY STRING INSTEAD OF RAISING NOTICES/TYPE ERRORS
$pasted_citations = (isset($_POST['citations']) && is_string($_POST['citations'])) ? $_POST['citations'] : '';
$parent_article_doi = (isset($_POST['article_doi']) && is_string($_POST['article_doi'])) ? $_POST['article_doi'] : '';
$citations = receiveCitations($pasted_citations);
// $citations IS AN ARRAY OF INDIVIDUAL REFERENCES
$references = processReferences($citations);
// GET TWO DOIS FOR EVERY REFERENCE, PLUS REFERENCE PARTS
$xml = buildXML($references, $parent_article_doi);
/* WE NOW HAVE
AN XML DOCUMENT OF CITATIONS,
WITH EACH CITATION CONTAINING
1. THE ORIGINAL RAW_STRING,
2. TWO POSSIBLE DOI MATCHES,
3. AND THE BIBLIOGRAPHIC PARTS OF THE ORIGINAL REFERENCE */
echo($xml);
} else {
//NO ACCEPTED SUBMISSION: SHOW THE INPUT FORM INSTEAD.
//THE FIELD NAMES (citations, crunk, article_doi, submit) ARE THE $_POST KEYS
//READ BY THE PROCESSING BRANCH, AND THE FORM POSTS BACK TO THIS SCRIPT'S FILENAME
?>
<form name="citation_parser" method="post" action="citation.parser.plus.0.2.php">
<textarea name="citations"></textarea> [citations]<br />
<input type="password" name="crunk" /> [crunk]<br />
<input type="text" name="article_doi" /> [article doi]<br />
<input type="submit" name="submit" value="Submit" />
</form><?php
}
?>