#!/usr/bin/env python
# -*- coding: utf-8 -*-
# spaCy REST API server (forked from jgontrum/spacy-api-docker)
import os
import time

import spacy
from flask import Flask
from flask_restful import reqparse, Api, Resource

# Name of the spaCy model to load; fall back to 'en' if LANG is unset or empty.
language = os.environ.get('LANG') or 'en'
print("Loading Language Model for '%s'..." % language)
nlp = spacy.load(language)
print("Language Model for '%s' loaded!" % language)

app = Flask("spaCy API")
api = Api(app)

# JSON body arguments accepted by the POST endpoint.
parser = reqparse.RequestParser()
parser.add_argument('text', type=str, location='json')
parser.add_argument('texts', type=list, location='json')
parser.add_argument('fields', type=list, default=[], location='json')
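
# Illustrative request bodies (examples only, derived from the parser
# arguments above; not part of the original source):
#   {"text": "Berlin is a city.", "fields": ["token", "lemma", "pos"]}
#   {"texts": ["First document.", "Second document."]}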

class Spacy(Resource):
    def get(self):
        # Health check: confirms the service is up.
        return 200

    def post(self):
        t0 = time.time()
        args = parser.parse_args()

        validation = self.__validate_input(args)
        if validation:
            return validation, 500

        ret = {
            'version': '1.8.1',
            'lang': language
        }

        if args.get('text'):
            # Analyze only a single text
            ret.update(
                self.__analyze(args.get('text'), args.get('fields')))
        elif args.get('texts'):
            # Analyze a batch of texts
            ret['texts'] = [
                self.__analyze(text, args.get('fields'))
                for text in args.get('texts')]
            ret['numOfTexts'] = len(args.get('texts'))

        # Wall-clock processing time in seconds.
        ret['performance'] = time.time() - t0
        ret['error'] = False
        return ret, 200

    @staticmethod
    def __validate_input(args: dict):
        message = ""
        if not args.get('text') and not args.get('texts'):
            message = "No text(s) received."
        if args.get('texts') and not isinstance(args.get('texts'), list):
            message = 'Wrong format for "texts". A list of strings is required.'
        if message:
            return {
                'message': message,
                'error': True
            }
        return None

    @staticmethod
    def __analyze(text: str, fields: list):
        doc = nlp(text)
        ret = {
            'numOfSentences': len(list(doc.sents)),
            'numOfTokens': len(list(doc)),
            'sentences': []
        }
        for sentence in doc.sents:
            sentence_analysis = [{
                'token': w.orth_,
                'lemma': w.lemma_,
                'tag': w.tag_,
                'ner': w.ent_type_,
                'offsets': {
                    'begin': w.idx,
                    'end': w.idx + len(w.orth_)
                },
                'oov': w.is_oov,
                'stop': w.is_stop,
                'url': w.like_url,
                'email': w.like_email,
                'num': w.like_num,
                'pos': w.pos_
            } for w in sentence]

            if fields:
                # Keep only the requested fields for each token
                sentence_analysis = [
                    {k: v for k, v in token.items() if k in fields}
                    for token in sentence_analysis
                ]
            ret['sentences'].append(sentence_analysis)
        return ret

api.add_resource(Spacy, '/api')

if __name__ == '__main__':
    # PORT arrives from the environment as a string, so cast it to int.
    app.run(host="0.0.0.0", port=int(os.environ.get('PORT') or 5000))
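
# Example invocation (illustrative, assuming the server runs on localhost:5000):
#   curl -s -X POST http://localhost:5000/api \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Berlin is a city in Germany.", "fields": ["token", "pos", "ner"]}'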