forked from ines/spacy-js
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathserver.py
127 lines (112 loc) Β· 3.57 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# coding: utf8
from __future__ import unicode_literals
import hug
from hug_middleware_cors import CORSMiddleware
import waitress
import spacy
import plac
# Registry of loaded spaCy pipelines, keyed by model name; populated by load_model().
MODELS = {}
@plac.annotations(
    models=("Comma-separated list of spaCy models", "positional", None, str),
    host=("Host to serve API", "option", "ho", str),
    port=("Port to serve API", "option", "p", int),
)
def main(models=None, host="0.0.0.0", port=8080):
    """Load the requested spaCy models and serve the Hug REST API.

    models: comma-separated spaCy model names; defaults to en_core_web_sm.
    host: interface to bind the server to.
    port: TCP port to listen on.
    """
    if not models:
        models = ["en_core_web_sm"]
    else:
        models = [m.strip() for m in models.split(",")]
    for model in models:
        load_model(model)
    # Serving Hug API
    app = hug.API(__name__)
    app.http.add_middleware(CORSMiddleware(app))
    # BUG FIX: `host` was accepted as a CLI option but never forwarded, so
    # waitress always bound its default (0.0.0.0) and --ho was silently ignored.
    waitress.serve(__hug_wsgi__, host=host, port=port)
def load_model(model):
    """Load the named spaCy pipeline and register it in the MODELS cache."""
    print("Loading model '{}'...".format(model))
    nlp = spacy.load(model)
    MODELS[model] = nlp
def doc2json(doc: spacy.tokens.Doc, model: str):
    """Serialize a processed spaCy Doc into a JSON-compatible dict.

    Returns the doc-level attributes, entities, sentence and noun-chunk
    spans (token offsets), and a flat per-token attribute listing, tagged
    with the name of the model that produced the Doc.
    """

    def serialize_token(token):
        # Flatten a single Token into plain JSON-serializable attributes.
        return {
            "text": token.text,
            "text_with_ws": token.text_with_ws,
            "whitespace": token.whitespace_,
            "orth": token.orth,
            "i": token.i,
            "ent_type": token.ent_type_,
            "ent_iob": token.ent_iob_,
            "lemma": token.lemma_,
            "norm": token.norm_,
            "lower": token.lower_,
            "shape": token.shape_,
            "prefix": token.prefix_,
            "suffix": token.suffix_,
            "pos": token.pos_,
            "tag": token.tag_,
            "dep": token.dep_,
            "is_alpha": token.is_alpha,
            "is_ascii": token.is_ascii,
            "is_digit": token.is_digit,
            "is_lower": token.is_lower,
            "is_upper": token.is_upper,
            "is_title": token.is_title,
            "is_punct": token.is_punct,
            "is_left_punct": token.is_left_punct,
            "is_right_punct": token.is_right_punct,
            "is_space": token.is_space,
            "is_bracket": token.is_bracket,
            "is_currency": token.is_currency,
            "like_url": token.like_url,
            "like_num": token.like_num,
            "like_email": token.like_email,
            "is_oov": token.is_oov,
            "is_stop": token.is_stop,
            "is_sent_start": token.is_sent_start,
            "head": token.head.i,
        }

    doc_attrs = {
        "text": doc.text,
        "text_with_ws": doc.text_with_ws,
        "cats": doc.cats,
        "is_tagged": doc.is_tagged,
        "is_parsed": doc.is_parsed,
        "is_nered": doc.is_nered,
        "is_sentenced": doc.is_sentenced,
    }
    entities = [
        {"start": e.start, "end": e.end, "label": e.label_} for e in doc.ents
    ]
    # Sentence boundaries are only available once the Doc is sentencized.
    if doc.is_sentenced:
        sentences = [{"start": s.start, "end": s.end} for s in doc.sents]
    else:
        sentences = []
    # Noun chunks require both the tagger and the parser to have run.
    if doc.is_tagged and doc.is_parsed:
        chunks = [{"start": c.start, "end": c.end} for c in doc.noun_chunks]
    else:
        chunks = []
    return {
        "model": model,
        "doc": doc_attrs,
        "ents": entities,
        "sents": sentences,
        "noun_chunks": chunks,
        "tokens": [serialize_token(t) for t in doc],
    }
@hug.post("/parse")
def parse(model: str, text: str):
    """POST endpoint: run `text` through the pre-loaded `model` pipeline
    and return the JSON serialization of the resulting Doc."""
    nlp = MODELS[model]
    parsed = nlp(text)
    return doc2json(parsed, model)
@hug.post("/similarity")
def similarity(model: str, text1: str, text2: str):
    """POST endpoint: similarity score between two texts under `model`."""
    # We can always create Doc objects here, because the result is the same
    nlp = MODELS[model]
    first, second = nlp(text1), nlp(text2)
    return {"similarity": first.similarity(second)}
if __name__ == "__main__":
    # CLI entry point: plac maps the annotated main() signature onto argv.
    plac.call(main)