forked from inveniosoftware/invenio-search
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapi.py
172 lines (141 loc) · 5.24 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Search engine API."""
from __future__ import unicode_literals
from flask import current_app
from flask_login import current_user
from elasticsearch.helpers import scan
from invenio_base.globals import cfg
from invenio_base.helpers import unicodifier
import pypeg2
from werkzeug.utils import cached_property
from .utils import parser, query_enhancers, query_walkers, search_walkers
from .walkers.elasticsearch_no_keywords import ElasticSearchNoKeywordsDSL
from .walkers.elasticsearch_no_keywords import QueryHasKeywords
from .walkers.match_unit import MatchUnit
from .walkers.terms import Terms
class Query(object):
    """Search engine implementation.

    Wraps a raw query string and offers parsing, searching, record
    matching and term extraction on top of it.

    .. versionadded:: 2.1
       New search and match API.
    """

    def __init__(self, query):
        """Keep the query string, normalised through ``unicodifier``."""
        self._query = unicodifier(query)

    @cached_property
    def query(self):
        """Return the parsed query tree (computed once via cached_property).

        The stored string is parsed by ``pypeg2`` with the grammar given by
        ``parser()``; on success each walker from ``query_walkers()`` is
        applied in turn, every walker receiving the previous one's result.
        If the parser raises ``SyntaxError`` an empty ``MalformedQuery`` is
        returned and no walker is applied.
        """
        try:
            parsed = pypeg2.parse(self._query, parser(), whitespace="")
        except SyntaxError:
            # Imported lazily: only needed when parsing fails.
            from invenio_query_parser.ast import MalformedQuery
            parsed = MalformedQuery("")
        else:
            for visitor in query_walkers():
                parsed = parsed.accept(visitor)
        return parsed

    def _free_text_query(self):
        """Build the ``multi_match`` body used for keyword-less queries."""
        boosted_fields = [
            "title^3",
            "title.raw^10",
            "abstract^2",
            "abstract.raw^4",
            "author^10",
            "author.raw^15",
            "reportnumber^10",
            "eprint^10",
            "doi^10",
        ]
        return {
            "multi_match": {
                "query": self._query,
                "zero_terms_query": "all",
                "fields": boosted_fields,
            },
        }

    def search(self, user_info=None, collection=None, enhance=True, **kwargs):
        """Search records and return a ``Results`` object.

        :param user_info: passed to the query enhancers; falls back to
            ``current_user`` when falsy.
        :param collection: passed to the query enhancers and used as the key
            into ``SEARCH_ELASTIC_COLLECTION_INDEX_MAPPING`` to pick the
            index (``SEARCH_ELASTIC_DEFAULT_INDEX`` when not mapped).
        :param enhance: when true, run every enhancer from
            ``query_enhancers()`` over the parsed query.
        :param kwargs: accepted but not used by this method.
        """
        if not user_info:
            user_info = current_user

        # Enhance the parsed query before translating it.
        tree = self.query
        current_app.logger.debug(tree)
        if enhance:
            for enhance_query in query_enhancers():
                tree = enhance_query(
                    tree, user_info=user_info, collection=collection)

        try:
            # Visiting with this walker raises QueryHasKeywords when the
            # free-text shortcut cannot be used.
            tree.accept(ElasticSearchNoKeywordsDSL())
        except QueryHasKeywords:
            es_query = tree
            for visitor in search_walkers():
                es_query = es_query.accept(visitor)
        else:
            es_query = self._free_text_query()

        index_by_collection = cfg["SEARCH_ELASTIC_COLLECTION_INDEX_MAPPING"]
        fallback_index = cfg["SEARCH_ELASTIC_DEFAULT_INDEX"]
        target_index = index_by_collection.get(collection, fallback_index)
        return Results(es_query, index=target_index)

    def match(self, record, user_info=None):
        """Return the outcome of visiting the query with ``MatchUnit(record)``.

        ``user_info`` is accepted but not used here.
        """
        matcher = MatchUnit(record)
        return self.query.accept(matcher)

    def terms(self, keywords=None):
        """Return the terms found in the query for the given keywords."""
        collector = Terms(keywords=keywords)
        return self.query.accept(collector)
class Results(object):
    """Hold an Elasticsearch request body and fetch its hits on demand."""

    def __init__(self, query, index=None, doc_type=None, **kwargs):
        """Prepare the request body without contacting Elasticsearch.

        :param query: value stored under the ``'query'`` key of the body.
        :param index: index name handed to the Elasticsearch calls.
        :param doc_type: document type; ``'record'`` is used when falsy.
        :param kwargs: extra body entries; they override the defaults
            (``'from'`` = 0, ``'size'`` = 10) on key collision.
        """
        defaults = {
            'from': 0,
            'size': 10,
            'query': query,
        }
        self.body = dict(defaults, **kwargs)
        self.index = index
        self.doc_type = doc_type if doc_type else 'record'
        # Response of the last ``es.search`` call; filled in by ``_search``.
        self._results = None

    @property
    def recids(self):
        """Return an ``intbitset`` of the ids of all documents hit by the query.

        Uses the ``scan`` helper with an empty ``fields`` list and only the
        ``query`` part of the body. The result is not cached on the instance.
        """
        from intbitset import intbitset
        from invenio_ext.es import es

        id_only_request = {
            'fields': [],
            'query': self.body.get("query"),
        }
        hits = scan(
            es,
            query=id_only_request,
            index=self.index,
            doc_type=self.doc_type,
        )
        ids = []
        for hit in hits:
            ids.append(int(hit['_id']))
        return intbitset(ids)

    def _search(self):
        """Run ``es.search`` once and return the stored response afterwards."""
        from invenio_ext.es import es

        if self._results is not None:
            return self._results

        if current_app.debug:
            import json
            pretty_body = json.dumps(self.body, indent=2)
            message = "index: {0} - doc_type: {1} - query: {2}".format(
                self.index, self.doc_type, pretty_body)
            current_app.logger.debug(message)

        self._results = es.search(
            index=self.index,
            doc_type=self.doc_type,
            body=self.body,
        )
        return self._results

    def records(self):
        """Return a list of ``Record`` objects built from each hit's ``_source``."""
        from invenio_records.api import Record

        hits = self._search()['hits']['hits']
        return [Record(hit['_source']) for hit in hits]

    def __len__(self):
        """Return the ``total`` value reported under ``hits`` in the response."""
        response = self._search()
        return response['hits']['total']