Skip to content

Commit 24939ba

Browse files
authored
Merge pull request #669 from userlocalhost/enhancement/search_entry_chain/enable_backward_search_entries
Added API handler that searches Entries across multiple referral structure (/api/v1/search_chain)
2 parents 47e9d8e + 09464a8 commit 24939ba

File tree

4 files changed

+594
-128
lines changed

4 files changed

+594
-128
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
## In development
44

55
### Added
6+
* Added API handler that searches Entries across multiple referral structures
7+
(/api/v1/search_chain)
8+
Contributed by @userlocalhost, @hinashi
69

710
### Changed
811
* Improved processing to get referred Entry from elasticsearch

api_v1/entry/serializer.py

+213-88
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from entity.models import Entity, EntityAttr
55
from entry.models import Entry
66

7+
SEARCH_ENTRY_LIMIT = 50
8+
79

810
class ReferSerializer(serializers.Serializer):
911
entity = serializers.CharField(max_length=200)
@@ -12,12 +14,24 @@ class ReferSerializer(serializers.Serializer):
1214
attrs = serializers.ListField(required=False)
1315
refers = serializers.ListField(required=False)
1416

17+
def validate_entity(self, entity_name):
18+
if not Entity.objects.filter(name=entity_name, is_active=True).exists():
19+
raise ValidationError("There is no specified Entity (%s)" % entity_name)
20+
21+
return entity_name
22+
23+
def validate(self, data):
24+
entity = Entity.objects.filter(name=data["entity"], is_active=True).first()
25+
data["entity_id"] = entity.id
26+
return data
27+
1528

1629
class AttrSerializer(serializers.Serializer):
1730
name = serializers.CharField(max_length=200)
1831
value = serializers.CharField(max_length=200, required=False, allow_blank=True)
1932
is_any = serializers.BooleanField(default=False)
2033
attrs = serializers.ListField(required=False)
34+
refers = serializers.ListField(required=False)
2135

2236
def validate_name(self, name):
2337
if not EntityAttr.objects.filter(name=name, is_active=True).exists():
@@ -31,8 +45,8 @@ def validate_is_any(self, value):
3145

3246
class EntrySearchChainSerializer(serializers.Serializer):
3347
entities = serializers.ListField(child=serializers.CharField(max_length=200))
34-
# conditions = serializers.ListField(child=SearchConditionSerializer())
35-
conditions = serializers.ListField()
48+
attrs = serializers.ListField(child=AttrSerializer(), required=False)
49+
refers = serializers.ListField(child=ReferSerializer(), required=False)
3650
is_any = serializers.BooleanField(default=False)
3751

3852
def validate_is_any(self, value):
@@ -95,14 +109,10 @@ def _get_serializer(condition):
95109
else:
96110
return AttrSerializer
97111

98-
def _may_validate_and_complement_condition(condition, entities, serializer_hint=None):
99-
serializer_class = serializer_hint
100-
if serializer_hint is None:
101-
serializer_class = _get_serializer(condition)
102-
112+
def _may_validate_and_complement_condition(condition, entities, serializer_class):
103113
serializer = serializer_class(data=condition)
104114
if not serializer.is_valid():
105-
raise ValidationError("Invalid condition was specified" % str(condition))
115+
raise ValidationError("Invalid condition(%s) was specified" % str(condition))
106116

107117
if not entities:
108118
raise ValidationError("Condition(%s) couldn't find valid Entities" % str(condition))
@@ -111,8 +121,12 @@ def _may_validate_and_complement_condition(condition, entities, serializer_hint=
111121
if "name" in validated_data:
112122
_validate_attribute(validated_data["name"], entities)
113123

114-
# complement "entities" parameter at this condition
115-
_complement_entities(validated_data, entities)
124+
if isinstance(serializer, AttrSerializer):
125+
# complement "entities" parameter at this condition
126+
_complement_entities(validated_data, entities)
127+
128+
if isinstance(serializer, ReferSerializer):
129+
validated_data["entities"] = [validated_data["entity_id"]]
116130

117131
# call this method recursively to validate and complement value for each conditions
118132
if "attrs" in validated_data:
@@ -123,19 +137,32 @@ def _may_validate_and_complement_condition(condition, entities, serializer_hint=
123137
for x in validated_data["attrs"]
124138
]
125139

140+
elif "refers" in validated_data:
141+
validated_data["refers"] = [
142+
_may_validate_and_complement_condition(
143+
x, validated_data["entities"], ReferSerializer
144+
)
145+
for x in validated_data["refers"]
146+
]
147+
126148
return validated_data
127149

128-
# validate and compelment "conditions" parameter context
129-
data["conditions"] = [
130-
_may_validate_and_complement_condition(x, data["entities"]) for x in data["conditions"]
131-
]
150+
# validate parameter context
151+
if data.get("attrs"):
152+
data["attrs"] = [
153+
_may_validate_and_complement_condition(x, data["entities"], AttrSerializer)
154+
for x in data["attrs"]
155+
]
132156

133-
return data
157+
if data.get("refers"):
158+
data["refers"] = [
159+
_may_validate_and_complement_condition(x, data["entities"], ReferSerializer)
160+
for x in data["refers"]
161+
]
134162

135-
def search_entries(self, user, query=None):
136-
if query is None:
137-
query = self.validated_data
163+
return data
138164

165+
def merge_search_result(self, stored_list, result_data, is_any):
139166
def _deduplication(item_list):
140167
"""
141168
This removes duplication items, that have same Entry-ID with other ones,from item_list
@@ -147,92 +174,190 @@ def _deduplication(item_list):
147174

148175
return returned_items
149176

150-
def _merge_search_result(stored_list, result_data, is_any):
151-
if is_any:
152-
# This is OR condition processing
153-
result = result_data + stored_list
177+
if is_any:
178+
# This is OR condition processing
179+
result = result_data + stored_list
154180

155-
else:
156-
# This is AND condition processing
157-
# The "stored_id_list" is an explanatory variable that only has Entry-ID
158-
# of stored_list Entry information
181+
else:
182+
# This is AND condition processing
183+
# The "stored_id_list" is an explanatory variable that only has Entry-ID
184+
# of stored_list Entry information
185+
if stored_list:
159186
stored_id_list = [x["id"] for x in stored_list]
160187
result = [x for x in result_data if x["id"] in stored_id_list]
188+
else:
189+
result = result_data
190+
191+
return _deduplication(result)
161192

162-
return _deduplication(result)
193+
def backward_search_entries(self, user, queries, entity_id_list, is_any):
194+
# digging into the condition tree to get to leaf condition by depth-first search
195+
accumulated_result = []
163196

164-
sub_queries = None
165-
if "conditions" in query:
166-
sub_queries = query["conditions"]
197+
def _do_backward_search(sub_query, sub_query_result):
198+
# make query to search Entries using Entry.search_entries()
199+
search_keyword = "|".join([x["name"] for x in sub_query_result])
200+
if isinstance(sub_query.get("entry"), str) and len(sub_query["entry"]) > 0:
201+
search_keyword = sub_query.get("entry")
202+
203+
# Query for backward search (find Entries that are referred to by the matched ones)
204+
query_params = {
205+
"user": user,
206+
"hint_entity_ids": entity_id_list,
207+
"hint_referral": search_keyword,
208+
"hint_referral_entity_id": sub_query["entity_id"],
209+
"limit": 99999,
210+
}
211+
212+
# get Entry informations from result
213+
search_result = Entry.search_entries(**query_params)
214+
215+
return [x["entry"] for x in search_result["ret_values"]]
216+
217+
# This expects only AttrSerialized sub-query
218+
for sub_query in queries:
219+
(is_leaf, sub_query_result) = self.search_entries(user, sub_query)
220+
if not is_leaf and not sub_query_result:
221+
# In this case, it's useless to continue to search processing because
222+
# there is no possibility of finding the data that the user wants.
223+
return (False, [])
224+
225+
# This divides results into small chunks, that will be sent to the elasticsearch again
226+
# when it has large amount of data. The size of each chunks is SEARCH_ENTRY_LIMIT
227+
# at most.
228+
dividing_index = 0
229+
if len(sub_query_result) > 0:
230+
search_results = []
231+
while (dividing_index * SEARCH_ENTRY_LIMIT) < len(sub_query_result):
232+
chunk_result = sub_query_result[
233+
dividing_index
234+
* SEARCH_ENTRY_LIMIT : (dividing_index + 1)
235+
* SEARCH_ENTRY_LIMIT
236+
]
237+
dividing_index += 1
238+
239+
search_results += _do_backward_search(sub_query, chunk_result)
240+
241+
else:
242+
# This searches Entries with hint values from sub_query and sub_query_result
243+
search_results = _do_backward_search(sub_query, sub_query_result)
167244

168-
elif "attrs" in query:
169-
sub_queries = query["attrs"]
245+
# merge result to the accumulated ones considering is_any value
246+
accumulated_result = self.merge_search_result(
247+
accumulated_result, search_results, is_any
248+
)
170249

171-
elif "refers" in query:
172-
raise RuntimeError("This is not impelemnted yet")
250+
# The first return value (False) describe this result returned by NO-leaf-node
251+
return (False, accumulated_result)
173252

174-
# digging into the conditions tree to get to leaf condition by depth-first search
253+
def forward_search_entries(self, user, queries, entity_id_list, is_any):
254+
# digging into the condition tree to get to leaf condition by depth-first search
175255
accumulated_result = []
176-
if sub_queries:
177-
# This expects only AttrSerialized sub-query
178-
for sub_query in sub_queries:
179-
(is_leaf, sub_query_result) = self.search_entries(user, sub_query)
180-
181-
if not is_leaf and not sub_query_result:
182-
# In this case, it's useless to continue to search processing because
183-
# there is no possiblity to find out data that user wants to.
184-
return (False, [])
185-
186-
# make query to search Entries using Entry.search_entries()
187-
search_keyword = "|".join([x["name"] for x in sub_query_result])
188-
if isinstance(sub_query.get("value"), str) and len(sub_query["value"]) > 0:
189-
search_keyword = sub_query.get("value")
190-
191-
elif sub_query.get("value") == "":
192-
# When value has empty string, this specify special character "\",
193-
# which will match Entries that refers nothing Entry at specified Attribute.
194-
search_keyword = "\\"
195-
196-
search_query = [
197-
{
198-
"name": sub_query["name"],
199-
"keyword": search_keyword,
200-
}
201-
]
202256

203-
# get Entry informations from result
204-
search_result = Entry.search_entries(
205-
user, query["entities"], search_query, limit=99999
206-
)
257+
def _do_forward_search(sub_query, sub_query_result):
258+
# make query to search Entries using Entry.search_entries()
259+
search_keyword = "|".join([x["name"] for x in sub_query_result])
260+
if isinstance(sub_query.get("value"), str) and len(sub_query["value"]) > 0:
261+
search_keyword = sub_query.get("value")
262+
263+
elif sub_query.get("value") == "":
264+
# When value has empty string, this specify special character "\",
265+
# which will match Entries that refers nothing Entry at specified Attribute.
266+
search_keyword = "\\"
267+
268+
# Query for forward search
269+
hint_attrs = [
270+
{
271+
"name": sub_query["name"],
272+
"keyword": search_keyword,
273+
}
274+
]
275+
276+
# get Entry informations from result
277+
search_result = Entry.search_entries(user, entity_id_list, hint_attrs, limit=99999)
278+
279+
return [x["entry"] for x in search_result["ret_values"]]
280+
281+
# This expects only AttrSerialized sub-query
282+
for sub_query in queries:
283+
(is_leaf, sub_query_result) = self.search_entries(user, sub_query)
284+
if not is_leaf and not sub_query_result:
285+
# In this case, it's useless to continue to search processing because
286+
# there is no possibility of finding the data that the user wants.
287+
return (False, [])
288+
289+
# This divides results into small chunks, that will be sent to the elasticsearch again
290+
# when it has large amount of data. The size of each chunks is SEARCH_ENTRY_LIMIT
291+
# at most.
292+
dividing_index = 0
293+
if len(sub_query_result) > 0:
294+
search_results = []
295+
while (dividing_index * SEARCH_ENTRY_LIMIT) < len(sub_query_result):
296+
chunk_result = sub_query_result[
297+
dividing_index
298+
* SEARCH_ENTRY_LIMIT : (dividing_index + 1)
299+
* SEARCH_ENTRY_LIMIT
300+
]
301+
dividing_index += 1
302+
303+
search_results += _do_forward_search(sub_query, chunk_result)
207304

208-
result_entry_info = [x["entry"] for x in search_result["ret_values"]]
209-
if not accumulated_result:
210-
accumulated_result = result_entry_info
211-
else:
212-
accumulated_result = _merge_search_result(
213-
accumulated_result, result_entry_info, query["is_any"]
214-
)
305+
else:
306+
# This searches Entries with hint values from sub_query and sub_query_result
307+
search_results = _do_forward_search(sub_query, sub_query_result)
215308

216-
else:
217-
# In the leaf condition return nothing
218-
# The first return value describe whethere this is leaf condition or not.
219-
# (True means result is returned by leaf-node)
220-
#
221-
# Empty result of second returned value has different meaning depends on
222-
# whethere that is leaf condition or intermediate one.
223-
# * Leaf condition:
224-
# - it must return empty whatever condition.
225-
# it should continue processing
226-
#
227-
# * Intermediate one:
228-
# - it returns empty when there is no result.
229-
# it's useless to continue this processing because there is no possibility
230-
# to find out any data, which user wants to
231-
return (True, [])
309+
# merge current result to the accumulated ones considering is_any value
310+
accumulated_result = self.merge_search_result(
311+
accumulated_result, search_results, is_any
312+
)
232313

233314
# The first return value (False) describe this result returned by NO-leaf-node
234315
return (False, accumulated_result)
235316

317+
def search_entries(self, user, query=None):
318+
if query is None:
319+
query = self.validated_data
320+
321+
accumulated_result = []
322+
is_leaf = True
323+
if len(query.get("attrs", [])) > 0:
324+
is_leaf = False
325+
sub_query = query.get("attrs", [])
326+
327+
(_, results) = self.forward_search_entries(
328+
user, sub_query, query["entities"], query["is_any"]
329+
)
330+
accumulated_result = self.merge_search_result(
331+
accumulated_result, results, query["is_any"]
332+
)
333+
334+
if len(query.get("refers", [])) > 0:
335+
is_leaf = False
336+
sub_query = query.get("refers", [])
337+
338+
(_, results) = self.backward_search_entries(
339+
user, sub_query, query["entities"], query["is_any"]
340+
)
341+
accumulated_result = self.merge_search_result(
342+
accumulated_result, results, query["is_any"]
343+
)
344+
345+
# In the leaf condition return nothing
346+
# The first return value describes whether this is a leaf condition or not.
347+
# (True means result is returned by leaf-node)
348+
#
349+
# Empty result of second returned value has different meaning depends on
350+
# whether that is a leaf condition or an intermediate one.
351+
# * Leaf condition:
352+
# - it must return empty whatever condition.
353+
# it should continue processing
354+
#
355+
# * Intermediate one:
356+
# - it returns empty when there is no result.
357+
# it's useless to continue this processing because there is no possibility
358+
# to find out any data, which user wants to
359+
return (is_leaf, accumulated_result)
360+
236361
def is_attr_chained(self, entry, attrs=None, is_any=False):
237362
if not attrs:
238363
attrs = self.validated_data["attrs"]

0 commit comments

Comments
 (0)