from entity.models import Entity, EntityAttr
from entry.models import Entry

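+# Maximum number of Entry names packed into a single Entry.search_entries() call;
+# larger intermediate results are split into chunks of this size before re-querying.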
+SEARCH_ENTRY_LIMIT = 50
+

class ReferSerializer(serializers.Serializer):
    entity = serializers.CharField(max_length=200)
@@ -12,12 +14,24 @@ class ReferSerializer(serializers.Serializer):
    attrs = serializers.ListField(required=False)
    refers = serializers.ListField(required=False)

+    def validate_entity(self, entity_name):
+        if not Entity.objects.filter(name=entity_name, is_active=True).exists():
+            raise ValidationError("There is no specified Entity (%s)" % entity_name)
+
+        return entity_name
+
+    def validate(self, data):
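+        # validate_entity() has already confirmed the Entity exists; keep its ID so the
+        # backward (referral) search can use it as a hint later on.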
+        entity = Entity.objects.filter(name=data["entity"], is_active=True).first()
+        data["entity_id"] = entity.id
+        return data
+

class AttrSerializer(serializers.Serializer):
    name = serializers.CharField(max_length=200)
    value = serializers.CharField(max_length=200, required=False, allow_blank=True)
    is_any = serializers.BooleanField(default=False)
    attrs = serializers.ListField(required=False)
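+    # nested "refers" conditions allow an attribute condition to chain further through referrals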
+    refers = serializers.ListField(required=False)

    def validate_name(self, name):
        if not EntityAttr.objects.filter(name=name, is_active=True).exists():
@@ -31,8 +45,8 @@ def validate_is_any(self, value):

class EntrySearchChainSerializer(serializers.Serializer):
    entities = serializers.ListField(child=serializers.CharField(max_length=200))
-    # conditions = serializers.ListField(child=SearchConditionSerializer())
-    conditions = serializers.ListField()
+    attrs = serializers.ListField(child=AttrSerializer(), required=False)
+    refers = serializers.ListField(child=ReferSerializer(), required=False)
    is_any = serializers.BooleanField(default=False)

    def validate_is_any(self, value):
@@ -95,14 +109,10 @@ def _get_serializer(condition):
            else:
                return AttrSerializer

-        def _may_validate_and_complement_condition(condition, entities, serializer_hint=None):
-            serializer_class = serializer_hint
-            if serializer_hint is None:
-                serializer_class = _get_serializer(condition)
-
+        def _may_validate_and_complement_condition(condition, entities, serializer_class):
            serializer = serializer_class(data=condition)
            if not serializer.is_valid():
-                raise ValidationError("Invalid condition was specified" % str(condition))
+                raise ValidationError("Invalid condition(%s) was specified" % str(condition))

            if not entities:
                raise ValidationError("Condition(%s) couldn't find valid Entities" % str(condition))
@@ -111,8 +121,12 @@ def _may_validate_and_complement_condition(condition, entities, serializer_hint=
            if "name" in validated_data:
                _validate_attribute(validated_data["name"], entities)

-            # complement "entities" parameter at this condition
-            _complement_entities(validated_data, entities)
+            if isinstance(serializer, AttrSerializer):
+                # complement "entities" parameter at this condition
+                _complement_entities(validated_data, entities)
+
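+            # for a refer condition, narrow the search scope to the Entity that the condition points at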
+            if isinstance(serializer, ReferSerializer):
+                validated_data["entities"] = [validated_data["entity_id"]]

            # call this method recursively to validate and complement value for each conditions
            if "attrs" in validated_data:
@@ -123,19 +137,32 @@ def _may_validate_and_complement_condition(condition, entities, serializer_hint=
                    for x in validated_data["attrs"]
                ]

+            elif "refers" in validated_data:
+                validated_data["refers"] = [
+                    _may_validate_and_complement_condition(
+                        x, validated_data["entities"], ReferSerializer
+                    )
+                    for x in validated_data["refers"]
+                ]
+
            return validated_data

-        # validate and compelment "conditions" parameter context
-        data["conditions"] = [
-            _may_validate_and_complement_condition(x, data["entities"]) for x in data["conditions"]
-        ]
+        # validate parameter context
+        if data.get("attrs"):
+            data["attrs"] = [
+                _may_validate_and_complement_condition(x, data["entities"], AttrSerializer)
+                for x in data["attrs"]
+            ]

-        return data
+        if data.get("refers"):
+            data["refers"] = [
+                _may_validate_and_complement_condition(x, data["entities"], ReferSerializer)
+                for x in data["refers"]
+            ]

-    def search_entries(self, user, query=None):
-        if query is None:
-            query = self.validated_data
+        return data

+    def merge_search_result(self, stored_list, result_data, is_any):
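+        # OR (is_any=True) unions result_data with the accumulated list; AND keeps only the
+        # Entries already accumulated (or everything, when nothing has been accumulated yet).
+        # Duplicated Entry-IDs are removed from the merged result.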
        def _deduplication(item_list):
            """
            This removes duplication items, that have same Entry-ID with other ones,from item_list
@@ -147,92 +174,190 @@ def _deduplication(item_list):

            return returned_items

-        def _merge_search_result(stored_list, result_data, is_any):
-            if is_any:
-                # This is OR condition processing
-                result = result_data + stored_list
+        if is_any:
+            # This is OR condition processing
+            result = result_data + stored_list

-            else:
-                # This is AND condition processing
-                # The "stored_id_list" is an explanatory variable that only has Entry-ID
-                # of stored_list Entry information
+        else:
+            # This is AND condition processing
+            # The "stored_id_list" is an explanatory variable that only has Entry-ID
+            # of stored_list Entry information
+            if stored_list:
                stored_id_list = [x["id"] for x in stored_list]
                result = [x for x in result_data if x["id"] in stored_id_list]
+            else:
+                result = result_data
+
+        return _deduplication(result)

-            return _deduplication(result)
+    def backward_search_entries(self, user, queries, entity_id_list, is_any):
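+        # referral-based counterpart of forward_search_entries(): candidates are narrowed by
+        # referral hints (hint_referral / hint_referral_entity_id) instead of attribute values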
+        # digging into the condition tree to get to leaf condition by depth-first search
+        accumulated_result = []

-        sub_queries = None
-        if "conditions" in query:
-            sub_queries = query["conditions"]
+        def _do_backward_search(sub_query, sub_query_result):
+            # make query to search Entries using Entry.search_entries()
+            search_keyword = "|".join([x["name"] for x in sub_query_result])
+            if isinstance(sub_query.get("entry"), str) and len(sub_query["entry"]) > 0:
+                search_keyword = sub_query.get("entry")
+
+            # Query for backward search
+            query_params = {
+                "user": user,
+                "hint_entity_ids": entity_id_list,
+                "hint_referral": search_keyword,
+                "hint_referral_entity_id": sub_query["entity_id"],
+                "limit": 99999,
+            }
+
+            # get Entry information from result
+            search_result = Entry.search_entries(**query_params)
+
+            return [x["entry"] for x in search_result["ret_values"]]
+
+        # This expects only ReferSerializer sub-queries
+        for sub_query in queries:
+            (is_leaf, sub_query_result) = self.search_entries(user, sub_query)
+            if not is_leaf and not sub_query_result:
+                # In this case, it's useless to continue the search processing because
+                # there is no possibility of finding the data that the user wants.
+                return (False, [])
+
+            # This divides results into small chunks that will be sent to Elasticsearch again
+            # when there is a large amount of data. Each chunk holds SEARCH_ENTRY_LIMIT items
+            # at most.
+            dividing_index = 0
+            if len(sub_query_result) > 0:
+                search_results = []
+                while (dividing_index * SEARCH_ENTRY_LIMIT) < len(sub_query_result):
+                    chunk_result = sub_query_result[
+                        dividing_index
+                        * SEARCH_ENTRY_LIMIT : (dividing_index + 1)
+                        * SEARCH_ENTRY_LIMIT
+                    ]
+                    dividing_index += 1
+
+                    search_results += _do_backward_search(sub_query, chunk_result)
+
+            else:
+                # This searches Entries with hint values from sub_query and sub_query_result
+                search_results = _do_backward_search(sub_query, sub_query_result)

-        elif "attrs" in query:
-            sub_queries = query["attrs"]
+            # merge result to the accumulated ones considering is_any value
+            accumulated_result = self.merge_search_result(
+                accumulated_result, search_results, is_any
+            )

-        elif "refers" in query:
-            raise RuntimeError("This is not impelemnted yet")
+        # The first return value (False) describes that this result was returned by a non-leaf node
+        return (False, accumulated_result)

-        # digging into the conditions tree to get to leaf condition by depth-first search
+    def forward_search_entries(self, user, queries, entity_id_list, is_any):
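+        # attribute-based search: candidates are narrowed by their own attribute values (hint_attrs)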
+        # digging into the condition tree to get to leaf condition by depth-first search
        accumulated_result = []
-        if sub_queries:
-            # This expects only AttrSerialized sub-query
-            for sub_query in sub_queries:
-                (is_leaf, sub_query_result) = self.search_entries(user, sub_query)
-
-                if not is_leaf and not sub_query_result:
-                    # In this case, it's useless to continue to search processing because
-                    # there is no possiblity to find out data that user wants to.
-                    return (False, [])
-
-                # make query to search Entries using Entry.search_entries()
-                search_keyword = "|".join([x["name"] for x in sub_query_result])
-                if isinstance(sub_query.get("value"), str) and len(sub_query["value"]) > 0:
-                    search_keyword = sub_query.get("value")
-
-                elif sub_query.get("value") == "":
-                    # When value has empty string, this specify special character "\",
-                    # which will match Entries that refers nothing Entry at specified Attribute.
-                    search_keyword = "\\"
-
-                search_query = [
-                    {
-                        "name": sub_query["name"],
-                        "keyword": search_keyword,
-                    }
-                ]

-                # get Entry informations from result
-                search_result = Entry.search_entries(
-                    user, query["entities"], search_query, limit=99999
-                )
+        def _do_forward_search(sub_query, sub_query_result):
+            # make query to search Entries using Entry.search_entries()
+            search_keyword = "|".join([x["name"] for x in sub_query_result])
+            if isinstance(sub_query.get("value"), str) and len(sub_query["value"]) > 0:
+                search_keyword = sub_query.get("value")
+
+            elif sub_query.get("value") == "":
+                # When value is an empty string, this specifies the special character "\",
+                # which will match Entries that refer to no Entry at the specified Attribute.
+                search_keyword = "\\"
+
+            # Query for forward search
+            hint_attrs = [
+                {
+                    "name": sub_query["name"],
+                    "keyword": search_keyword,
+                }
+            ]
+
+            # get Entry information from result
+            search_result = Entry.search_entries(user, entity_id_list, hint_attrs, limit=99999)
+
+            return [x["entry"] for x in search_result["ret_values"]]
+
+        # This expects only AttrSerializer sub-queries
+        for sub_query in queries:
+            (is_leaf, sub_query_result) = self.search_entries(user, sub_query)
+            if not is_leaf and not sub_query_result:
+                # In this case, it's useless to continue the search processing because
+                # there is no possibility of finding the data that the user wants.
+                return (False, [])
+
+            # This divides results into small chunks that will be sent to Elasticsearch again
+            # when there is a large amount of data. Each chunk holds SEARCH_ENTRY_LIMIT items
+            # at most.
+            dividing_index = 0
+            if len(sub_query_result) > 0:
+                search_results = []
+                while (dividing_index * SEARCH_ENTRY_LIMIT) < len(sub_query_result):
+                    chunk_result = sub_query_result[
+                        dividing_index
+                        * SEARCH_ENTRY_LIMIT : (dividing_index + 1)
+                        * SEARCH_ENTRY_LIMIT
+                    ]
+                    dividing_index += 1
+
+                    search_results += _do_forward_search(sub_query, chunk_result)

-                result_entry_info = [x["entry"] for x in search_result["ret_values"]]
-                if not accumulated_result:
-                    accumulated_result = result_entry_info
-                else:
-                    accumulated_result = _merge_search_result(
-                        accumulated_result, result_entry_info, query["is_any"]
-                    )
+            else:
+                # This searches Entries with hint values from sub_query and sub_query_result
+                search_results = _do_forward_search(sub_query, sub_query_result)

-        else:
-            # In the leaf condition return nothing
-            # The first return value describe whethere this is leaf condition or not.
-            # (True means result is returned by leaf-node)
-            #
-            # Empty result of second returned value has different meaning depends on
-            # whethere that is leaf condition or intermediate one.
-            # * Leaf condition:
-            #   - it must return empty whatever condition.
-            #     it should continue processing
-            #
-            # * Intermediate one:
-            #   - it returns empty when there is no result.
-            #     it's useless to continue this processing because there is no possibility
-            #     to find out any data, which user wants to
-            return (True, [])
+            # merge current result to the accumulated ones considering is_any value
+            accumulated_result = self.merge_search_result(
+                accumulated_result, search_results, is_any
+            )

        # The first return value (False) describe this result returned by NO-leaf-node
        return (False, accumulated_result)

+    def search_entries(self, user, query=None):
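+        # entry point of the chained search: "attrs" conditions run a forward (attribute value)
+        # search and "refers" conditions run a backward (referral) search; both results are
+        # merged according to is_any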
+        if query is None:
+            query = self.validated_data
+
+        accumulated_result = []
+        is_leaf = True
+        if len(query.get("attrs", [])) > 0:
+            is_leaf = False
+            sub_query = query.get("attrs", [])
+
+            (_, results) = self.forward_search_entries(
+                user, sub_query, query["entities"], query["is_any"]
+            )
+            accumulated_result = self.merge_search_result(
+                accumulated_result, results, query["is_any"]
+            )
+
+        if len(query.get("refers", [])) > 0:
+            is_leaf = False
+            sub_query = query.get("refers", [])
+
+            (_, results) = self.backward_search_entries(
+                user, sub_query, query["entities"], query["is_any"]
+            )
+            accumulated_result = self.merge_search_result(
+                accumulated_result, results, query["is_any"]
+            )
+
+        # In the leaf condition return nothing.
+        # The first return value describes whether this is a leaf condition or not
+        # (True means the result was returned by a leaf node).
+        #
+        # An empty second return value has a different meaning depending on
+        # whether this is a leaf condition or an intermediate one.
+        # * Leaf condition:
+        #   - it must return empty whatever the condition is;
+        #     processing should continue
+        #
+        # * Intermediate one:
+        #   - it returns empty when there is no result;
+        #     it's useless to continue this processing because there is no possibility
+        #     of finding any data that the user wants
+        return (is_leaf, accumulated_result)
+

    def is_attr_chained(self, entry, attrs=None, is_any=False):
        if not attrs:
            attrs = self.validated_data["attrs"]
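For reference, a minimal sketch of a query the updated EntrySearchChainSerializer is expected to accept. The entity, attribute, and entry names are invented, and the "entry" key on the refer condition is an assumption based on sub_query.get("entry") in _do_backward_search():

    payload = {
        "entities": ["Server"],  # hypothetical Entity names
        "is_any": False,         # AND the conditions below
        "attrs": [
            # forward condition: Entries whose "status" attribute matches "in-service"
            {"name": "status", "value": "in-service"},
        ],
        "refers": [
            # backward condition: Entries related via referral to a "Service" Entry named
            # "frontend" (the "entry" key is assumed, not confirmed by this diff)
            {"entity": "Service", "entry": "frontend"},
        ],
    }

    serializer = EntrySearchChainSerializer(data=payload)
    serializer.is_valid(raise_exception=True)
    (is_leaf, entries) = serializer.search_entries(user)  # user: an existing User instance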