From 10dd330265e7beb02d6d1ce420c7547d121d7e2c Mon Sep 17 00:00:00 2001 From: Dustin Lo Date: Wed, 12 Jul 2023 11:30:42 -0700 Subject: [PATCH] HC-476: Make user dataset rule evaluation more efficient/targeted (#62) * added index_pattern to user rules * using validate_index_pattern from hysds.utils --- config/user_rules_dataset.mapping | 3 +++ grq2/services/api_v01/user_rules.py | 26 ++++++++++++++++++++++++++ grq2/services/api_v02/user_rules.py | 26 ++++++++++++++++++++++++++ setup.py | 2 +- 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/config/user_rules_dataset.mapping b/config/user_rules_dataset.mapping index 9ece0de..9ee77cb 100644 --- a/config/user_rules_dataset.mapping +++ b/config/user_rules_dataset.mapping @@ -32,6 +32,9 @@ "query_string": { "type": "text" }, + "index_pattern": { + "type": "keyword" + }, "queue": { "type": "text" }, diff --git a/grq2/services/api_v01/user_rules.py b/grq2/services/api_v01/user_rules.py index 6d547ed..d9612c5 100644 --- a/grq2/services/api_v01/user_rules.py +++ b/grq2/services/api_v01/user_rules.py @@ -13,6 +13,7 @@ from flask_restx import Resource, inputs from hysds.celery import app as celery_app +from hysds.utils import validate_index_pattern from hysds_commons.action_utils import check_passthrough_query from grq2 import app, mozart_es @@ -38,6 +39,7 @@ class UserRules(Resource): post_parser.add_argument('hysds_io', type=str, required=True, location='form', help='hysds io') post_parser.add_argument('job_spec', type=str, required=True, location='form', help='queue') post_parser.add_argument('priority', type=int, required=True, location='form', help='RabbitMQ job priority (0-9)') + post_parser.add_argument('index_pattern', type=str, required=True, location='form', help='ES index pattern') post_parser.add_argument('query_string', type=str, required=True, location='form', help='elasticsearch query') post_parser.add_argument('kwargs', type=str, required=True, location='form', help='keyword arguments for PGE') 
post_parser.add_argument('queue', type=str, required=True, location='form', help='RabbitMQ job queue') @@ -53,6 +55,7 @@ class UserRules(Resource): put_parser.add_argument('hysds_io', type=str, location='form', help='hysds io') put_parser.add_argument('job_spec', type=str, location='form', help='queue') put_parser.add_argument('priority', type=int, location='form', help='RabbitMQ job priority (0-9)') + put_parser.add_argument('index_pattern', type=str, required=True, location='form', help='ES index pattern') put_parser.add_argument('query_string', type=str, location='form', help='elasticsearch query') put_parser.add_argument('kwargs', type=str, location='form', help='keyword arguments for PGE') put_parser.add_argument('queue', type=str, location='form', help='RabbitMQ job queue') @@ -119,6 +122,7 @@ def post(self): job_spec = request_data.get('job_spec') priority = int(request_data.get('priority', 0)) query_string = request_data.get('query_string') + index_pattern = request_data.get('index_pattern', "").strip() kwargs = request_data.get('kwargs', '{}') queue = request_data.get('queue') tags = request_data.get('tags', []) @@ -217,6 +221,15 @@ def post(self): "tags": tags } + if not validate_index_pattern(index_pattern): + return { + 'success': False, + 'message': "index pattern is too broad" + }, 400 + + if index_pattern: + new_doc["index_pattern"] = index_pattern + if time_limit and isinstance(time_limit, int): if time_limit <= 0 or time_limit > 86400 * 7: return { @@ -264,6 +277,7 @@ def put(self): # TODO: add user role and permissions job_spec = request_data.get('job_spec') priority = request_data.get('priority') query_string = request_data.get('query_string') + index_pattern = request_data.get('index_pattern', "").strip() kwargs = request_data.get('kwargs') queue = request_data.get('queue') enabled = request_data.get('enabled') @@ -333,6 +347,18 @@ def put(self): # TODO: add user role and permissions 'success': False, 'message': 'invalid elasticsearch query 
JSON' }, 400 + + if "index_pattern" in request_data: + if not validate_index_pattern(index_pattern): + return { + 'success': False, + 'message': "index pattern is too broad" + }, 400 + if index_pattern: + update_doc["index_pattern"] = index_pattern # noqa + else: + update_doc["index_pattern"] = None + if kwargs: update_doc['kwargs'] = kwargs try: diff --git a/grq2/services/api_v02/user_rules.py b/grq2/services/api_v02/user_rules.py index bbb08aa..7a0de40 100644 --- a/grq2/services/api_v02/user_rules.py +++ b/grq2/services/api_v02/user_rules.py @@ -13,6 +13,7 @@ from flask_restx import Resource, inputs from hysds.celery import app as celery_app +from hysds.utils import validate_index_pattern from hysds_commons.action_utils import check_passthrough_query from grq2 import app, mozart_es @@ -39,6 +40,7 @@ class UserRules(Resource): post_parser.add_argument('job_spec', type=str, required=True, location='form', help='queue') post_parser.add_argument('priority', type=int, required=True, location='form', help='RabbitMQ job priority (0-9)') post_parser.add_argument('query_string', type=str, required=True, location='form', help='elasticsearch query') + post_parser.add_argument('index_pattern', type=str, required=True, location='form', help='ES index pattern') post_parser.add_argument('kwargs', type=str, required=True, location='form', help='keyword arguments for PGE') post_parser.add_argument('queue', type=str, required=True, location='form', help='RabbitMQ job queue') post_parser.add_argument('tags', type=list, location='form', help='user defined tags for trigger rule') @@ -54,6 +56,7 @@ class UserRules(Resource): put_parser.add_argument('job_spec', type=str, location='form', help='queue') put_parser.add_argument('priority', type=int, location='form', help='RabbitMQ job priority (0-9)') put_parser.add_argument('query_string', type=str, location='form', help='elasticsearch query') + post_parser.add_argument('index_pattern', type=str, required=True, location='form', help='ES 
index pattern') put_parser.add_argument('kwargs', type=str, location='form', help='keyword arguments for PGE') put_parser.add_argument('queue', type=str, location='form', help='RabbitMQ job queue') put_parser.add_argument('tags', type=list, location='form', help='user defined tags for trigger rule') @@ -119,6 +122,7 @@ def post(self): job_spec = request_data.get('job_spec') priority = int(request_data.get('priority', 0)) query_string = request_data.get('query_string') + index_pattern = request_data.get('index_pattern', "").strip() kwargs = request_data.get('kwargs', '{}') queue = request_data.get('queue') tags = request_data.get('tags', []) @@ -217,6 +221,15 @@ def post(self): "tags": tags } + if not validate_index_pattern(index_pattern): + return { + 'success': False, + 'message': "index pattern is too broad" + }, 400 + + if index_pattern: + new_doc["index_pattern"] = index_pattern + if time_limit and isinstance(time_limit, int): if time_limit <= 0 or time_limit > 86400 * 7: return { @@ -264,6 +277,7 @@ def put(self): # TODO: add user role and permissions job_spec = request_data.get('job_spec') priority = request_data.get('priority') query_string = request_data.get('query_string') + index_pattern = request_data.get('index_pattern', "").strip() kwargs = request_data.get('kwargs') queue = request_data.get('queue') enabled = request_data.get('enabled') @@ -333,6 +347,18 @@ def put(self): # TODO: add user role and permissions 'success': False, 'message': 'invalid elasticsearch query JSON' }, 400 + + if "index_pattern" in request_data: + if not validate_index_pattern(index_pattern): + return { + 'success': False, + 'message': "index pattern is too broad" + }, 400 + if index_pattern: + update_doc["index_pattern"] = index_pattern # noqa + else: + update_doc["index_pattern"] = None + if kwargs: update_doc['kwargs'] = kwargs try: diff --git a/setup.py b/setup.py index b36c310..2a351ad 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='grq2', - 
version='2.0.23', + version='2.0.24', long_description='GeoRegionQuery REST API using ElasticSearch backend', packages=find_packages(), include_package_data=True,