diff --git a/corehq/apps/data_cleaning/exceptions.py b/corehq/apps/data_cleaning/exceptions.py index cbd0a6506a29..be54eac04b12 100644 --- a/corehq/apps/data_cleaning/exceptions.py +++ b/corehq/apps/data_cleaning/exceptions.py @@ -1,2 +1,10 @@ class UnsupportedActionException(Exception): """Raised when an unknown action is encountered""" + + +class UnsupportedFilterValueException(Exception): + """ + Raised when a BulkEditColumnFilter has a value that is unsupported by + its FilterMatchType and DataType combination. This is rare, + as the filter creation form should catch most of the issues. + """ diff --git a/corehq/apps/data_cleaning/migrations/0003_column_filter_unique_ids_match_type_updates.py b/corehq/apps/data_cleaning/migrations/0003_column_filter_unique_ids_match_type_updates.py new file mode 100644 index 000000000000..fd21fdaa45e3 --- /dev/null +++ b/corehq/apps/data_cleaning/migrations/0003_column_filter_unique_ids_match_type_updates.py @@ -0,0 +1,40 @@ +# Generated by Django 4.2.18 on 2025-02-25 12:35 + +from django.db import migrations, models +import uuid + + +def delete_columns_and_filters(apps, schema_editor): + # this migration was written when none of these models are used in production, + # or even generated by a non-local UI + # Therefore, it's safe to just delete everything and start over. + BulkEditColumnFilter = apps.get_model("data_cleaning", "BulkEditColumnFilter") + BulkEditColumnFilter.objects.all().delete() + BulkEditColumn = apps.get_model("data_cleaning", "BulkEditColumn") + BulkEditColumn.objects.all().delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('data_cleaning', '0002_update_fields_and_ordering'), + ] + + operations = [ + migrations.RunPython(delete_columns_and_filters, reverse_code=migrations.RunPython.noop), + migrations.AddField( + model_name='bulkeditcolumn', + name='column_id', + field=models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, unique=True), + ), + migrations.AddField( + model_name='bulkeditcolumnfilter', + name='filter_id', + field=models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, unique=True), + ), + migrations.AlterField( + model_name='bulkeditcolumnfilter', + name='match_type', + field=models.CharField(choices=[('exact', 'exact'), ('is_not', 'is_not'), ('starts', 'starts'), ('starts_not', 'starts_not'), ('is_empty', 'is_empty'), ('is_not_empty', 'is_not_empty'), ('missing', 'missing'), ('not_missing', 'not_missing'), ('fuzzy', 'fuzzy'), ('not_fuzzy', 'not_fuzzy'), ('phonetic', 'phonetic'), ('not_phonetic', 'not_phonetic'), ('lt', 'lt'), ('gt', 'gt'), ('lte', 'lte'), ('gte', 'gte'), ('is_any', 'is_any'), ('is_not_any', 'is_not_any'), ('is_all', 'is_all'), ('is_not_all', 'is_not_all')], default='exact', max_length=12), + ), + ] diff --git a/corehq/apps/data_cleaning/models.py b/corehq/apps/data_cleaning/models.py index 011cccd7ef89..4e1378b67498 100644 --- a/corehq/apps/data_cleaning/models.py +++ b/corehq/apps/data_cleaning/models.py @@ -7,7 +7,11 @@ from django.utils.translation import gettext_lazy, gettext as _ from corehq.apps.case_search.const import METADATA_IN_REPORTS -from corehq.apps.data_cleaning.exceptions import UnsupportedActionException +from corehq.apps.data_cleaning.exceptions import ( + UnsupportedActionException, + UnsupportedFilterValueException, +) +from corehq.apps.es import CaseSearchES class BulkEditSessionType: @@ -96,6 +100,44 @@ def status(self): return "in progress" return "pending" + def add_column_filter(self, prop_id, data_type, match_type, value=None): + BulkEditColumnFilter.objects.create( + session=self, + index=self.column_filters.count(), + prop_id=prop_id, + data_type=data_type, + match_type=match_type, + value=value, + ) + + def reorder_column_filters(self, filter_ids): + """ + This updates the order of column filters for this session + :param filter_ids: list of uuids matching filter_id field of BulkEditColumnFilters + """ + if len(filter_ids) != self.column_filters.count(): + raise ValueError("the lengths of column_ids and available column filters do not match") + for index, filter_id in enumerate(filter_ids): + column_filter = self.column_filters.get(filter_id=filter_id) + column_filter.index = index + column_filter.save() + + def get_queryset(self): + query = CaseSearchES().domain(self.domain).case_type(self.identifier) + query = self._apply_column_filters(query) + return query + + def _apply_column_filters(self, query): + xpath_expressions = [] + for column_filter in self.column_filters.all(): + query = column_filter.filter_query(query) + column_xpath = column_filter.get_xpath_expression() + if column_xpath is not None: + xpath_expressions.append(column_xpath) + if xpath_expressions: + query = query.xpath_query(self.domain, " and ".join(xpath_expressions)) + return query + class DataType: TEXT = 'text' @@ -126,19 +168,31 @@ class DataType: (PASSWORD, gettext_lazy("Password")), ) + FILTER_CATEGORY_TEXT = 'filter_text' + FILTER_CATEGORY_NUMBER = 'filter_number' + FILTER_CATEGORY_DATE = 'filter_date' + FILTER_CATEGORY_MULTI_SELECT = 'filter_multi_select' + + FILTER_CATEGORY_DATA_TYPES = { + FILTER_CATEGORY_TEXT: (TEXT, PHONE_NUMBER, BARCODE, PASSWORD, GPS, SINGLE_OPTION, TIME,), + FILTER_CATEGORY_NUMBER: (INTEGER, DECIMAL,), + FILTER_CATEGORY_DATE: (DATE, DATETIME,), + FILTER_CATEGORY_MULTI_SELECT: (MULTIPLE_OPTION,), + } + class FilterMatchType: EXACT = "exact" IS_NOT = "is_not" STARTS = "starts" - ENDS = "ends" + STARTS_NOT = "starts_not" IS_EMPTY = "is_empty" # empty string IS_NOT_EMPTY = "is_not_empty" - IS_NULL = "is_null" # un-set - IS_NOT_NULL = "is_not_null" + IS_MISSING = "missing" # un-set + IS_NOT_MISSING = "not_missing" FUZZY = "fuzzy" # will use fuzzy-match from CQL FUZZY_NOT = "not_fuzzy" # will use not(fuzzy-match()) from CQL @@ -149,6 +203,9 @@ class FilterMatchType: LESS_THAN = "lt" GREATER_THAN = "gt" + LESS_THAN_EQUAL = "lte" + GREATER_THAN_EQUAL = "gte" + IS_ANY = "is_any" # we will use selected-any from CQL IS_NOT_ANY = "is_not_any" # we will use not(selected-any()) from CQL @@ -159,38 +216,42 @@ class FilterMatchType: (EXACT, EXACT), (IS_NOT, IS_NOT), (STARTS, STARTS), - (ENDS, ENDS), + (STARTS_NOT, STARTS_NOT), (IS_EMPTY, IS_EMPTY), (IS_NOT_EMPTY, IS_NOT_EMPTY), - (IS_NULL, IS_NULL), - (IS_NOT_NULL, IS_NOT_NULL), + (IS_MISSING, IS_MISSING), + (IS_NOT_MISSING, IS_NOT_MISSING), (FUZZY, FUZZY), (FUZZY_NOT, FUZZY_NOT), (PHONETIC, PHONETIC), (PHONETIC_NOT, PHONETIC_NOT), (LESS_THAN, LESS_THAN), (GREATER_THAN, GREATER_THAN), + (LESS_THAN_EQUAL, LESS_THAN_EQUAL), + (GREATER_THAN_EQUAL, GREATER_THAN_EQUAL), (IS_ANY, IS_ANY), (IS_NOT_ANY, IS_NOT_ANY), (IS_ALL, IS_ALL), (IS_NOT_ALL, IS_NOT_ALL), ) + # choices valid for all data types + ALL_DATA_TYPES_CHOICES = ( + (IS_EMPTY, gettext_lazy("is empty")), + (IS_NOT_EMPTY, gettext_lazy("is not empty")), + (IS_MISSING, gettext_lazy("is missing")), + (IS_NOT_MISSING, gettext_lazy("is not missing")), + ) + TEXT_CHOICES = ( (EXACT, gettext_lazy("is exactly")), (IS_NOT, gettext_lazy("is not")), (STARTS, gettext_lazy("starts with")), - (ENDS, gettext_lazy("ends with")), - (IS_EMPTY, gettext_lazy("is empty")), - (IS_NOT_EMPTY, gettext_lazy("is not empty")), - (IS_NULL, gettext_lazy("is NULL")), - (IS_NOT_NULL, gettext_lazy("is not NULL")), + (STARTS_NOT, gettext_lazy("does not start with")), (FUZZY, gettext_lazy("is like")), (FUZZY_NOT, gettext_lazy("is not like")), (PHONETIC, gettext_lazy("sounds like")), (PHONETIC_NOT, gettext_lazy("does not sound like")), - (LESS_THAN, gettext_lazy("is before")), - (GREATER_THAN, gettext_lazy("is after")), ) MULTI_SELECT_CHOICES = ( @@ -204,20 +265,23 @@ class FilterMatchType: (EXACT, gettext_lazy("equals")), (IS_NOT, gettext_lazy("does not equal")), (LESS_THAN, gettext_lazy("less than")), - (ENDS, gettext_lazy("less than or equal to")), + (LESS_THAN_EQUAL, gettext_lazy("less than or equal to")), (GREATER_THAN, gettext_lazy("greater than")), - (STARTS, gettext_lazy("greater than or equal to")), + (GREATER_THAN_EQUAL, gettext_lazy("greater than or equal to")), ) DATE_CHOICES = ( (EXACT, gettext_lazy("on")), (LESS_THAN, gettext_lazy("before")), + (LESS_THAN_EQUAL, gettext_lazy("before or on")), (GREATER_THAN, gettext_lazy("after")), + (GREATER_THAN_EQUAL, gettext_lazy("on or after")), ) class BulkEditColumnFilter(models.Model): session = models.ForeignKey(BulkEditSession, related_name="column_filters", on_delete=models.CASCADE) + filter_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True, db_index=True) index = models.IntegerField(default=0) prop_id = models.CharField(max_length=255) # case property or form question_id data_type = models.CharField( @@ -235,6 +299,95 @@ class BulkEditColumnFilter(models.Model): class Meta: ordering = ["index"] + def filter_query(self, query): + filter_query_functions = { + FilterMatchType.IS_EMPTY: lambda q: q.empty(self.prop_id), + FilterMatchType.IS_NOT_EMPTY: lambda q: q.non_null(self.prop_id), + FilterMatchType.IS_MISSING: lambda q: q.missing(self.prop_id), + FilterMatchType.IS_NOT_MISSING: lambda q: q.exists(self.prop_id), + } + if self.match_type in filter_query_functions: + query = filter_query_functions[self.match_type](query) + return query + + @staticmethod + def is_data_and_match_type_valid(match_type, data_type): + if match_type in dict(FilterMatchType.ALL_DATA_TYPES_CHOICES): + # empty / missing is always valid regardless of data type + return True + + matches_by_category = { + DataType.FILTER_CATEGORY_TEXT: dict(FilterMatchType.TEXT_CHOICES), + DataType.FILTER_CATEGORY_NUMBER: dict(FilterMatchType.NUMBER_CHOICES), + DataType.FILTER_CATEGORY_DATE: dict(FilterMatchType.DATE_CHOICES), + DataType.FILTER_CATEGORY_MULTI_SELECT: dict(FilterMatchType.MULTI_SELECT_CHOICES), + } + for category, valid_data_types in DataType.FILTER_CATEGORY_DATA_TYPES.items(): + if data_type in valid_data_types: + return match_type in matches_by_category[category] + + return False + + @staticmethod + def get_quoted_value(value): + has_single_quote = "'" in value + has_double_quote = '"' in value + if has_double_quote and has_single_quote: + # It seems our current xpath parsing library has no way of escaping quotes. + # A workaround could be to avoid xpath expression parsing altogether and have + # all match_types use `filter_query` directly, but that would require more effort. + # The option to use CaseSearchES `xpath_query` was chosen for development speed, + # acknowledging that there are limitations. We can re-evaluate this decision + # when filtering form data, as we don't have an `xpath_query` filter built for FormES. + raise UnsupportedFilterValueException( + """We cannot support both single quotes (') and double quotes (") in + a filter value at this time.""" + ) + return f'"{value}"' if has_single_quote else f"'{value}'" + + def get_xpath_expression(self): + """ + Assumption: + - data_type and match_type combination was validated by the form that created this filter + + Limitations: + - no support for multiple quote types (double and single) in the same value + - no support for special whitespace characters (tab or newline) + - no `xpath_query` support in `FormES` + + We can address limitations in later releases of this tool. + """ + match_operators = { + FilterMatchType.EXACT: '=', + FilterMatchType.IS_NOT: '!=', + FilterMatchType.LESS_THAN: '<', + FilterMatchType.LESS_THAN_EQUAL: '<=', + FilterMatchType.GREATER_THAN: '>', + FilterMatchType.GREATER_THAN_EQUAL: '>=', + } + if self.match_type in match_operators: + # we assume the data type was properly verified on creation + is_number = self.data_type in DataType.FILTER_CATEGORY_DATA_TYPES[DataType.FILTER_CATEGORY_NUMBER] + value = self.value if is_number else self.get_quoted_value(self.value) + operator = match_operators[self.match_type] + return f"{self.prop_id} {operator} {value}" + + match_expression = { + FilterMatchType.STARTS: lambda x: f'starts-with({self.prop_id}, {x})', + FilterMatchType.STARTS_NOT: lambda x: f'not(starts-with({self.prop_id}, {x}))', + FilterMatchType.FUZZY: lambda x: f'fuzzy-match({self.prop_id}, {x})', + FilterMatchType.FUZZY_NOT: lambda x: f'not(fuzzy-match({self.prop_id}, {x}))', + FilterMatchType.PHONETIC: lambda x: f'phonetic-match({self.prop_id}, {x})', + FilterMatchType.PHONETIC_NOT: lambda x: f'not(phonetic-match({self.prop_id}, {x}))', + FilterMatchType.IS_ANY: lambda x: f'selected-any({self.prop_id}, {x})', + FilterMatchType.IS_NOT_ANY: lambda x: f'not(selected-any({self.prop_id}, {x}))', + FilterMatchType.IS_ALL: lambda x: f'selected-all({self.prop_id}, {x})', + FilterMatchType.IS_NOT_ALL: lambda x: f'not(selected-all({self.prop_id}, {x}))', + } + if self.match_type in match_expression: + quoted_value = self.get_quoted_value(self.value) + return match_expression[self.match_type](quoted_value) + class PinnedFilterType: CASE_OWNERS = 'case_owners' @@ -285,6 +438,7 @@ def create_default_filters(cls, session): class BulkEditColumn(models.Model): session = models.ForeignKey(BulkEditSession, related_name="columns", on_delete=models.CASCADE) + column_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True, db_index=True) index = models.IntegerField(default=0) prop_id = models.CharField(max_length=255) # case property or form question_id label = models.CharField(max_length=255) diff --git a/corehq/apps/data_cleaning/tests/test_filters.py b/corehq/apps/data_cleaning/tests/test_filters.py new file mode 100644 index 000000000000..1d4190651fcf --- /dev/null +++ b/corehq/apps/data_cleaning/tests/test_filters.py @@ -0,0 +1,547 @@ +import pytest +from testil import eq +from django.test import TestCase + +from corehq.apps.data_cleaning.exceptions import UnsupportedFilterValueException +from corehq.apps.data_cleaning.models import ( + BulkEditColumnFilter, + DataType, + FilterMatchType, +) +from corehq.apps.es import CaseSearchES +from corehq.apps.es.case_search import case_search_adapter +from corehq.apps.es.tests.utils import ( + case_search_es_setup, + es_test, +) +from corehq.apps.hqwebapp.tests.tables.generator import get_case_blocks +from corehq.form_processor.tests.utils import FormProcessorTestUtils + + +@pytest.mark.parametrize("category, valid_match_types", [ + (DataType.FILTER_CATEGORY_TEXT, ( + FilterMatchType.EXACT, + FilterMatchType.IS_NOT, + FilterMatchType.STARTS, + FilterMatchType.STARTS_NOT, + FilterMatchType.FUZZY, + FilterMatchType.FUZZY_NOT, + FilterMatchType.PHONETIC, + FilterMatchType.PHONETIC_NOT, + FilterMatchType.IS_EMPTY, + FilterMatchType.IS_NOT_EMPTY, + FilterMatchType.IS_MISSING, + FilterMatchType.IS_NOT_MISSING, + )), + (DataType.FILTER_CATEGORY_NUMBER, ( + FilterMatchType.EXACT, + FilterMatchType.IS_NOT, + FilterMatchType.LESS_THAN, + FilterMatchType.LESS_THAN_EQUAL, + FilterMatchType.GREATER_THAN, + FilterMatchType.GREATER_THAN_EQUAL, + FilterMatchType.IS_EMPTY, + FilterMatchType.IS_NOT_EMPTY, + FilterMatchType.IS_MISSING, + FilterMatchType.IS_NOT_MISSING, + )), + (DataType.FILTER_CATEGORY_DATE, ( + FilterMatchType.EXACT, + FilterMatchType.LESS_THAN, + FilterMatchType.LESS_THAN_EQUAL, + FilterMatchType.GREATER_THAN, + FilterMatchType.GREATER_THAN_EQUAL, + FilterMatchType.IS_EMPTY, + FilterMatchType.IS_NOT_EMPTY, + FilterMatchType.IS_MISSING, + FilterMatchType.IS_NOT_MISSING, + )), + (DataType.FILTER_CATEGORY_MULTI_SELECT, ( + FilterMatchType.IS_ANY, + FilterMatchType.IS_NOT_ANY, + FilterMatchType.IS_ALL, + FilterMatchType.IS_NOT_ALL, + FilterMatchType.IS_EMPTY, + FilterMatchType.IS_NOT_EMPTY, + FilterMatchType.IS_MISSING, + FilterMatchType.IS_NOT_MISSING, + )), +]) +def test_data_and_match_type_validation(category, valid_match_types): + for data_type in DataType.FILTER_CATEGORY_DATA_TYPES[category]: + for match_type, _ in FilterMatchType.ALL_CHOICES: + is_valid = BulkEditColumnFilter.is_data_and_match_type_valid( + match_type, data_type + ) + if match_type in valid_match_types: + eq(is_valid, True, + text=f"FilterMatchType {match_type} should support DataType {data_type}") + else: + eq(is_valid, False, + text=f"FilterMatchType {match_type} should NOT support DataType {data_type}") + + +@es_test(requires=[case_search_adapter], setup_class=True) +class BulkEditColumnFilterQueryTests(TestCase): + domain = 'column-test-filters' + + @classmethod + def setUpClass(cls): + super().setUpClass() + case_search_es_setup(cls.domain, get_case_blocks()) + + @classmethod + def tearDownClass(cls): + FormProcessorTestUtils.delete_all_cases() + super().tearDownClass() + + def test_filter_query_is_empty(self): + query = CaseSearchES().domain(self.domain) + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=data_type, + match_type=FilterMatchType.IS_EMPTY, + ) + filtered_query = column_filter.filter_query(query) + expected_query = query.empty('soil_contents') + self.assertEqual( + filtered_query.es_query, expected_query.es_query, + msg=f"{data_type} failed to filter the query " + f"properly for FilterMatchType.is_empty" + ) + + def test_filter_query_is_not_empty(self): + query = CaseSearchES().domain(self.domain) + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=data_type, + match_type=FilterMatchType.IS_NOT_EMPTY, + ) + filtered_query = column_filter.filter_query(query) + expected_query = query.non_null('soil_contents') + self.assertEqual( + filtered_query.es_query, expected_query.es_query, + msg=f"{data_type} failed to filter the query " + f"properly for FilterMatchType.is_empty" + ) + + def test_filter_query_is_missing(self): + query = CaseSearchES().domain(self.domain) + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=data_type, + match_type=FilterMatchType.IS_MISSING, + ) + filtered_query = column_filter.filter_query(query) + expected_query = query.missing('soil_contents') + self.assertEqual( + filtered_query.es_query, expected_query.es_query, + msg=f"{data_type} failed to filter the query " + f"properly for FilterMatchType.is_empty" + ) + + def test_filter_query_is_not_missing(self): + query = CaseSearchES().domain(self.domain) + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=data_type, + match_type=FilterMatchType.IS_NOT_MISSING, + ) + filtered_query = column_filter.filter_query(query) + expected_query = query.exists('soil_contents') + self.assertEqual( + filtered_query.es_query, expected_query.es_query, + msg=f"{data_type} failed to filter the query " + f"properly for FilterMatchType.is_empty" + ) + + def filter_query_remains_unchanged_for_other_match_types(self): + query = CaseSearchES().domain(self.domain) + for match_type, _ in FilterMatchType.ALL_CHOICES: + if match_type in dict(FilterMatchType.ALL_DATA_TYPES_CHOICES): + continue + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=data_type, + match_type=match_type, + ) + filtered_query = column_filter.filter_query(query) + self.assertEqual( + filtered_query.es_query, query.es_query, + msg=f"filtered query should remain unchanged for {data_type}, {match_type}" + ) + + +class BulkEditColumnFilterXpathTest(TestCase): + + def test_exact_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.EXACT, + value='Riny Iola', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "name = 'Riny Iola'" + ) + + def test_single_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.EXACT, + value="Happy's", + ) + self.assertEqual( + column_filter.get_quoted_value(column_filter.value), + '''"Happy's"''' + ) + self.assertEqual( + column_filter.get_xpath_expression(), + '''name = "Happy's"''' + ) + + def test_double_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.EXACT, + value='Zesty "orange" Flora', + ) + self.assertEqual( + column_filter.get_quoted_value(column_filter.value), + """'Zesty "orange" Flora'""" + ) + self.assertEqual( + column_filter.get_xpath_expression(), + """name = 'Zesty "orange" Flora'""" + ) + + def test_mixed_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.EXACT, + value='''Zesty's "orange" Flora''', + ) + with self.assertRaises(UnsupportedFilterValueException): + column_filter.get_quoted_value(column_filter.value) + with self.assertRaises(UnsupportedFilterValueException): + column_filter.get_xpath_expression() + + def test_exact_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='height_cm', + data_type=DataType.DECIMAL, + match_type=FilterMatchType.EXACT, + value='11.2' + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "height_cm = 11.2" + ) + + def test_exact_date_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='watered_on', + data_type=DataType.DATE, + match_type=FilterMatchType.EXACT, + value='2024-12-11' + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "watered_on = '2024-12-11'" + ) + + def test_is_not_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='phone_num', + data_type=DataType.PHONE_NUMBER, + match_type=FilterMatchType.IS_NOT, + value='11245523233', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "phone_num != '11245523233'" + ) + + def test_is_not_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='num_leaves', + data_type=DataType.INTEGER, + match_type=FilterMatchType.IS_NOT, + value='5', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "num_leaves != 5" + ) + + def test_less_than_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='height_cm', + data_type=DataType.DECIMAL, + match_type=FilterMatchType.LESS_THAN, + value='12.35', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "height_cm < 12.35" + ) + + def test_less_than_date_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='watered_on', + data_type=DataType.DATETIME, + match_type=FilterMatchType.LESS_THAN, + value='2025-02-03 16:43', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "watered_on < '2025-02-03 16:43'" + ) + + def test_less_than_equal_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='weight_kg', + data_type=DataType.DECIMAL, + match_type=FilterMatchType.LESS_THAN_EQUAL, + value='35.5', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "weight_kg <= 35.5" + ) + + def test_less_than_equal_date_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='last_modified', + data_type=DataType.DATETIME, + match_type=FilterMatchType.LESS_THAN_EQUAL, + value='2025-02-20 16:55', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "last_modified <= '2025-02-20 16:55'" + ) + + def test_greater_than_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='amount', + data_type=DataType.INTEGER, + match_type=FilterMatchType.GREATER_THAN, + value='15', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "amount > 15" + ) + + def test_greater_than_date_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='modified_on', + data_type=DataType.DATE, + match_type=FilterMatchType.GREATER_THAN, + value='2025-01-22', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "modified_on > '2025-01-22'" + ) + + def test_greater_than_equal_number_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='num_branches', + data_type=DataType.INTEGER, + match_type=FilterMatchType.GREATER_THAN_EQUAL, + value='23', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "num_branches >= 23" + ) + + def test_greater_than_equal_date_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='submitted_on', + data_type=DataType.DATE, + match_type=FilterMatchType.GREATER_THAN_EQUAL, + value='2025-03-03', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "submitted_on >= '2025-03-03'" + ) + + def test_starts_with_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.STARTS, + value='st', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "starts-with(name, 'st')" + ) + + def test_starts_with_text_single_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.STARTS, + value="st's", + ) + self.assertEqual( + column_filter.get_xpath_expression(), + """starts-with(name, "st's")""" + ) + + def test_starts_with_text_double_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.STARTS, + value='st"s', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + """starts-with(name, 'st"s')""" + ) + + def test_starts_text_mixed_quote_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='name', + data_type=DataType.TEXT, + match_type=FilterMatchType.STARTS, + value='''st"s m'd''', + ) + with self.assertRaises(UnsupportedFilterValueException): + column_filter.get_xpath_expression() + + def test_starts_not_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='favorite_park', + data_type=DataType.TEXT, + match_type=FilterMatchType.STARTS_NOT, + value='fo', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "not(starts-with(favorite_park, 'fo'))" + ) + + def test_fuzzy_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='pot_type', + data_type=DataType.TEXT, + match_type=FilterMatchType.FUZZY, + value='ceremic', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "fuzzy-match(pot_type, 'ceremic')" + ) + + def test_fuzzy_not_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='pot_type', + data_type=DataType.TEXT, + match_type=FilterMatchType.FUZZY_NOT, + value='ceremic', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "not(fuzzy-match(pot_type, 'ceremic'))" + ) + + def test_phonetic_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='light_level', + data_type=DataType.TEXT, + match_type=FilterMatchType.PHONETIC, + value='hi', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "phonetic-match(light_level, 'hi')" + ) + + def test_phonetic_not_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='light_level', + data_type=DataType.TEXT, + match_type=FilterMatchType.PHONETIC_NOT, + value='hi', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "not(phonetic-match(light_level, 'hi'))" + ) + + def test_is_any_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='health_issues', + data_type=DataType.MULTIPLE_OPTION, + match_type=FilterMatchType.IS_ANY, + value='yellow_leaves root_rot', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "selected-any(health_issues, 'yellow_leaves root_rot')" + ) + + def test_is_not_any_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='health_issues', + data_type=DataType.MULTIPLE_OPTION, + match_type=FilterMatchType.IS_NOT_ANY, + value='fungus root_rot', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "not(selected-any(health_issues, 'fungus root_rot'))" + ) + + def test_is_all_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=DataType.MULTIPLE_OPTION, + match_type=FilterMatchType.IS_ALL, + value='bark worm_castings', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "selected-all(soil_contents, 'bark worm_castings')" + ) + + def test_is_not_all_text_xpath(self): + column_filter = BulkEditColumnFilter( + prop_id='soil_contents', + data_type=DataType.MULTIPLE_OPTION, + match_type=FilterMatchType.IS_NOT_ALL, + value='bark worm_castings', + ) + self.assertEqual( + column_filter.get_xpath_expression(), + "not(selected-all(soil_contents, 'bark worm_castings'))" + ) + + def test_value_match_types_return_none_all_data_types_xpath(self): + for match_type, _ in FilterMatchType.ALL_DATA_TYPES_CHOICES: + for data_type, _ in DataType.CHOICES: + column_filter = BulkEditColumnFilter( + prop_id='a_property', + data_type=data_type, + match_type=match_type, + ) + self.assertIsNone( + column_filter.get_xpath_expression(), + msg=f"{match_type} for {data_type} should not return an xpath expression" + ) diff --git a/corehq/apps/data_cleaning/tests/test_session.py b/corehq/apps/data_cleaning/tests/test_session.py index 13f7bf5772c3..7c0e994f7b98 100644 --- a/corehq/apps/data_cleaning/tests/test_session.py +++ b/corehq/apps/data_cleaning/tests/test_session.py @@ -3,9 +3,22 @@ from django.contrib.auth.models import User from django.test import TestCase -from corehq.apps.data_cleaning.models import BulkEditSession, BulkEditSessionType +from corehq.apps.data_cleaning.models import ( + BulkEditSession, + BulkEditSessionType, + DataType, + FilterMatchType, +) from corehq.apps.domain.shortcuts import create_domain +from corehq.apps.es import CaseSearchES +from corehq.apps.es.case_search import case_search_adapter +from corehq.apps.es.tests.utils import ( + case_search_es_setup, + es_test, +) +from corehq.apps.hqwebapp.tests.tables.generator import get_case_blocks from corehq.apps.users.models import WebUser +from corehq.form_processor.tests.utils import FormProcessorTestUtils class BulkEditSessionTest(TestCase): @@ -19,7 +32,7 @@ def setUpClass(cls): cls.addClassCleanup(cls.domain.delete) cls.web_user = WebUser.create( - cls.domain.name, 'b@vaultwax.com', 'testpwd', None, None + cls.domain.name, 'tester@datacleaning.org', 'testpwd', None, None ) cls.django_user = User.objects.get(username=cls.web_user.username) cls.addClassCleanup(cls.web_user.delete, cls.domain.name, deleted_by=None) @@ -89,3 +102,123 @@ def test_restart_case_session(self): old_session_id = old_session.session_id new_session = BulkEditSession.restart_case_session(self.django_user, self.domain_name, self.case_type) self.assertNotEqual(old_session_id, new_session.session_id) + + +@es_test(requires=[case_search_adapter], setup_class=True) +class BulkEditSessionFilteredQuerysetTests(TestCase): + domain_name = 'session-test-queryset' + + @classmethod + def setUpClass(cls): + super().setUpClass() + case_search_es_setup(cls.domain_name, get_case_blocks()) + + cls.domain = create_domain(cls.domain_name) + cls.addClassCleanup(cls.domain.delete) + + cls.web_user = WebUser.create( + cls.domain.name, 'tester@datacleaning.org', 'testpwd', None, None + ) + cls.django_user = User.objects.get(username=cls.web_user.username) + cls.addClassCleanup(cls.web_user.delete, cls.domain.name, deleted_by=None) + + cls.case_type = 'child' + + @classmethod + def tearDownClass(cls): + FormProcessorTestUtils.delete_all_cases() + super().tearDownClass() + + def test_add_column_filters(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('watered_on', DataType.DATE, FilterMatchType.IS_NOT_MISSING) + session.add_column_filter('name', DataType.TEXT, FilterMatchType.PHONETIC, "lowkey") + session.add_column_filter('num_leaves', DataType.INTEGER, FilterMatchType.GREATER_THAN, "2") + session.add_column_filter('pot_type', DataType.DATE, FilterMatchType.IS_EMPTY) + session.add_column_filter('height_cm', DataType.DECIMAL, FilterMatchType.LESS_THAN_EQUAL, "11.0") + column_filters = session.column_filters.all() + for index, prop_id in enumerate(['watered_on', 'name', 'num_leaves', 'pot_type', 'height_cm']): + self.assertEqual(column_filters[index].prop_id, prop_id) + self.assertEqual(column_filters[index].index, index) + + def test_reorder_wrong_number_of_filter_ids_raises_error(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('watered_on', DataType.DATE, FilterMatchType.IS_NOT_MISSING) + session.add_column_filter('name', DataType.TEXT, FilterMatchType.PHONETIC, "lowkey") + session.add_column_filter('num_leaves', DataType.INTEGER, FilterMatchType.GREATER_THAN, "2") + session.add_column_filter('pot_type', DataType.DATE, FilterMatchType.IS_EMPTY) + session.add_column_filter('height_cm', DataType.DECIMAL, FilterMatchType.LESS_THAN_EQUAL, "11.0") + column_filters = session.column_filters.all() + new_order = [column_filters[1].filter_id, column_filters[2].filter_id] + with self.assertRaises(ValueError): + session.reorder_column_filters(new_order) + + def test_reorder_column_filters(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('watered_on', DataType.DATE, FilterMatchType.IS_NOT_MISSING) + session.add_column_filter('name', DataType.TEXT, FilterMatchType.PHONETIC, "lowkey") + session.add_column_filter('num_leaves', DataType.INTEGER, FilterMatchType.GREATER_THAN, "2") + session.add_column_filter('pot_type', DataType.DATE, FilterMatchType.IS_EMPTY) + session.add_column_filter('height_cm', DataType.DECIMAL, FilterMatchType.LESS_THAN_EQUAL, "11.0") + column_filters = session.column_filters.all() + new_order = [ + column_filters[1].filter_id, + column_filters[0].filter_id, + column_filters[2].filter_id, + column_filters[4].filter_id, + column_filters[3].filter_id, + ] + session.reorder_column_filters(new_order) + reordered_prop_ids = [c.prop_id for c in session.column_filters.all()] + self.assertEqual( + reordered_prop_ids, + ['name', 'watered_on', 'num_leaves', 'height_cm', 'pot_type'] + ) + + def test_get_queryset_multiple_column_filters(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('watered_on', DataType.DATE, FilterMatchType.IS_NOT_MISSING) + session.add_column_filter('name', DataType.TEXT, FilterMatchType.PHONETIC, 'lowkey') + session.add_column_filter('num_leaves', DataType.INTEGER, FilterMatchType.GREATER_THAN, '2') + session.add_column_filter('pot_type', DataType.MULTIPLE_OPTION, FilterMatchType.IS_EMPTY) + session.add_column_filter('height_cm', DataType.DECIMAL, FilterMatchType.LESS_THAN_EQUAL, '11.1') + query = session.get_queryset() + expected_query = ( + CaseSearchES() + .domain(self.domain_name) + .case_type(self.case_type) + .exists('watered_on') + .empty('pot_type') + .xpath_query( + self.domain_name, + "phonetic-match(name, 'lowkey') and num_leaves > 2 and height_cm <= 11.1" + ) + ) + self.assertEqual(query.es_query, expected_query.es_query) + + def test_get_queryset_column_filters_no_xpath(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('watered_on', DataType.DATE, FilterMatchType.IS_NOT_MISSING) + query = session.get_queryset() + expected_query = ( + CaseSearchES() + .domain(self.domain_name) + .case_type(self.case_type) + .exists('watered_on') + ) + self.assertEqual(query.es_query, expected_query.es_query) + + def test_get_queryset_column_filters_xpath_only(self): + session = BulkEditSession.new_case_session(self.django_user, self.domain_name, self.case_type) + session.add_column_filter('num_leaves', DataType.INTEGER, FilterMatchType.GREATER_THAN, '2') + query = session.get_queryset() + expected_query = ( + CaseSearchES() + .domain(self.domain_name) + .case_type(self.case_type) + .xpath_query( + self.domain_name, + "num_leaves > 2" + ) + ) + self.assertEqual(query.es_query, expected_query.es_query) diff --git a/corehq/apps/data_cleaning/views/tables.py b/corehq/apps/data_cleaning/views/tables.py index 3c51916efb83..375f7348a9e7 100644 --- a/corehq/apps/data_cleaning/views/tables.py +++ b/corehq/apps/data_cleaning/views/tables.py @@ -12,7 +12,6 @@ ) from corehq.apps.domain.decorators import LoginAndDomainMixin from corehq.apps.domain.views import DomainViewMixin -from corehq.apps.es import CaseSearchES from corehq.apps.hqwebapp.decorators import use_bootstrap5 from corehq.apps.hqwebapp.tables.pagination import SelectablePaginatedTableView @@ -37,10 +36,6 @@ def session(self): except BulkEditSession.DoesNotExist: raise Http404(_("Data cleaning session was not found.")) - @property - def case_type(self): - return self.session.identifier - @property def session_id(self): return self.kwargs['session_id'] @@ -54,7 +49,7 @@ def get_table_kwargs(self): } def get_queryset(self): - return CaseSearchES().domain(self.domain).case_type(self.case_type) + return self.session.get_queryset() class CaseCleaningTasksTableView(BaseDataCleaningTableView): diff --git a/migrations.lock b/migrations.lock index b0cfb3684167..fd23b69150e2 100644 --- a/migrations.lock +++ b/migrations.lock @@ -314,6 +314,7 @@ data_analytics data_cleaning 0001_initial 0002_update_fields_and_ordering + 0003_column_filter_unique_ids_match_type_updates data_dictionary 0001_squashed_0002_auto_20161116_2209 0002_auto_20161118_1537