Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backend logic for column filtering #35846

Merged
merged 19 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
23746c5
CaseSearchES xpath_query does not support ends-with, only starts-with
biyeun Feb 25, 2025
fc65bc9
empty vs null is not supported in CaseSearchES
biyeun Feb 25, 2025
2f96aa1
make separate lte, gte match type choices
biyeun Feb 25, 2025
3986d67
move empty, missing match types to own category applying to all data …
biyeun Feb 25, 2025
a58d11d
sort DataTypes into filter categories so that correct filter choices …
biyeun Feb 25, 2025
a60b119
add validation utility (for forms) for data_type match_type combinations
biyeun Feb 25, 2025
d4ff670
add filter_query utility to BulkEditColumnFilters
biyeun Feb 25, 2025
cb26c25
add utility for retrieving xpath_expression for BulkEditColumnFilter
biyeun Feb 25, 2025
df53fe4
add utility to session for adding column filters
biyeun Feb 25, 2025
7073f98
add unique ids to reorderable BulkEdit Columns and Filters
biyeun Feb 25, 2025
c2d86c2
add a migration for all of the most recent model updates
biyeun Feb 25, 2025
f96a28d
add utility to session for reordering column filters
biyeun Feb 25, 2025
a6e45ff
add utility for getting the queryset from the session
biyeun Feb 25, 2025
7b28932
update table view to use session.get_queryset()
biyeun Feb 25, 2025
a02022a
replacing with more fake email (whoops)
biyeun Feb 25, 2025
312c2cd
update docstring
biyeun Feb 26, 2025
234198e
update quote style for consistency
biyeun Feb 26, 2025
6550776
update tests for clarity
biyeun Feb 26, 2025
685294b
Merge branch 'master' of github.com:dimagi/commcare-hq into bmb/dc/fi…
biyeun Feb 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions corehq/apps/data_cleaning/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
class UnsupportedActionException(Exception):
"""Raised when an unknown action is encountered"""


class UnsupportedFilterValueException(Exception):
"""
Raised when a BulkEditColumnFilter has a value that is unsupported by
its FilterMatchType and DataType combination. This is rare,
as the filter creation form should catch most of the issues.
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generated by Django 4.2.18 on 2025-02-25 12:35

from django.db import migrations, models
import uuid


def delete_columns_and_filters(apps, schema_editor):
# this migration was written when none of these models are used in production,
# or even generated by a non-local UI
# Therefore, it's safe to just delete everything and start over.
BulkEditColumnFilter = apps.get_model("data_cleaning", "BulkEditColumnFilter")
BulkEditColumnFilter.objects.all().delete()
BulkEditColumn = apps.get_model("data_cleaning", "BulkEditColumn")
BulkEditColumn.objects.all().delete()


class Migration(migrations.Migration):

dependencies = [
('data_cleaning', '0002_update_fields_and_ordering'),
]

operations = [
migrations.RunPython(delete_columns_and_filters, reverse_code=migrations.RunPython.noop),
migrations.AddField(
model_name='bulkeditcolumn',
name='column_id',
field=models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, unique=True),
),
migrations.AddField(
model_name='bulkeditcolumnfilter',
name='filter_id',
field=models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, unique=True),
),
migrations.AlterField(
model_name='bulkeditcolumnfilter',
name='match_type',
field=models.CharField(choices=[('exact', 'exact'), ('is_not', 'is_not'), ('starts', 'starts'), ('starts_not', 'starts_not'), ('is_empty', 'is_empty'), ('is_not_empty', 'is_not_empty'), ('missing', 'missing'), ('not_missing', 'not_missing'), ('fuzzy', 'fuzzy'), ('not_fuzzy', 'not_fuzzy'), ('phonetic', 'phonetic'), ('not_phonetic', 'not_phonetic'), ('lt', 'lt'), ('gt', 'gt'), ('lte', 'lte'), ('gte', 'gte'), ('is_any', 'is_any'), ('is_not_any', 'is_not_any'), ('is_all', 'is_all'), ('is_not_all', 'is_not_all')], default='exact', max_length=12),
),
]
186 changes: 170 additions & 16 deletions corehq/apps/data_cleaning/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
from django.utils.translation import gettext_lazy, gettext as _

from corehq.apps.case_search.const import METADATA_IN_REPORTS
from corehq.apps.data_cleaning.exceptions import UnsupportedActionException
from corehq.apps.data_cleaning.exceptions import (
UnsupportedActionException,
UnsupportedFilterValueException,
)
from corehq.apps.es import CaseSearchES


class BulkEditSessionType:
Expand Down Expand Up @@ -96,6 +100,44 @@ def status(self):
return "in progress"
return "pending"

def add_column_filter(self, prop_id, data_type, match_type, value=None):
BulkEditColumnFilter.objects.create(
session=self,
index=self.column_filters.count(),
prop_id=prop_id,
data_type=data_type,
match_type=match_type,
value=value,
)

def reorder_column_filters(self, filter_ids):
"""
This updates the order of column filters for this session
:param filter_ids: list of uuids matching filter_id field of BulkEditColumnFilters
"""
if len(filter_ids) != self.column_filters.count():
raise ValueError("the lengths of column_ids and available column filters do not match")
for index, filter_id in enumerate(filter_ids):
column_filter = self.column_filters.get(filter_id=filter_id)
column_filter.index = index
column_filter.save()

def get_queryset(self):
query = CaseSearchES().domain(self.domain).case_type(self.identifier)
query = self._apply_column_filters(query)
return query

def _apply_column_filters(self, query):
xpath_expressions = []
for column_filter in self.column_filters.all():
query = column_filter.filter_query(query)
column_xpath = column_filter.get_xpath_expression()
if column_xpath is not None:
xpath_expressions.append(column_xpath)
if xpath_expressions:
query = query.xpath_query(self.domain, " and ".join(xpath_expressions))
return query


class DataType:
TEXT = 'text'
Expand Down Expand Up @@ -126,19 +168,31 @@ class DataType:
(PASSWORD, gettext_lazy("Password")),
)

FILTER_CATEGORY_TEXT = 'filter_text'
FILTER_CATEGORY_NUMBER = 'filter_number'
FILTER_CATEGORY_DATE = 'filter_date'
FILTER_CATEGORY_MULTI_SELECT = 'filter_multi_select'

FILTER_CATEGORY_DATA_TYPES = {
FILTER_CATEGORY_TEXT: (TEXT, PHONE_NUMBER, BARCODE, PASSWORD, GPS, SINGLE_OPTION, TIME,),
FILTER_CATEGORY_NUMBER: (INTEGER, DECIMAL,),
FILTER_CATEGORY_DATE: (DATE, DATETIME,),
FILTER_CATEGORY_MULTI_SELECT: (MULTIPLE_OPTION,),
}


class FilterMatchType:
EXACT = "exact"
IS_NOT = "is_not"

STARTS = "starts"
ENDS = "ends"
STARTS_NOT = "starts_not"

IS_EMPTY = "is_empty" # empty string
IS_NOT_EMPTY = "is_not_empty"

IS_NULL = "is_null" # un-set
IS_NOT_NULL = "is_not_null"
IS_MISSING = "missing" # un-set
IS_NOT_MISSING = "not_missing"

FUZZY = "fuzzy" # will use fuzzy-match from CQL
FUZZY_NOT = "not_fuzzy" # will use not(fuzzy-match()) from CQL
Expand All @@ -149,6 +203,9 @@ class FilterMatchType:
LESS_THAN = "lt"
GREATER_THAN = "gt"

LESS_THAN_EQUAL = "lte"
GREATER_THAN_EQUAL = "gte"

IS_ANY = "is_any" # we will use selected-any from CQL
IS_NOT_ANY = "is_not_any" # we will use not(selected-any()) from CQL

Expand All @@ -159,38 +216,42 @@ class FilterMatchType:
(EXACT, EXACT),
(IS_NOT, IS_NOT),
(STARTS, STARTS),
(ENDS, ENDS),
(STARTS_NOT, STARTS_NOT),
(IS_EMPTY, IS_EMPTY),
(IS_NOT_EMPTY, IS_NOT_EMPTY),
(IS_NULL, IS_NULL),
(IS_NOT_NULL, IS_NOT_NULL),
(IS_MISSING, IS_MISSING),
(IS_NOT_MISSING, IS_NOT_MISSING),
(FUZZY, FUZZY),
(FUZZY_NOT, FUZZY_NOT),
(PHONETIC, PHONETIC),
(PHONETIC_NOT, PHONETIC_NOT),
(LESS_THAN, LESS_THAN),
(GREATER_THAN, GREATER_THAN),
(LESS_THAN_EQUAL, LESS_THAN_EQUAL),
(GREATER_THAN_EQUAL, GREATER_THAN_EQUAL),
(IS_ANY, IS_ANY),
(IS_NOT_ANY, IS_NOT_ANY),
(IS_ALL, IS_ALL),
(IS_NOT_ALL, IS_NOT_ALL),
)

# choices valid for all data types
ALL_DATA_TYPES_CHOICES = (
(IS_EMPTY, gettext_lazy("is empty")),
(IS_NOT_EMPTY, gettext_lazy("is not empty")),
(IS_MISSING, gettext_lazy("is missing")),
(IS_NOT_MISSING, gettext_lazy("is not missing")),
)

TEXT_CHOICES = (
(EXACT, gettext_lazy("is exactly")),
(IS_NOT, gettext_lazy("is not")),
(STARTS, gettext_lazy("starts with")),
(ENDS, gettext_lazy("ends with")),
(IS_EMPTY, gettext_lazy("is empty")),
(IS_NOT_EMPTY, gettext_lazy("is not empty")),
(IS_NULL, gettext_lazy("is NULL")),
(IS_NOT_NULL, gettext_lazy("is not NULL")),
(STARTS_NOT, gettext_lazy("does not start with")),
(FUZZY, gettext_lazy("is like")),
(FUZZY_NOT, gettext_lazy("is not like")),
(PHONETIC, gettext_lazy("sounds like")),
(PHONETIC_NOT, gettext_lazy("does not sound like")),
(LESS_THAN, gettext_lazy("is before")),
(GREATER_THAN, gettext_lazy("is after")),
)

MULTI_SELECT_CHOICES = (
Expand All @@ -204,20 +265,23 @@ class FilterMatchType:
(EXACT, gettext_lazy("equals")),
(IS_NOT, gettext_lazy("does not equal")),
(LESS_THAN, gettext_lazy("less than")),
(ENDS, gettext_lazy("less than or equal to")),
(LESS_THAN_EQUAL, gettext_lazy("less than or equal to")),
(GREATER_THAN, gettext_lazy("greater than")),
(STARTS, gettext_lazy("greater than or equal to")),
(GREATER_THAN_EQUAL, gettext_lazy("greater than or equal to")),
)

DATE_CHOICES = (
(EXACT, gettext_lazy("on")),
(LESS_THAN, gettext_lazy("before")),
(LESS_THAN_EQUAL, gettext_lazy("before or on")),
(GREATER_THAN, gettext_lazy("after")),
(GREATER_THAN_EQUAL, gettext_lazy("on or after")),
)


class BulkEditColumnFilter(models.Model):
session = models.ForeignKey(BulkEditSession, related_name="column_filters", on_delete=models.CASCADE)
filter_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True, db_index=True)
index = models.IntegerField(default=0)
prop_id = models.CharField(max_length=255) # case property or form question_id
data_type = models.CharField(
Expand All @@ -235,6 +299,95 @@ class BulkEditColumnFilter(models.Model):
class Meta:
ordering = ["index"]

def filter_query(self, query):
filter_query_functions = {
FilterMatchType.IS_EMPTY: lambda q: q.empty(self.prop_id),
FilterMatchType.IS_NOT_EMPTY: lambda q: q.non_null(self.prop_id),
FilterMatchType.IS_MISSING: lambda q: q.missing(self.prop_id),
FilterMatchType.IS_NOT_MISSING: lambda q: q.exists(self.prop_id),
}
if self.match_type in filter_query_functions:
query = filter_query_functions[self.match_type](query)
return query

@staticmethod
def is_data_and_match_type_valid(match_type, data_type):
if match_type in dict(FilterMatchType.ALL_DATA_TYPES_CHOICES):
# empty / missing is always valid regardless of data type
return True

matches_by_category = {
DataType.FILTER_CATEGORY_TEXT: dict(FilterMatchType.TEXT_CHOICES),
DataType.FILTER_CATEGORY_NUMBER: dict(FilterMatchType.NUMBER_CHOICES),
DataType.FILTER_CATEGORY_DATE: dict(FilterMatchType.DATE_CHOICES),
DataType.FILTER_CATEGORY_MULTI_SELECT: dict(FilterMatchType.MULTI_SELECT_CHOICES),
}
for category, valid_data_types in DataType.FILTER_CATEGORY_DATA_TYPES.items():
if data_type in valid_data_types:
return match_type in matches_by_category[category]

return False

@staticmethod
def get_quoted_value(value):
has_single_quote = "'" in value
has_double_quote = '"' in value
if has_double_quote and has_single_quote:
# It seems our current xpath parsing library has no way of escaping quotes.
# A workaround could be to avoid xpath expression parsing altogether and have
# all match_types use `filter_query` directly, but that would require more effort.
# The option to use CaseSearchES `xpath_query` was chosen for development speed,
# acknowledging that there are limitations. We can re-evaluate this decision
# when filtering form data, as we don't have an `xpath_query` filter built for FormES.
raise UnsupportedFilterValueException(
"""We cannot support both single quotes (') and double quotes (") in
a filter value at this time."""
)
return f'"{value}"' if has_single_quote else f"'{value}'"

def get_xpath_expression(self):
"""
Assumption:
- data_type and match_type combination was validated by the form that created this filter

Limitations:
- no support for multiple quote types (double and single) in the same value
- no support for special whitespace characters (tab or newline)
- no `xpath_query` support in `FormES`

We can address limitations in later releases of this tool.
"""
match_operators = {
FilterMatchType.EXACT: '=',
FilterMatchType.IS_NOT: '!=',
FilterMatchType.LESS_THAN: '<',
FilterMatchType.LESS_THAN_EQUAL: '<=',
FilterMatchType.GREATER_THAN: '>',
FilterMatchType.GREATER_THAN_EQUAL: '>=',
}
if self.match_type in match_operators:
# we assume the data type was properly verified on creation
is_number = self.data_type in DataType.FILTER_CATEGORY_DATA_TYPES[DataType.FILTER_CATEGORY_NUMBER]
value = self.value if is_number else self.get_quoted_value(self.value)
operator = match_operators[self.match_type]
return f"{self.prop_id} {operator} {value}"

match_expression = {
FilterMatchType.STARTS: lambda x: f'starts-with({self.prop_id}, {x})',
FilterMatchType.STARTS_NOT: lambda x: f'not(starts-with({self.prop_id}, {x}))',
FilterMatchType.FUZZY: lambda x: f'fuzzy-match({self.prop_id}, {x})',
FilterMatchType.FUZZY_NOT: lambda x: f'not(fuzzy-match({self.prop_id}, {x}))',
FilterMatchType.PHONETIC: lambda x: f'phonetic-match({self.prop_id}, {x})',
FilterMatchType.PHONETIC_NOT: lambda x: f'not(phonetic-match({self.prop_id}, {x}))',
FilterMatchType.IS_ANY: lambda x: f'selected-any({self.prop_id}, {x})',
FilterMatchType.IS_NOT_ANY: lambda x: f'not(selected-any({self.prop_id}, {x}))',
FilterMatchType.IS_ALL: lambda x: f'selected-all({self.prop_id}, {x})',
FilterMatchType.IS_NOT_ALL: lambda x: f'not(selected-all({self.prop_id}, {x}))',
}
if self.match_type in match_expression:
quoted_value = self.get_quoted_value(self.value)
return match_expression[self.match_type](quoted_value)


class PinnedFilterType:
CASE_OWNERS = 'case_owners'
Expand Down Expand Up @@ -285,6 +438,7 @@ def create_default_filters(cls, session):

class BulkEditColumn(models.Model):
session = models.ForeignKey(BulkEditSession, related_name="columns", on_delete=models.CASCADE)
column_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True, db_index=True)
index = models.IntegerField(default=0)
prop_id = models.CharField(max_length=255) # case property or form question_id
label = models.CharField(max_length=255)
Expand Down
Loading
Loading