Skip to content

Commit 30ed6c4

Browse files
committed
refactor: drop django-tree-queries & implement array-based tree functions
The Django-Tree-Queries, together with some visibility queries, causes a ton of heavy performance problems. Therefore, we now implement the tree functionality for Scopes by using an array field to "cache" all the parent PKs of every scope. This allows us to write some much more efficient queries that don't rely on CTEs, and enables subqueries where we previously had to explode a QS to perform the same functionality.
1 parent 03c2216 commit 30ed6c4

10 files changed

+239
-538
lines changed

emeis/core/migrations/0010_scope_full_name.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def set_full_name(apps, schema_editor):
1010
scope_model = apps.get_model("emeis_core.scope")
1111
for scope in scope_model.objects.all().iterator():
1212
# explicitly trigger the set_full_name signal handler
13-
models.set_full_name(instance=scope, sender=set_full_name)
13+
models.set_full_name_and_parents(instance=scope, sender=set_full_name)
1414
scope.save()
1515

1616

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Generated by Django 3.2.25 on 2024-06-13 07:05
2+
3+
import django.contrib.postgres.fields
4+
import django.contrib.postgres.indexes
5+
from django.db import migrations, models
6+
import django.db.models.deletion
7+
8+
9+
def set_all_parents(apps, schema_editor):
10+
from emeis.core.models import set_full_name_and_parents
11+
12+
scope_model = apps.get_model("emeis_core.scope")
13+
for scope in scope_model.objects.all().iterator():
14+
# explicitly trigger the set_full_name signal handler
15+
set_full_name_and_parents(instance=scope, sender=set_all_parents)
16+
scope.save()
17+
18+
19+
class Migration(migrations.Migration):
20+
dependencies = [
21+
("emeis_core", "0011_mptt_to_tree_queries"),
22+
]
23+
24+
operations = [
25+
migrations.AlterModelOptions(
26+
name="scope",
27+
options={"ordering": ["name"]},
28+
),
29+
migrations.AddField(
30+
model_name="scope",
31+
name="all_parents",
32+
field=django.contrib.postgres.fields.ArrayField(
33+
base_field=models.UUIDField(), default=list, size=None
34+
),
35+
),
36+
migrations.AlterField(
37+
model_name="scope",
38+
name="parent",
39+
field=models.ForeignKey(
40+
blank=True,
41+
null=True,
42+
on_delete=django.db.models.deletion.CASCADE,
43+
related_name="children",
44+
to="emeis_core.scope",
45+
),
46+
),
47+
migrations.AddIndex(
48+
model_name="scope",
49+
index=django.contrib.postgres.indexes.GinIndex(
50+
fields=["all_parents"], name="emeis_core__all_par_f8231c_gin"
51+
),
52+
),
53+
migrations.RunPython(set_all_parents, migrations.RunPython.noop),
54+
]

emeis/core/models.py

+85-55
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
1-
import operator
21
import unicodedata
32
import uuid
4-
from functools import reduce
53

64
from django.conf import settings
75
from django.contrib.auth.models import AbstractBaseUser, UserManager
86
from django.contrib.auth.validators import UnicodeUsernameValidator
7+
from django.contrib.postgres.aggregates import ArrayAgg
8+
from django.contrib.postgres.fields import ArrayField
9+
from django.contrib.postgres.indexes import GinIndex
10+
from django.core.exceptions import ValidationError
911
from django.db import models
12+
from django.db.models import Exists, OuterRef, Q, Subquery
1013
from django.db.models.signals import pre_save
1114
from django.dispatch import receiver
1215
from django.utils import timezone, translation
1316
from django.utils.translation import gettext_lazy as _
1417
from localized_fields.fields import LocalizedCharField, LocalizedTextField
15-
from tree_queries.models import TreeNode, TreeQuerySet
1618

1719

1820
def make_uuid():
@@ -160,59 +162,34 @@ def is_authenticated(self):
160162
return True
161163

162164

163-
class ScopeQuerySet(TreeQuerySet):
164-
# django-tree-queries sadly does not (yet?) support ancestors query
165-
# for QS - only for single nodes. So we're providing all_descendants()
166-
# and all_ancestors() queryset methods.
167-
165+
class ScopeQuerySet(models.QuerySet):
168166
def all_descendants(self, include_self=False):
169-
"""Return a QS that contains all descendants of the given QS.
167+
"""Return a QS that contains all descendants."""
168+
expr = Q(all_parents__overlap=self.aggregate(all_pks=ArrayAgg("pk"))["all_pks"])
170169

171-
This is a workaround for django-tree-queries, which currently does
172-
not support this query (it can only do it on single nodes).
170+
if include_self:
171+
expr = expr | Q(pk__in=self)
173172

174-
This is in contrast to .descendants(), which can only give the descendants
175-
of one model instance.
176-
"""
177-
descendants_q = reduce(
178-
operator.or_,
179-
[
180-
models.Q(
181-
pk__in=entry.descendants(include_self=include_self).values("pk")
182-
)
183-
for entry in self
184-
],
185-
models.Q(),
186-
)
187-
return self.model.objects.filter(descendants_q)
173+
return Scope.objects.filter(expr)
188174

189175
def all_ancestors(self, include_self=False):
190-
"""Return a QS that contains all ancestors of the given QS.
176+
"""Return a QS that contains all ancestors."""
191177

192-
This is a workaround for django-tree-queries, which currently does
193-
not support this query (it can only do it on single nodes).
178+
filter_qs = self.filter(all_parents__contains=[OuterRef("pk")])
194179

195-
This is in contrast to .ancestors(), which can only give the ancestors
196-
of one model instance.
197-
"""
180+
new_qs = Scope.objects.all().annotate(_is_ancestor=Exists(Subquery(filter_qs)))
181+
expr = Q(_is_ancestor=True)
198182

199-
descendants_q = reduce(
200-
operator.or_,
201-
[
202-
models.Q(pk__in=entry.ancestors(include_self=include_self).values("pk"))
203-
for entry in self
204-
],
205-
models.Q(),
206-
)
207-
return self.model.objects.filter(descendants_q)
183+
if include_self:
184+
expr = expr | Q(pk__in=self)
185+
186+
return new_qs.filter(expr)
208187

209188
def all_roots(self):
210-
return Scope.objects.all().filter(
211-
pk__in=[scope.ancestors(include_self=True).first().pk for scope in self]
212-
)
189+
return self.all_ancestors(include_self=True).filter(parent__isnull=True)
213190

214191

215-
class Scope(TreeNode, UUIDModel):
192+
class Scope(UUIDModel):
216193
name = LocalizedCharField(_("scope name"), blank=False, null=False, required=False)
217194

218195
full_name = LocalizedCharField(
@@ -224,26 +201,67 @@ class Scope(TreeNode, UUIDModel):
224201
)
225202
is_active = models.BooleanField(default=True)
226203

227-
objects = ScopeQuerySet.as_manager(with_tree_fields=True)
204+
objects = ScopeQuerySet.as_manager()
205+
206+
parent = models.ForeignKey(
207+
"Scope",
208+
null=True,
209+
blank=True,
210+
on_delete=models.CASCADE,
211+
related_name="children",
212+
)
213+
214+
all_parents = ArrayField(models.UUIDField(null=False), default=list)
215+
216+
def ancestors(self, include_self=False):
217+
expr = Q(pk__in=self.all_parents)
218+
if include_self:
219+
expr = expr | Q(pk=self.pk)
220+
return Scope.objects.all().filter(expr)
221+
222+
def descendants(self, include_self=False):
223+
expr = Q(all_parents__contains=[self.pk])
224+
225+
if include_self:
226+
expr = expr | Q(pk=self.pk)
227+
228+
return Scope.objects.all().filter(expr)
228229

229230
def get_root(self):
230-
return self.ancestors(include_self=True).first()
231+
if self.parent_id:
232+
return Scope.objects.get(pk=self.all_parents[0])
233+
else:
234+
return self
231235

232236
def save(self, *args, **kwargs):
233-
# django-tree-queries does validation in TreeNode.clean(), which is not
234-
# called by DRF (only by django forms), so we have to do this here
235-
self.clean()
237+
self._ensure_no_loop()
236238
return super().save(*args, **kwargs)
237239

240+
def _ensure_no_loop(self):
241+
parent = self.parent
242+
while parent:
243+
if parent == self:
244+
raise ValidationError(
245+
"A node cannot be made a descendant or parent of itself"
246+
)
247+
parent = parent.parent
248+
238249
def __str__(self):
239250
return f"{type(self).__name__} ({self.full_name}, pk={self.pk})"
240251

241252
class Meta:
242253
ordering = ["name"]
254+
indexes = [GinIndex(fields=["all_parents"])]
243255

244256

245257
@receiver(pre_save, sender=Scope)
246-
def set_full_name(instance, sender, **kwargs):
258+
def set_full_name_and_parents(instance, sender, **kwargs):
259+
"""Update the `full_name` and `all_parents` properties of the Scope.
260+
261+
The full name depends on the complete list of parents of the Scope.
262+
And to ensure correct behaviour in the queries, the `all_parents`
263+
attribute needs to be updated as well
264+
"""
247265
if kwargs.get("raw"): # pragma: no cover
248266
# Raw is set while loading fixtures. In those
249267
# cases we don't want to mess with data that
@@ -255,6 +273,9 @@ def set_full_name(instance, sender, **kwargs):
255273

256274
forced_lang = settings.EMEIS_FORCE_MODEL_LOCALE.get("scope", None)
257275

276+
old_all_parents = [*instance.all_parents]
277+
old_full_name = {**instance.full_name}
278+
258279
if forced_lang:
259280
# If scope is forced monolingual, do not fill non-forced language fields
260281
languages = [forced_lang]
@@ -263,25 +284,34 @@ def set_full_name(instance, sender, **kwargs):
263284
with translation.override(lang):
264285
instance.full_name[lang] = str(instance.name)
265286

287+
parent_ids = []
266288
parent = instance.parent
267289
while parent:
290+
parent_ids.append(parent.pk)
268291
for lang in languages:
269292
with translation.override(lang):
270293
new_fullname = f"{parent.name} {sep} {instance.full_name[lang]}"
271294
instance.full_name[lang] = new_fullname
272295
parent = parent.parent
273296

297+
# make it root-first
298+
parent_ids.reverse()
299+
instance.all_parents = parent_ids
300+
274301
if forced_lang:
275302
# Ensure only the "forced" language full_name is set, and nothing else
276303
full_name = instance.full_name[forced_lang]
277304
instance.full_name.clear()
278305
instance.full_name[forced_lang] = full_name
279306

280-
# Force update of all children (recursively)
281-
for child in instance.children.all():
282-
# save() triggers the set_full_name signal handler, which will
283-
# recurse all the way down, updating the full_name
284-
child.save()
307+
if old_all_parents != instance.all_parents or old_full_name != dict(
308+
instance.full_name
309+
):
310+
# Something changed - force update all children (recursively)
311+
for child in instance.children.all():
312+
# save() triggers the signal handler, which will
313+
# recurse all the way down, updating the full_name
314+
child.save()
285315

286316

287317
class Role(SlugModel):

emeis/core/serializers.py

+3-15
Original file line numberDiff line numberDiff line change
@@ -80,24 +80,12 @@ class ScopeSerializer(BaseSerializer):
8080
level = serializers.SerializerMethodField()
8181

8282
def get_level(self, obj):
83-
depth = getattr(obj, "tree_depth", None)
84-
if depth is not None:
85-
return depth
86-
87-
# Note: This should only happen on CREATE, never in GET (Either list,
88-
# detail, or include!) In CREATE, it's a new object that doesn't come
89-
# from a QS
90-
91-
# Sometimes, the model object may come out of a non-django-tree-queries
92-
# QS, and thus would not have the `tree_*` attributes amended. Then we
93-
# need to go the "slow path"
9483
if not obj.pk and obj.parent_id:
95-
# unsaved object, sometimes used in unit tests etc
84+
# unsaved object, sometimes used in unit tests etc. Can't rely on
85+
# `all_parents` being set just yet
9686
return self.get_level(obj.parent) + 1
9787

98-
if obj.parent_id:
99-
return obj.ancestors().count()
100-
return 0
88+
return len(obj.all_parents)
10189

10290
class Meta:
10391
model = Scope

0 commit comments

Comments
 (0)