Skip to content

Commit 58844ab

Browse files
committed
⚡ [#538] Optimize objects list performance
The filter that only includes the object records with the highest index per object was causing major performance degradation, especially in combination with filters on data_attrs. Instead of using `Max(...)` together with GROUP BY, we now use a `Window` expression (`RowNumber()` partitioned by object and ordered by descending index, keeping only row number 1) to determine the max index per object, which is more efficient for larger datasets.
1 parent 0654a9a commit 58844ab

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

src/objects/core/query.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from django.db import models
2+
from django.db.models import F, Window
3+
from django.db.models.functions import RowNumber
24

35
from vng_api_common.utils import get_uuid_from_path
46
from zgw_consumers.models import Service
@@ -43,14 +45,20 @@ def keep_max_record_per_object(self):
4345
"""
4446
Return records with the largest index for the object
4547
"""
46-
filtered_records = self.order_by()
47-
grouped_records = (
48-
filtered_records.filter(object=models.OuterRef("object"))
49-
.values("object")
50-
.annotate(max_index=models.Max("index"))
51-
.values("max_index")
48+
filtered_records = (
49+
self.filter(object=models.OuterRef("object"))
50+
.annotate(
51+
row_number=Window(
52+
expression=RowNumber(),
53+
partition_by=[F("object")],
54+
order_by=F("index").desc(),
55+
)
56+
)
57+
.filter(row_number=1)
58+
.values("index")
5259
)
53-
return self.filter(index=models.Subquery(grouped_records))
60+
61+
return self.filter(index__in=filtered_records)
5462

5563
def filter_for_date(self, date):
5664
"""

src/objects/tests/v2/test_object_api.py

+38
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,44 @@ def test_list_available_today(self):
594594
self.assertEqual(object_data["uuid"], str(self.object.uuid))
595595
self.assertEqual(object_data["record"]["data"], {"name": "new"})
596596

597+
@freeze_time("2024-08-31")
598+
def test_only_show_latest_index(self):
599+
"""
600+
In the list endpoint, only the latest record that existed at the given date
601+
should show up
602+
"""
603+
object_url = reverse("object-detail", kwargs={"uuid": self.object.uuid})
604+
object2 = ObjectFactory.create(object_type=self.object_type)
605+
object2_url = reverse("object-detail", kwargs={"uuid": object2.uuid})
606+
ObjectRecordFactory.create(
607+
object=object2,
608+
index=1,
609+
data={"name": "old"},
610+
start_at="2024-08-01",
611+
end_at="2024-08-28",
612+
registration_at="2024-08-02",
613+
)
614+
ObjectRecordFactory.create(
615+
object=object2,
616+
index=2,
617+
data={"name": "new"},
618+
start_at="2024-08-28",
619+
end_at="2024-09-30",
620+
registration_at="2024-08-02",
621+
)
622+
623+
response = self.client.get(self.url, {"date": "2024-08-30"})
624+
625+
self.assertEqual(response.status_code, status.HTTP_200_OK)
626+
627+
data = response.json()
628+
629+
self.assertEqual(data["count"], 2)
630+
self.assertEqual(data["results"][0]["record"]["index"], 2)
631+
self.assertEqual(data["results"][0]["url"], f"http://testserver{object2_url}")
632+
self.assertEqual(data["results"][1]["record"]["index"], 1)
633+
self.assertEqual(data["results"][1]["url"], f"http://testserver{object_url}")
634+
597635
def test_list_available_for_date(self):
598636
with self.subTest("filter on old name"):
599637
response = self.client.get(

0 commit comments

Comments
 (0)