Skip to content

Commit 87c96f1

Browse files
Merge pull request #5085 from open-formulieren/issue/5084-fix-performance-form-definition-update
🚑 Fix performance form definition update
2 parents 7c10f99 + 86b83cc commit 87c96f1

File tree

3 files changed

+461
-65
lines changed

3 files changed

+461
-65
lines changed

src/openforms/formio/utils.py

-16
Original file line numberDiff line numberDiff line change
@@ -177,22 +177,6 @@ def is_layout_component(component: Component) -> bool:
177177
return False
178178

179179

180-
def component_in_editgrid(configuration: dict, component: dict) -> bool:
181-
# Get all the editgrid components in the configuration
182-
editgrids = []
183-
for comp in iter_components(configuration=configuration):
184-
if comp["type"] == "editgrid":
185-
editgrids.append(comp)
186-
187-
# Check if the component is in the editgrid
188-
for editgrid in editgrids:
189-
for comp in iter_components(configuration=editgrid):
190-
if comp["key"] == component["key"]:
191-
return True
192-
193-
return False
194-
195-
196180
def get_component_datatype(component):
197181
component_type = component["type"]
198182
if component.get("multiple"):

src/openforms/forms/models/form_variable.py

+135-47
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
from __future__ import annotations
22

3-
from copy import deepcopy
4-
from typing import TYPE_CHECKING, ClassVar
3+
import itertools
4+
from copy import copy, deepcopy
5+
from typing import ClassVar
56

67
from django.db import models, transaction
78
from django.db.models import CheckConstraint, Q
89
from django.utils.translation import gettext_lazy as _
910

10-
from glom import glom
11+
import elasticapm
1112

1213
from openforms.formio.utils import (
13-
component_in_editgrid,
1414
get_component_datatype,
1515
get_component_default_value,
1616
is_layout_component,
@@ -26,29 +26,42 @@
2626
)
2727
from openforms.variables.utils import check_initial_value
2828

29+
from .form import Form
2930
from .form_definition import FormDefinition
3031

31-
if TYPE_CHECKING:
32-
from .form import Form
33-
34-
3532
EMPTY_PREFILL_PLUGIN = Q(prefill_plugin="")
3633
EMPTY_PREFILL_ATTRIBUTE = Q(prefill_attribute="")
3734
EMPTY_PREFILL_OPTIONS = Q(prefill_options={})
3835
USER_DEFINED = Q(source=FormVariableSources.user_defined)
3936

37+
# these are the attributes that are derived from the matching formio component,
38+
# see FormVariableManager.synchronize_for. Other attributes are relational or
39+
# related to user defined variables (like service fetch, prefill options...).
40+
UPSERT_ATTRIBUTES_TO_COMPARE: tuple[str, ...] = (
41+
"prefill_plugin",
42+
"prefill_attribute",
43+
"prefill_identifier_role",
44+
"name",
45+
"is_sensitive_data",
46+
"data_type",
47+
"initial_value",
48+
)
49+
4050

4151
class FormVariableManager(models.Manager["FormVariable"]):
4252
use_in_migrations = True
4353

4454
@transaction.atomic
45-
def create_for_form(self, form: "Form") -> None:
46-
form_steps = form.formstep_set.select_related("form_definition")
55+
def create_for_form(self, form: Form) -> None:
56+
form_steps = form.formstep_set.select_related( # pyright: ignore[reportAttributeAccessIssue]
57+
"form_definition"
58+
)
4759

4860
for form_step in form_steps:
4961
self.synchronize_for(form_step.form_definition)
5062

51-
@transaction.atomic
63+
@elasticapm.capture_span(span_type="app.core.models")
64+
@transaction.atomic(savepoint=False)
5265
def synchronize_for(self, form_definition: FormDefinition):
5366
"""
5467
Synchronize the form variables for a given form definition.
@@ -64,32 +77,40 @@ def synchronize_for(self, form_definition: FormDefinition):
6477
"""
6578
# Build the desired state
6679
desired_variables: list[FormVariable] = []
80+
desired_keys: set[str] = set()
6781
# XXX: looping over the configuration_wrapper is not (yet) viable because it
6882
# also yields the components nested inside edit grids, which we need to ignore.
69-
# So, we stick to iter_components. Performance wise this should be okay since we
70-
# only need to do one pass.
83+
# So, we stick to iter_components. We deliberately do a single pass to process
84+
# the formio definition and then "copy" the information for each affected form
85+
# so that we can avoid excessive component tree processing.
7186
configuration = form_definition.configuration
72-
for component in iter_components(configuration=configuration, recursive=True):
87+
for component in iter_components(
88+
configuration=configuration,
89+
recursive=True,
90+
# components inside edit grids are not real variables
91+
recurse_into_editgrid=False,
92+
):
7393
# we need to ignore components that don't actually hold any values - there's
7494
# no point to create variables for those.
7595
if is_layout_component(component):
7696
continue
7797
if component["type"] in ("content", "softRequiredErrors"):
7898
continue
79-
if component_in_editgrid(configuration, component):
80-
continue
8199

82100
# extract options from the component
83-
prefill_plugin = glom(component, "prefill.plugin", default="") or ""
84-
prefill_attribute = glom(component, "prefill.attribute", default="") or ""
85-
prefill_identifier_role = glom(
86-
component, "prefill.identifierRole", default=IdentifierRoles.main
101+
prefill = component.get("prefill", {})
102+
prefill_plugin = prefill.get("plugin") or ""
103+
prefill_attribute = prefill.get("attribute") or ""
104+
prefill_identifier_role = (
105+
prefill.get("identifierRole") or IdentifierRoles.main
87106
)
88107

108+
key = component["key"]
109+
desired_keys.add(key)
89110
desired_variables.append(
90111
self.model(
91112
form=None, # will be set later when visiting all affected forms
92-
key=component["key"],
113+
key=key,
93114
form_definition=form_definition,
94115
prefill_plugin=prefill_plugin,
95116
prefill_attribute=prefill_attribute,
@@ -102,8 +123,6 @@ def synchronize_for(self, form_definition: FormDefinition):
102123
)
103124
)
104125

105-
desired_keys = [variable.key for variable in desired_variables]
106-
107126
# if the Formio configuration of the form definition itself is updated and
108127
# components have been removed or their keys have changed, we know for certain
109128
# we can discard those old form variables - it doesn't matter which form they
@@ -113,41 +132,81 @@ def synchronize_for(self, form_definition: FormDefinition):
113132
)
114133
stale_variables.delete()
115134

116-
# check which form (steps) are affected and patch them up. It is irrelevant whether
135+
# Check which forms are affected and patch them up.
136+
# It is irrelevant whether
117137
# the form definition is re-usable or not, though semantically at most one form step
118138
# should be found for single-use form definitions.
119-
# fmt: off
120-
affected_form_steps = (
121-
form_definition
122-
.formstep_set # pyright: ignore[reportAttributeAccessIssue]
123-
.select_related("form")
139+
affected_forms = (
140+
Form.objects.filter(formstep__form_definition=form_definition)
141+
.order_by("id")
142+
.distinct("id")
143+
.values_list("pk", flat=True)
144+
.iterator()
124145
)
125-
# fmt: on
126146

127-
# Finally, collect all the instances and efficiently upsert them - creating missing
147+
# Collect all the instances and efficiently upsert them - creating missing
128148
# variables and updating existing variables in a single query.
129149
to_upsert: list[FormVariable] = []
130-
for step in affected_form_steps:
150+
151+
# We check which form variables actually need to be updated or inserted. If
152+
# naively sending everything to the UPSERT we are sending pointless data to
153+
# Postgres which puts unnecessary load.
154+
existing_form_variables = (
155+
self.filter(form_definition=form_definition).order_by("form_id").iterator()
156+
)
157+
# keep track of (form_id, form_key) tuples that were considered, so that we can
158+
# efficiently decide whether we can ignore it or not based on existing variables.
159+
seen: set[tuple[int, str]] = set()
160+
161+
# first look at form variables that already exist since we can exclude those
162+
# from the upsert
163+
form_variables_by_form = itertools.groupby(
164+
existing_form_variables, key=lambda fv: fv.form_id
165+
)
166+
for form_id, variables in form_variables_by_form:
167+
assert form_id is not None
168+
variables_by_key = {variable.key: variable for variable in variables}
169+
170+
def _add_variable(variable: FormVariable):
171+
form_specific_variable = copy(variable)
172+
form_specific_variable.form_id = form_id
173+
to_upsert.append(form_specific_variable)
174+
175+
# check whether we need to create or update the variable by comparing against
176+
# existing variables.
177+
for desired_variable in desired_variables:
178+
existing_variable = variables_by_key.get(key := desired_variable.key)
179+
seen.add((form_id, key))
180+
181+
if existing_variable is None:
182+
_add_variable(desired_variable)
183+
continue
184+
185+
# otherwise, check if we need to update or can skip this variable to
186+
# make the upsert query smaller
187+
if not existing_variable.matches(desired_variable):
188+
# it needs to be updated
189+
_add_variable(desired_variable)
190+
191+
# Finally, process variables that don't exist yet at all
192+
for form_id in affected_forms:
131193
for variable in desired_variables:
132-
form_specific_variable = deepcopy(variable)
133-
form_specific_variable.form = step.form
194+
_lookup = (form_id, variable.key)
195+
# it already exists and has been processed
196+
if _lookup in seen:
197+
continue
198+
199+
# it doesn't exist and needs to be created
200+
form_specific_variable = copy(variable)
201+
form_specific_variable.form_id = form_id
134202
to_upsert.append(form_specific_variable)
135203

136204
self.bulk_create(
137205
to_upsert,
138206
# enables UPSERT behaviour so that existing records get updated and missing
139207
# records inserted
140208
update_conflicts=True,
141-
update_fields=(
142-
"prefill_plugin",
143-
"prefill_attribute",
144-
"prefill_identifier_role",
145-
"name",
146-
"is_sensitive_data",
147-
"source",
148-
"data_type",
149-
"initial_value",
150-
),
209+
update_fields=UPSERT_ATTRIBUTES_TO_COMPARE + ("source",),
151210
unique_fields=("form", "key"),
152211
)
153212

@@ -159,6 +218,7 @@ class FormVariable(models.Model):
159218
help_text=_("Form to which this variable is related"),
160219
on_delete=models.CASCADE,
161220
)
221+
form_id: int | None
162222
form_definition = models.ForeignKey(
163223
to=FormDefinition,
164224
verbose_name=_("form definition"),
@@ -303,6 +363,11 @@ class Meta:
303363
def __str__(self):
304364
return _("Form variable '{key}'").format(key=self.key)
305365

366+
def save(self, *args, **kwargs):
367+
self.check_data_type_and_initial_value()
368+
369+
super().save(*args, **kwargs)
370+
306371
@property
307372
def json_schema(self) -> JSONObject | None:
308373
return self._json_schema
@@ -355,7 +420,30 @@ def check_data_type_and_initial_value(self):
355420
if self.source == FormVariableSources.user_defined:
356421
self.initial_value = check_initial_value(self.initial_value, self.data_type)
357422

358-
def save(self, *args, **kwargs):
359-
self.check_data_type_and_initial_value()
423+
def matches(self, other: FormVariable) -> bool:
424+
"""
425+
Check if the other form variable matches this instance.
360426
361-
super().save(*args, **kwargs)
427+
Matching can only be performed on component variables to check if they are
428+
still in sync. Foreign key relations to form etc. are ignored as this doesn't
429+
contain semantic information.
430+
"""
431+
assert (
432+
self.source == FormVariableSources.component
433+
), "Can only compare component variables"
434+
assert (
435+
other.source == FormVariableSources.component
436+
), "Can only compare component variables"
437+
assert self.key == other.key, (
438+
"Different keys are being compared, are you sure you're comparing "
439+
"the right instances?"
440+
)
441+
442+
# these are the attributes that are derived from the matching formio component,
443+
# see FormVariableManager.synchronize_for. Other attributes are relational or
444+
# related to user defined variables (like service fetch, prefill options...).
445+
all_equal = all(
446+
getattr(self, attr) == getattr(other, attr)
447+
for attr in UPSERT_ATTRIBUTES_TO_COMPARE
448+
)
449+
return all_equal

0 commit comments

Comments
 (0)