Skip to content

Commit d322f06

Browse files
committed
Implement entry referral tracking for Spanner Advanced Search
- Add support for tracking referral relationships between entries in Spanner
- Create a new AdvancedSearchEntryReferral table to store referral connections
- Implement a method to convert origin entry IDs to Spanner entry IDs
- Add a property graph to represent referral relationships
- Enhance error handling for batch write operations
- Collect and process referral relationships during entry synchronization
1 parent 7e03e07 commit d322f06

File tree

3 files changed

+134
-15
lines changed

3 files changed

+134
-15
lines changed

entry/services.py

+72-10
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,11 @@ def update_documents(kls, entity: Entity, is_update: bool = False):
345345

346346
# Process entries in chunks to avoid too large mutation groups
347347
entry_chunks = [entry_list[i : i + 100] for i in range(0, len(entry_list), 100)]
348+
entry_referrals: set[tuple[str, int]] = (
349+
set()
350+
) # {(spanner_entry_id, referral_origin_entry_id), ...}
351+
352+
# First, write all entries and their attributes
348353
for chunk in entry_chunks:
349354
# Process each entry in the chunk with its own mutation group
350355
with repo.database.mutation_groups() as mg:
@@ -387,15 +392,28 @@ def update_documents(kls, entity: Entity, is_update: bool = False):
387392
)
388393

389394
for attrv in attr.prefetch_values:
390-
spanner_attribute_values.append(
391-
AdvancedSearchAttributeValue.create_instance(
392-
entry_id=spanner_entry_id,
393-
attribute_id=spanner_attr_id,
394-
attribute_value_id=str(uuid.uuid4()),
395-
entity_attr=entity_attr,
396-
attrv=attrv,
397-
)
395+
value = AdvancedSearchAttributeValue.create_instance(
396+
entry_id=spanner_entry_id,
397+
attribute_id=spanner_attr_id,
398+
attribute_value_id=str(uuid.uuid4()),
399+
entity_attr=entity_attr,
400+
attrv=attrv,
398401
)
402+
spanner_attribute_values.append(value)
403+
404+
# Collect referral relationships
405+
match entity_attr.type:
406+
case AttrType.OBJECT | AttrType.NAMED_OBJECT:
407+
if attrv.referral:
408+
entry_referrals.add(
409+
(spanner_entry_id, attrv.referral.id)
410+
)
411+
case AttrType.ARRAY_OBJECT | AttrType.ARRAY_NAMED_OBJECT:
412+
for array_value in attrv.data_array.all():
413+
if array_value.referral:
414+
entry_referrals.add(
415+
(spanner_entry_id, array_value.referral.id)
416+
)
399417

400418
# Create a mutation group for this entry and its related data
401419
group = mg.group()
@@ -407,8 +425,52 @@ def update_documents(kls, entity: Entity, is_update: bool = False):
407425

408426
# Batch write all mutation groups for this chunk
409427
responses = mg.batch_write()
410-
if not all(response.status.code == 0 for response in responses):
411-
raise Exception(f"Failed to batch write to Spanner: {responses}")
428+
if any(response.status.code != 0 for response in responses):
429+
error_details = [
430+
(
431+
f"code: {response.status.code}, "
432+
f"message: {response.status.message}"
433+
)
434+
for response in responses
435+
if response.status.code != 0
436+
]
437+
raise Exception(f"Failed to batch write to Spanner: {error_details}")
438+
439+
if entry_referrals:
440+
# Get mapping from OriginEntryId to EntryId for referrals
441+
referral_origin_ids = {ref_id for _, ref_id in entry_referrals}
442+
entry_id_mapping = repo.get_entry_id_mapping(list(referral_origin_ids))
443+
444+
# Convert OriginEntryId to EntryId and filter out any missing mappings
445+
converted_referrals = [
446+
(entry_id, entry_id_mapping[ref_id])
447+
for entry_id, ref_id in entry_referrals
448+
if ref_id in entry_id_mapping
449+
]
450+
451+
if converted_referrals:
452+
with repo.database.mutation_groups() as mg:
453+
referral_chunks = [
454+
converted_referrals[i : i + 1000]
455+
for i in range(0, len(converted_referrals), 1000)
456+
]
457+
for chunk in referral_chunks:
458+
group = mg.group()
459+
repo.insert_entry_referrals(chunk, group)
460+
461+
responses = mg.batch_write()
462+
if any(response.status.code != 0 for response in responses):
463+
error_details = [
464+
(
465+
f"code: {response.status.code}, "
466+
f"message: {response.status.message}"
467+
)
468+
for response in responses
469+
if response.status.code != 0
470+
]
471+
raise Exception(
472+
f"Failed to batch write referrals to Spanner: {error_details}"
473+
)
412474

413475
except Exception as e:
414476
Logger.warning(f"Failed to sync data to Spanner: {e}")

entry/spanner/schema.sql

+17-5
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,25 @@ CREATE TABLE AdvancedSearchAttributeValue (
2626
AttributeId STRING(36) NOT NULL,
2727
AttributeValueId STRING(36) NOT NULL,
2828
Value STRING(MAX) NOT NULL,
29-
RawValue JSON,
30-
31-
Value_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Value)) HIDDEN
29+
RawValue JSON
3230
) PRIMARY KEY (EntryId, AttributeId, AttributeValueId),
3331
INTERLEAVE IN PARENT AdvancedSearchAttribute ON DELETE CASCADE;
3432

3533
CREATE INDEX AdvancedSearchAttributeValueParentKeyIndex ON AdvancedSearchAttributeValue(EntryId, AttributeId);
3634

37-
CREATE SEARCH INDEX AdvancedSearchAttributeValueValueIndex
38-
ON AdvancedSearchAttributeValue(Value_Tokens);
35+
36+
-- One row per directed referral edge between two entries: the entry identified
-- by EntryId refers to the entry identified by ReferralId. Both columns must
-- match an existing AdvancedSearchEntry.EntryId, and the (EntryId, ReferralId)
-- primary key keeps each edge unique.
-- NOTE(review): neither foreign key declares an ON DELETE action, whereas the
-- interleaved tables in this schema use ON DELETE CASCADE -- confirm that the
-- entry deletion path removes referral rows before deleting entries.
CREATE TABLE AdvancedSearchEntryReferral (
  EntryId    STRING(36) NOT NULL,
  ReferralId STRING(36) NOT NULL,
  FOREIGN KEY (EntryId) REFERENCES AdvancedSearchEntry (EntryId),
  FOREIGN KEY (ReferralId) REFERENCES AdvancedSearchEntry (EntryId)
) PRIMARY KEY (EntryId, ReferralId);
42+
43+
-- Property graph over the entry tables: AdvancedSearchEntry rows are the nodes
-- and AdvancedSearchEntryReferral rows are the edges, labelled Referral and
-- running from EntryId (source key) to ReferralId (destination key).
CREATE PROPERTY GRAPH AdvancedSearchGraph
  NODE TABLES (AdvancedSearchEntry)
  EDGE TABLES (
    AdvancedSearchEntryReferral
      SOURCE KEY (EntryId) REFERENCES AdvancedSearchEntry (EntryId)
      DESTINATION KEY (ReferralId) REFERENCES AdvancedSearchEntry (EntryId)
      LABEL Referral
  );

entry/spanner_advanced_search.py

+45
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,25 @@ def insert_attribute_values(
237237
],
238238
)
239239

240+
def insert_entry_referrals(
    self, entry_referrals: list[tuple[str, str]], operation: SpannerOperation
) -> None:
    """Queue referral rows for insertion into ``AdvancedSearchEntryReferral``.

    Args:
        entry_referrals: ``(EntryId, ReferralId)`` pairs, one per row, in the
            same order as the target columns.
        operation: Spanner operation (batch or mutation group) whose
            ``insert`` method receives the rows.

    An empty ``entry_referrals`` is a no-op: ``operation.insert`` is not
    called at all.
    """
    if entry_referrals:
        operation.insert(
            table="AdvancedSearchEntryReferral",
            columns=("EntryId", "ReferralId"),
            values=entry_referrals,
        )
258+
240259
# Keep the single-record methods for backward compatibility
241260
def insert_entry(self, entry: AdvancedSearchEntry, transaction=None) -> None:
242261
"""Insert a single entry"""
@@ -776,3 +795,29 @@ def get_joined_entries(
776795
"ret_count": len(join_entries_dict),
777796
"ret_values": list(join_entries_dict.values()),
778797
}
798+
799+
def get_entry_id_mapping(self, origin_entry_ids: list[int]) -> dict[int, str]:
    """Resolve original (Django) entry IDs to their Spanner ``EntryId`` values.

    Issues one query against ``AdvancedSearchEntry`` inside a snapshot and
    collects the ``(OriginEntryId, EntryId)`` rows it returns.

    Args:
        origin_entry_ids: ``OriginEntryId`` values to look up.

    Returns:
        Dictionary keyed by ``OriginEntryId`` with the matching ``EntryId`` as
        value. IDs for which the query returns no row are simply absent. An
        empty input yields an empty dict without running any query.
    """
    mapping: dict[int, str] = {}
    if not origin_entry_ids:
        return mapping

    sql = """
        SELECT OriginEntryId, EntryId
        FROM AdvancedSearchEntry
        WHERE OriginEntryId IN UNNEST(@origin_entry_ids)
    """
    int64_array = spanner_v1.param_types.Array(spanner_v1.param_types.INT64)

    with self.database.snapshot() as snapshot:
        rows = snapshot.execute_sql(
            sql,
            params={"origin_entry_ids": origin_entry_ids},
            param_types={"origin_entry_ids": int64_array},
        )
        # Consume the result stream while the snapshot is still open.
        for origin_id, spanner_id in rows:
            mapping[origin_id] = spanner_id

    return mapping

0 commit comments

Comments
 (0)