Skip to content

Commit dcdf1bd

Browse files
committed
vibe coding enabled :)
1 parent f3d958f commit dcdf1bd

File tree

2 files changed

+188
-137
lines changed

2 files changed

+188
-137
lines changed

Diff for: packages/ragbits-core/src/ragbits/core/utils/dict_transformations.py

+174-112
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,179 @@ def flatten_dict(input_dict: dict[str, Any], parent_key: str = "", sep: str = ".
4343
return items
4444

4545

46+
def _parse_key(key: str) -> list[tuple[str, bool]]:
47+
"""Parse a key into parts, each part being (name, is_array_index)."""
48+
parts = []
49+
current = ""
50+
i = 0
51+
while i < len(key):
52+
if key[i] == "[":
53+
if current:
54+
parts.append((current, False)) # Parent is not an array
55+
current = ""
56+
i += 1 # Skip [
57+
start = i
58+
while i < len(key) and key[i] != "]":
59+
i += 1
60+
parts.append((key[start:i], True))
61+
i += 1 # Skip ]
62+
if i < len(key) and key[i] == ".":
63+
i += 1 # Skip .
64+
elif key[i] == ".":
65+
if current:
66+
parts.append((current, False))
67+
current = ""
68+
i += 1
69+
else:
70+
current += key[i]
71+
i += 1
72+
if current:
73+
parts.append((current, False))
74+
return parts
75+
76+
77+
def _ensure_array(obj: dict[str, Any] | list[Any], key: str) -> list[Any]:
78+
"""Ensure that obj[key] is a list, creating it if necessary."""
79+
if isinstance(obj, list):
80+
return obj
81+
if key not in obj or not isinstance(obj[key], list):
82+
obj[key] = []
83+
return obj[key]
84+
85+
86+
def _ensure_dict(obj: dict[str, Any] | list[Any], key: str) -> dict[str, Any]:
87+
"""Ensure that obj[key] is a dict, creating it if necessary."""
88+
if isinstance(obj, list):
89+
# Lists should be handled by the caller
90+
raise TypeError("Cannot ensure dict in a list")
91+
if key not in obj or not isinstance(obj[key], dict):
92+
obj[key] = {}
93+
return obj[key]
94+
95+
96+
# Either kind of container the key-walking helpers below can be positioned in.
DictOrList = dict[str, Any] | list[Any]
97+
98+
99+
def _handle_array_part(
100+
current: DictOrList,
101+
part: str,
102+
parent_key: str | None = None,
103+
) -> DictOrList:
104+
"""Handle an array part in the key."""
105+
idx = int(part)
106+
if isinstance(current, list):
107+
while len(current) <= idx:
108+
current.append({})
109+
return current[idx]
110+
if parent_key is None:
111+
raise ValueError(f"Array part '{part}' without parent key")
112+
current_list = _ensure_array(current, parent_key)
113+
while len(current_list) <= idx:
114+
current_list.append({})
115+
return current_list[idx]
116+
117+
118+
def _handle_dict_part(
119+
current: DictOrList,
120+
part: str,
121+
next_is_array: bool,
122+
array_idx: int | None = None,
123+
) -> DictOrList:
124+
"""Handle a dictionary part in the key."""
125+
if isinstance(current, list):
126+
if array_idx is None:
127+
raise ValueError("Array index is required when current is a list")
128+
while len(current) <= array_idx:
129+
current.append({})
130+
current = current[array_idx]
131+
if not isinstance(current, dict):
132+
current = {}
133+
current[str(array_idx)] = current
134+
if next_is_array:
135+
return _ensure_array(current, part)
136+
return _ensure_dict(current, part)
137+
138+
139+
def _handle_single_part(
140+
new_dict: dict[str, Any],
141+
first_part: str,
142+
is_array: bool,
143+
value: SimpleTypes,
144+
) -> None:
145+
"""Handle a single-part key."""
146+
if is_array:
147+
idx = int(first_part)
148+
current = _ensure_array(new_dict, first_part)
149+
while len(current) <= idx:
150+
current.append(None)
151+
current[idx] = value
152+
else:
153+
new_dict[first_part] = value
154+
155+
156+
def _handle_last_array_part(
157+
current_obj: DictOrList,
158+
last_part: str,
159+
value: SimpleTypes,
160+
parts: list[tuple[str, bool]],
161+
) -> None:
162+
"""Handle the last part of the key when it's an array index."""
163+
idx = int(last_part)
164+
if len(parts) == 1:
165+
# Direct array access like "users[0]"
166+
parent_key = parts[0][0]
167+
current_obj = _ensure_array(current_obj, parent_key)
168+
if isinstance(current_obj, list):
169+
while len(current_obj) <= idx:
170+
current_obj.append(None)
171+
current_obj[idx] = value
172+
else:
173+
raise TypeError("Expected list but got dict")
174+
175+
176+
def _handle_last_dict_part(
177+
current_obj: DictOrList,
178+
last_part: str,
179+
value: SimpleTypes,
180+
parts: list[tuple[str, bool]],
181+
) -> None:
182+
"""Handle the last part of the key when it's a dictionary key."""
183+
if isinstance(current_obj, list):
184+
# We're in a list, so we need to ensure the current index has a dict
185+
idx = int(parts[-2][0]) # Get the index from the previous part
186+
while len(current_obj) <= idx:
187+
current_obj.append({})
188+
current_obj = current_obj[idx]
189+
if not isinstance(current_obj, dict):
190+
current_obj = {}
191+
current_obj[str(idx)] = current_obj
192+
if isinstance(current_obj, dict):
193+
current_obj[last_part] = value
194+
else:
195+
raise TypeError("Expected dict but got list")
196+
197+
198+
def _set_value(current: dict[str, Any], parts: list[tuple[str, bool]], value: SimpleTypes) -> None:
    """Store ``value`` in ``current`` at the location described by ``parts``.

    Every part except the last is used to descend, creating intermediate
    lists and dicts through ``_handle_array_part`` and ``_handle_dict_part``.
    The last part performs the actual assignment through
    ``_handle_last_array_part`` or ``_handle_last_dict_part``.

    Args:
        current: The root dictionary being populated.
        parts: Parsed key parts as ``(name, is_array_index)`` tuples.
        value: The value to store at the final location.
    """
    node: DictOrList = current

    # Descend through every part but the final one.
    for position, (name, is_index) in enumerate(parts[:-1]):
        if is_index:
            # The list lives under the name of the preceding part, if any.
            owner = parts[position - 1][0] if position > 0 else None
            node = _handle_array_part(node, name, owner)
            continue
        # A successor always exists here because the last part is excluded.
        followed_by_index = parts[position + 1][1]
        slot = int(name) if isinstance(node, list) else None
        node = _handle_dict_part(node, name, followed_by_index, slot)

    # The final part decides how the value is written.
    final_name, final_is_index = parts[-1]
    write = _handle_last_array_part if final_is_index else _handle_last_dict_part
    write(node, final_name, value, parts)
217+
218+
46219
def unflatten_dict(input_dict: dict[str, Any]) -> dict[str, Any]:
47220
"""
48221
Converts a flattened dictionary with dot notation and array notation into a nested structure.
@@ -73,110 +246,6 @@ def unflatten_dict(input_dict: dict[str, Any]) -> dict[str, Any]:
73246

74247
new_dict: dict[str, Any] = {}
75248

76-
def _parse_key(key: str) -> list[tuple[str, bool]]:
77-
"""Parse a key into parts, each part being (name, is_array_index)."""
78-
parts = []
79-
current = ""
80-
i = 0
81-
while i < len(key):
82-
if key[i] == "[":
83-
if current:
84-
parts.append((current, False)) # Parent is not an array
85-
current = ""
86-
i += 1 # Skip [
87-
start = i
88-
while i < len(key) and key[i] != "]":
89-
i += 1
90-
parts.append((key[start:i], True))
91-
i += 1 # Skip ]
92-
if i < len(key) and key[i] == ".":
93-
i += 1 # Skip .
94-
elif key[i] == ".":
95-
if current:
96-
parts.append((current, False))
97-
current = ""
98-
i += 1
99-
else:
100-
current += key[i]
101-
i += 1
102-
if current:
103-
parts.append((current, False))
104-
return parts
105-
106-
def _ensure_array(obj: dict[str, Any], key: str) -> list:
107-
"""Ensure that obj[key] is a list, creating it if necessary."""
108-
if key not in obj:
109-
obj[key] = []
110-
elif not isinstance(obj[key], list):
111-
obj[key] = []
112-
return obj[key]
113-
114-
def _ensure_dict(obj: Any, key: str) -> dict:
115-
"""Ensure that obj[key] is a dict, creating it if necessary."""
116-
if key not in obj:
117-
obj[key] = {}
118-
elif not isinstance(obj[key], dict):
119-
obj[key] = {}
120-
return obj[key]
121-
122-
def _set_value(current: dict[str, Any], parts: list[tuple[str, bool]], value: Any) -> None:
123-
"""Set a value in the dictionary based on the parsed key parts."""
124-
for i, (part, is_array) in enumerate(parts[:-1]):
125-
if is_array:
126-
idx = int(part)
127-
if i > 0:
128-
# Get the parent key and ensure it's an array
129-
parent_key = parts[i - 1][0]
130-
if isinstance(current, list):
131-
while len(current) <= idx:
132-
current.append({})
133-
current = current[idx]
134-
else:
135-
current = _ensure_array(current, parent_key)
136-
while len(current) <= idx:
137-
current.append({})
138-
current = current[idx]
139-
else:
140-
if i + 1 < len(parts) and parts[i + 1][1]: # Next part is array
141-
if isinstance(current, list):
142-
# We're in a list, so we need to ensure the current index has a dict
143-
idx = int(parts[i][0])
144-
while len(current) <= idx:
145-
current.append({})
146-
current = current[idx]
147-
current = _ensure_array(current, part)
148-
else:
149-
current = _ensure_array(current, part)
150-
else:
151-
if isinstance(current, list):
152-
# We're in a list, so we need to ensure the current index has a dict
153-
idx = int(parts[i][0])
154-
while len(current) <= idx:
155-
current.append({})
156-
current = current[idx]
157-
current = _ensure_dict(current, part)
158-
else:
159-
current = _ensure_dict(current, part)
160-
161-
last_part, is_array = parts[-1]
162-
if is_array:
163-
idx = int(last_part)
164-
if len(parts) == 1:
165-
# Direct array access like "users[0]"
166-
parent_key = parts[0][0]
167-
current = _ensure_array(current, parent_key)
168-
while len(current) <= idx:
169-
current.append(None)
170-
current[idx] = value
171-
else:
172-
if isinstance(current, list):
173-
# We're in a list, so we need to ensure the current index has a dict
174-
idx = int(parts[-2][0]) # Get the index from the previous part
175-
while len(current) <= idx:
176-
current.append({})
177-
current = current[idx]
178-
current[last_part] = value
179-
180249
# Sort keys to ensure we process parents before children
181250
field_keys = sorted(input_dict.keys())
182251
for key in field_keys:
@@ -191,14 +260,7 @@ def _set_value(current: dict[str, Any], parts: list[tuple[str, bool]], value: An
191260

192261
# Set the value
193262
if len(parts) == 1:
194-
if is_array:
195-
idx = int(first_part)
196-
current = _ensure_array(new_dict, first_part)
197-
while len(current) <= idx:
198-
current.append(None)
199-
current[idx] = input_dict[key]
200-
else:
201-
new_dict[first_part] = input_dict[key]
263+
_handle_single_part(new_dict, first_part, is_array, input_dict[key])
202264
else:
203265
_set_value(new_dict, parts, input_dict[key])
204266

Diff for: packages/ragbits-core/tests/unit/utils/test_dict_transformations.py

+14-25
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
from typing import Any
2+
13
import pytest
24

3-
from ragbits.core.utils.dict_transformations import flatten_dict, unflatten_dict
5+
from ragbits.core.utils.dict_transformations import SimpleTypes, flatten_dict, unflatten_dict
46

57

68
def test_flatten_dict_simple():
79
"""Test flattening a simple dictionary."""
8-
input_dict = {"key1": "value1", "key2": "value2"}
9-
expected = {"key1": "value1", "key2": "value2"}
10+
input_dict: dict[str, Any] = {"key1": "value1", "key2": "value2"}
11+
expected: dict[str, SimpleTypes] = {"key1": "value1", "key2": "value2"}
1012
assert flatten_dict(input_dict) == expected
1113

1214

@@ -46,11 +48,11 @@ def test_flatten_dict_empty():
4648

4749
def test_flatten_dict_with_non_dict_values():
4850
"""Test flattening a dictionary with various value types."""
49-
input_dict = {
51+
input_dict: dict[str, Any] = {
5052
"key1": "value1",
5153
"nested": {"subkey1": 42, "subkey2": [1, 2, 3], "subkey3": {"a": 1}, "subkey4": True, "subkey5": 3.14},
5254
}
53-
expected = {
55+
expected: dict[str, SimpleTypes] = {
5456
"key1": "value1",
5557
"nested.subkey1": 42,
5658
"nested.subkey2[0]": 1,
@@ -65,7 +67,7 @@ def test_flatten_dict_with_non_dict_values():
6567

6668
def test_flatten_unflatten():
6769
"""Test flattening and unflattening a dictionary."""
68-
input_dict = {
70+
input_dict: dict[str, Any] = {
6971
"key1": "value1",
7072
"nested": {
7173
"subkey1": "subvalue1",
@@ -154,8 +156,8 @@ def test_mixed_array_and_object():
154156
def test_unflatten_dict_notation_based_types():
155157
"""Test that unflatten_dict uses notation to determine types, not heuristics."""
156158
# Test that numeric keys without array notation stay as dict
157-
input_dict = {"0": "first", "1": "second", "2": "third"}
158-
expected = {"0": "first", "1": "second", "2": "third"}
159+
input_dict: dict[str, SimpleTypes] = {"0": "first", "1": "second", "2": "third"}
160+
expected: dict[str, Any] = {"0": "first", "1": "second", "2": "third"}
159161
assert unflatten_dict(input_dict) == expected
160162

161163
# Test that array notation creates lists
@@ -171,31 +173,18 @@ def test_unflatten_dict_notation_based_types():
171173
"nested[0].name": "John",
172174
"nested[0].age": 30,
173175
"nested[1].name": "Jane",
174-
"nested[1].age": 25
176+
"nested[1].age": 25,
175177
}
176178
expected = {
177179
"dict_key": "value",
178180
"list_key": ["first", "second"],
179-
"nested": [
180-
{"name": "John", "age": 30},
181-
{"name": "Jane", "age": 25}
182-
]
181+
"nested": [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}],
183182
}
184183
assert unflatten_dict(input_dict) == expected
185184

186185
# Test that numeric keys in nested dicts stay as dict
187-
input_dict = {
188-
"config.0.name": "first",
189-
"config.1.name": "second",
190-
"config.2.name": "third"
191-
}
192-
expected = {
193-
"config": {
194-
"0": {"name": "first"},
195-
"1": {"name": "second"},
196-
"2": {"name": "third"}
197-
}
198-
}
186+
input_dict = {"config.0.name": "first", "config.1.name": "second", "config.2.name": "third"}
187+
expected = {"config": {"0": {"name": "first"}, "1": {"name": "second"}, "2": {"name": "third"}}}
199188
assert unflatten_dict(input_dict) == expected
200189

201190

0 commit comments

Comments
 (0)