1
1
import typing
2
2
import collections
3
+ import dataclasses
3
4
from typing import Annotated , NamedTuple , Any
4
5
5
6
class Vector (NamedTuple ):
6
7
dim : int | None
7
8
8
- class NumBits (NamedTuple ):
9
- bits : int
10
-
9
class TypeKind(NamedTuple):
    """Annotation metadata that carries an explicit kind name for a type.

    `_dump_type` uses `kind` verbatim as the emitted 'kind' value when this
    metadata is present on a non-sequence, non-dataclass type.
    """

    # Kind name to emit, e.g. 'Float32' or 'Json'.
    kind: str
11
11
class TypeAttr :
12
12
key : str
13
13
value : Any
@@ -16,10 +16,10 @@ def __init__(self, key: str, value: Any):
16
16
self .key = key
17
17
self .value = value
18
18
19
- Float32 = Annotated [float , NumBits ( 32 )]
20
- Float64 = Annotated [float , NumBits ( 64 )]
21
- Range = Annotated [tuple [int , int ], 'range' ]
22
- Json = Annotated [Any , 'json' ]
19
# Annotated aliases: each one tags a plain Python type with the TypeKind
# whose `kind` is emitted when the type is dumped.
Float32 = Annotated[float, TypeKind(kind='Float32')]
Float64 = Annotated[float, TypeKind(kind='Float64')]
Range = Annotated[tuple[int, int], TypeKind(kind='Range')]
Json = Annotated[Any, TypeKind(kind='Json')]
23
23
24
24
def _find_annotation (metadata , cls ):
25
25
for m in iter (metadata ):
@@ -32,45 +32,65 @@ def _get_origin_type_and_metadata(t):
32
32
return (t .__origin__ , t .__metadata__ )
33
33
return (t , ())
34
34
35
- def _basic_type_to_json_value (t , metadata ):
35
def _dump_fields_schema(cls: type) -> list[dict[str, Any]]:
    """Describe every field of a dataclass as a name / value-type entry.

    Args:
        cls: The dataclass whose fields are enumerated with
            `dataclasses.fields`.

    Returns:
        One dict per field, in declaration order, holding the field's 'name'
        and its 'value_type' as encoded by `_dump_enriched_type`.
    """
    schema: list[dict[str, Any]] = []
    for member in dataclasses.fields(cls):
        entry = {
            'name': member.name,
            'value_type': _dump_enriched_type(member.type),
        }
        schema.append(entry)
    return schema
43
+
44
def _dump_type(t, metadata):
    """Encode a Python type as a JSON-compatible type description.

    Args:
        t: The type to encode, as returned by `_get_origin_type_and_metadata`.
        metadata: The annotation metadata that accompanied `t`.

    Returns:
        A dict with a 'kind' entry:
        - 'Vector' (with 'element_type' and 'dimension') for a list/Sequence
          carrying `Vector` metadata;
        - 'Table' (with 'row') for a list/Sequence of a dataclass;
        - 'Struct' (with 'fields') for a dataclass;
        - the `TypeKind.kind` value when `TypeKind` metadata is present;
        - otherwise the built-in mapping for bytes, str, bool, int and float.

    Raises:
        ValueError: If the type matches none of the supported shapes.
    """
    container = typing.get_origin(t)
    if container is collections.abc.Sequence or container is list:
        type_args = typing.get_args(t)
        item_type, item_metadata = _get_origin_type_and_metadata(type_args[0])
        vector_info = _find_annotation(metadata, Vector)
        if vector_info is not None:
            return {
                'kind': 'Vector',
                'element_type': _dump_type(item_type, item_metadata),
                'dimension': vector_info.dim,
            }
        if dataclasses.is_dataclass(item_type):
            return {
                'kind': 'Table',
                'row': _dump_fields_schema(item_type),
            }
        raise ValueError(f"Unsupported type: {t} ")

    if dataclasses.is_dataclass(t):
        return {
            'kind': 'Struct',
            'fields': _dump_fields_schema(t),
        }

    # An explicit TypeKind annotation takes precedence over the built-in mapping.
    explicit_kind = _find_annotation(metadata, TypeKind)
    if explicit_kind is not None:
        return {'kind': explicit_kind.kind}

    # Compared by identity (not equality or hashing), so only the exact
    # built-in classes match.
    builtin_kinds = (
        (bytes, 'Bytes'),
        (str, 'Str'),
        (bool, 'Bool'),
        (int, 'Int64'),
        (float, 'Float64'),
    )
    for builtin, kind_name in builtin_kinds:
        if t is builtin:
            return {'kind': kind_name}
    raise ValueError(f"type unsupported yet: {t} ")
68
88
69
- def _enriched_type_to_json_value (t ) -> dict [str , Any ] | None :
89
+ def _dump_enriched_type (t ) -> dict [str , Any ] | None :
70
90
if t is None :
71
91
return None
72
92
t , metadata = _get_origin_type_and_metadata (t )
73
- enriched_type_json = {'type' : _basic_type_to_json_value (t , metadata )}
93
+ enriched_type_json = {'type' : _dump_type (t , metadata )}
74
94
attrs = None
75
95
for attr in metadata :
76
96
if isinstance (attr , TypeAttr ):
@@ -86,4 +106,4 @@ def dump_type(t) -> dict[str, Any] | None:
86
106
"""
87
107
Convert a Python type to a CocoIndex's type in JSON.
88
108
"""
89
- return _enriched_type_to_json_value (t )
109
+ return _dump_enriched_type (t )
0 commit comments