"""
from __future__ import annotations

- from typing import Any, Callable, Dict, List, Tuple, Union, cast
+ from typing import Callable, Dict

- from pydoctor.epydoc.markup import DocstringLinker, ParseError, ParsedDocstring, get_parser_by_name
- from pydoctor.node2stan import node2stan
- from pydoctor.napoleon.docstring import TokenType, TypeDocstring
+ from pydoctor.epydoc.markup import ParsedDocstring
+ from pydoctor.epydoc.markup._pyval_repr import PyvalColorizer
+ from pydoctor.napoleon.docstring import TokenType, ITokenizer, Tokenizer
+ from pydoctor.epydoc.docutils import new_document, set_node_attributes, code

from docutils import nodes
- from twisted.web.template import Tag, tags

- class ParsedTypeDocstring(TypeDocstring, ParsedDocstring):
+ class NodeTokenizer(ITokenizer[nodes.document]):
    """
-     Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
-     allow to parse types from L{nodes.Node} objects, providing the C{--process-types} option.
+     A type tokenizer for annotation as docutils L{document <nodes.document>}.
    """

-     FIELDS = ('type', 'rtype', 'ytype', 'returntype', 'yieldtype')
+     def __init__(self, annotation: nodes.document, *,
+                  warns_on_unknown_tokens: bool) -> None:
+         # build tokens and warnings
+         self.warnings = warnings = []  # type: list[str]
+         raw_tokens = Tokenizer.recombine_sets(self.tokenize_document(annotation, warnings))
+         self.tokens = Tokenizer.build(raw_tokens, warnings, warns_on_unknown_tokens)

-     # yes this overrides the superclass type!
-     _tokens: list[tuple[str | nodes.Node, TokenType]]  # type: ignore
-
-     def __init__(self, annotation: Union[nodes.document, str],
-                  warns_on_unknown_tokens: bool = False, lineno: int = 0) -> None:
-         ParsedDocstring.__init__(self, ())
-         if isinstance(annotation, nodes.document):
-             TypeDocstring.__init__(self, '', warns_on_unknown_tokens)
-
-             _tokens = self._tokenize_node_type_spec(annotation)
-             self._tokens = cast('list[tuple[str | nodes.Node, TokenType]]',
-                                 self._build_tokens(_tokens))
-             self._trigger_warnings()
-         else:
-             TypeDocstring.__init__(self, annotation, warns_on_unknown_tokens)
-
-
-         # We need to store the line number because we need to pass it to DocstringLinker.link_xref
-         self._lineno = lineno
-
-     @property
-     def has_body(self) -> bool:
-         return len(self._tokens) > 0
-
-     def to_node(self) -> nodes.document:
-         """
-         Not implemented at this time :/
-         """
-         # TODO: Fix this soon - PR https://github.com/twisted/pydoctor/pull/874
-         raise NotImplementedError()
-
-     def to_stan(self, docstring_linker: DocstringLinker) -> Tag:
-         """
-         Present the type as a stan tree.
-         """
-         return self._convert_type_spec_to_stan(docstring_linker)
-
-     def _tokenize_node_type_spec(self, spec: nodes.document) -> List[Union[str, nodes.Node]]:
+     @staticmethod
+     def tokenize_document(spec: nodes.document, warnings: list[str]) -> list[str | nodes.Node]:
        def _warn_not_supported(n: nodes.Node) -> None:
-             self.warnings.append(f"Unexpected element in type specification field: element '{n.__class__.__name__}'. "
-                                  "This value should only contain text or inline markup.")
+             warnings.append("Unexpected element in type specification field: "
+                             f"element '{n.__class__.__name__}'. This value should "
+                             "only contain text or inline markup.")

-         tokens: List[Union[str, nodes.Node]] = []
+         tokens: list[str | nodes.Node] = []
        # Determine if the content is nested inside a paragraph;
        # this is generally the case, except for documents generated by consolidated fields.
        if spec.children and isinstance(spec.children[0], nodes.paragraph):
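
The new tokenize_document() walks the children of a "@type" field body: docutils Text leaves are split into string tokens, while inline markup nodes (emphasis, literals, and so on) are appended whole and travel through the tokenizer as single opaque tokens. Below is a minimal sketch of that walk, not code from this commit, using only public docutils node classes; a plain str.split() stands in for Tokenizer.tokenize_str():

    from docutils import nodes

    # A "@type" field body is typically one paragraph mixing plain
    # text with inline markup.
    para = nodes.paragraph('', '',
                           nodes.Text('list of '),
                           nodes.emphasis('int', 'int'))

    tokens: list = []
    for child in para.children:
        if isinstance(child, nodes.Text):
            # stand-in for Tokenizer.tokenize_str()
            tokens.extend(child.astext().split())
        elif isinstance(child, nodes.Inline):
            # inline markup is kept as one opaque token
            tokens.append(child)

    print(tokens)  # ['list', 'of', <emphasis ...>]
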
@@ -77,99 +46,88 @@ def _warn_not_supported(n:nodes.Node) -> None:
        for child in children:
            if isinstance(child, nodes.Text):
                # Tokenize the Text node with the same method TypeDocstring uses.
-                 tokens.extend(TypeDocstring._tokenize_type_spec(child.astext()))
+                 tokens.extend(Tokenizer.tokenize_str(child.astext()))
            elif isinstance(child, nodes.Inline):
                tokens.append(child)
            else:
                _warn_not_supported(child)

        return tokens

-     def _convert_obj_tokens_to_stan(self, tokens: List[Tuple[Any, TokenType]],
-                                     docstring_linker: DocstringLinker) -> list[tuple[Any, TokenType]]:
-         """
-         Convert L{TokenType.OBJ} and PEP 484 like L{TokenType.DELIMITER} type to stan, merge them together. Leave the rest untouched.

-         @param tokens: List of tuples: C{(token, type)}
-         """
+ class ParsedTypeDocstring(ParsedDocstring):
+     """
+     Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
+     allow to parse types from L{nodes.Node} objects,
+     providing the C{--process-types} option.
+     """

-         combined_tokens: list[tuple[Any, TokenType]] = []
-
-         open_parenthesis = 0
-         open_square_braces = 0
-
-         for _token, _type in tokens:
-             # The actual type of _token is str | Tag | Node.
-
-             if (_type is TokenType.DELIMITER and _token in ('[', '(', ')', ']')) \
-                 or _type is TokenType.OBJ:
-                 if _token == "[": open_square_braces += 1
-                 elif _token == "(": open_parenthesis += 1
-
-                 if _type is TokenType.OBJ:
-                     _token = docstring_linker.link_xref(
-                         _token, _token, self._lineno)
-
-                 if open_square_braces + open_parenthesis > 0:
-                     try: last_processed_token = combined_tokens[-1]
-                     except IndexError:
-                         combined_tokens.append((_token, _type))
-                     else:
-                         if last_processed_token[1] is TokenType.OBJ \
-                             and isinstance(last_processed_token[0], Tag):
-                             # Merge with last Tag
-                             if _type is TokenType.OBJ:
-                                 assert isinstance(_token, Tag)
-                                 last_processed_token[0](*_token.children)
-                             else:
-                                 last_processed_token[0](_token)
-                         else:
-                             combined_tokens.append((_token, _type))
-                 else:
-                     combined_tokens.append((_token, _type))
-
-                 if _token == "]": open_square_braces -= 1
-                 elif _token == ")": open_parenthesis -= 1
+     FIELDS = ('type', 'rtype', 'ytype', 'returntype', 'yieldtype')

-             else:
-                 # the token will be processed in _convert_type_spec_to_stan() method.
-                 combined_tokens.append((_token, _type))
+     def __init__(self, annotation: nodes.document,
+                  warns_on_unknown_tokens: bool = False,
+                  lineno: int = 0) -> None:
+         super().__init__(fields=())
+
+         tokenizer = NodeTokenizer(annotation,
+                                   warns_on_unknown_tokens=warns_on_unknown_tokens)
+         self._tokens = tokenizer.tokens
+         self.warnings = tokenizer.warnings
+         self._lineno = lineno
+         self._document = self._parse_tokens()
+
+     @property
+     def has_body(self) -> bool:
+         return len(self._tokens) > 0

-         return combined_tokens
+     def to_node(self) -> nodes.document:
+         return self._document
+
+     _converters: Dict[TokenType, Callable[[str, int], nodes.Node]] = {
+         TokenType.LITERAL: lambda _token, _: nodes.inline(
+             # we're re-using the STRING_TAG css
+             # class for the whole literal token, it's the
+             # best approximation we have for now.
+             _token, _token, classes=[PyvalColorizer.STRING_TAG]),
+         TokenType.CONTROL: lambda _token, _: nodes.emphasis(_token, _token),
+         TokenType.OBJ: lambda _token, lineno: set_node_attributes(
+             nodes.title_reference(_token, _token), lineno=lineno),
+     }

-     def _convert_type_spec_to_stan(self, docstring_linker: DocstringLinker) -> Tag:
+     def _parse_tokens(self) -> nodes.document:
        """
-         Convert type to L{Tag} object.
+         Convert type to docutils document object.
        """

-         tokens = self._convert_obj_tokens_to_stan(self._tokens, docstring_linker)
-
-         warnings: List[ParseError] = []
-
-         converters: Dict[TokenType, Callable[[Union[str, Tag]], Union[str, Tag]]] = {
-             TokenType.LITERAL: lambda _token: tags.span(_token, class_="literal"),
-             TokenType.CONTROL: lambda _token: tags.em(_token),
-             # We don't use safe_to_stan() here: if these converter functions raise an exception,
-             # the whole type docstring will be rendered as plaintext.
-             # It does not crash on invalid xml entities.
-             TokenType.REFERENCE: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
-             TokenType.UNKNOWN: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
-             TokenType.OBJ: lambda _token: _token,  # These conversions (OBJ and DELIMITER) are done in _convert_obj_tokens_to_stan().
-             TokenType.DELIMITER: lambda _token: _token,
-             TokenType.ANY: lambda _token: _token,
-         }
+         document = new_document('code')

-         for w in warnings:
-             self.warnings.append(w.descr())
+         converters = self._converters
+         lineno = self._lineno

-         converted = Tag('')
+         elements: list[nodes.Node] = []
+         default = lambda _token, _: nodes.Text(_token)

-         for token, type_ in tokens:
+         for _tok in self._tokens:
+             token, type_ = _tok.value, _tok.type
            assert token is not None
-             if isinstance(token, nodes.Node):
-                 token = node2stan(token, docstring_linker)
-             assert isinstance(token, (str, Tag))
-             converted_token = converters[type_](token)
-             converted(converted_token)
+             converted_token: nodes.Node
+
+             if type_ is TokenType.ANY:
+                 assert isinstance(token, nodes.Node)
+                 converted_token = token
+             else:
+                 assert isinstance(token, str)
+                 converted_token = converters.get(type_, default)(token, lineno)
+
+             elements.append(set_node_attributes(converted_token,
+                                                 document=document))
+
+         # The +1 below copes with the fact that Field.lineno is 0-based
+         # while the docutils tree is supposed to be 1-based.
+         return set_node_attributes(document, children=[
+             set_node_attributes(code('', ''),
+                                 children=elements,
+                                 document=document,
+                                 lineno=lineno + 1)])

-         return converted
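
Where the old code above produced Twisted stan tags (tags.span, tags.em) and resolved cross-references eagerly through DocstringLinker.link_xref(), the new _parse_tokens() builds a docutils tree instead and leaves title_reference nodes to be resolved into links later. As a rough, hypothetical sketch, the tree returned for a type such as "list of int" should have this shape; the exact token classification is decided by the napoleon Tokenizer, and pydoctor's code() helper is approximated here with a plain docutils literal node:

    from docutils import nodes

    children = [
        nodes.title_reference('list', 'list'),  # OBJ token, linked as a crossref later
        nodes.Text(' of '),                     # plain/delimiter text kept verbatim
        nodes.title_reference('int', 'int'),
    ]
    # assumption: code('', '') builds a <literal> element carrying
    # the 'code' CSS class
    code_node = nodes.literal('', '', *children, classes=['code'])
    print(code_node.astext())  # list of int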