Commit 1f146a9

Refactor the Linker and ParsedTypeDocstring (#874)
* Make the linker always output link tags only, never <code> tags. The <code> tags
  are now added by the HTML translator when the document is a docstring; otherwise
  it does not add the enclosing <code> tags, because we're already in the middle of
  a code tag or a similar <span class="rst-literal">.
* Fix #723 and #581.
* Fix a duplicate warning issue in the Attributes section's references.
Parent: 8e4c9bd

26 files changed (+696, -480 lines)

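To make the first point concrete: after this commit the linker produces only the link itself, and the HTML translator decides whether to wrap it in <code>. A minimal sketch of that division of labour, with hypothetical names standing in for pydoctor's actual linker and translator classes:

    from __future__ import annotations
    from xml.sax.saxutils import escape

    def link_xref(label: str, url: str | None) -> str:
        # The linker emits only the link, never an enclosing <code> tag.
        if url is None:
            return escape(label)
        return f'<a href="{url}">{escape(label)}</a>'

    def render_xref(label: str, url: str | None, source: str) -> str:
        # The HTML translator wraps the link in <code> only when the document
        # is a docstring; inside colorized code or a <span class="rst-literal">
        # we are already in a code context, so wrapping again would nest tags.
        link = link_xref(label, url)
        return f'<code>{link}</code>' if source == 'docstring' else link
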
README.rst

Lines changed: 3 additions & 0 deletions
@@ -73,6 +73,9 @@ What's New?
 in development
 ^^^^^^^^^^^^^^
 
+* Fix bug that would result in duplicated "Cannot find link target" warnings when the
+  types under a docstring *Attributes* section failed to resolve.
+
 pydoctor 25.4.0
 ^^^^^^^^^^^^^^^
 

docs/google_demo/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ def function_with_types_in_docstring(param1, param2):
 
     Args:
         param1 (int): The first parameter.
-        param2 (str): The second parameter.
+        param2 (str, {"html", "json", "xml"}, optional): The second parameter.
 
     Returns:
         bool: The return value. True for success, False otherwise.

docs/numpy_demo/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def function_with_types_in_docstring(param1, param2):
     ----------
     param1 : int
         The first parameter.
-    param2 : str
+    param2 : str : {"html", "json", "xml"}, optional
        The second parameter.
 
     Returns

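Both demo tweaks exercise richer type syntax: a literal choice set plus the optional keyword. Roughly, pydoctor's type parser splits such a spec into tokens and renders each kind differently (literals like string constants, control words in italics, names as candidate links). The sketch below is a simplified classification written for this page, not the real tokenizer from pydoctor.napoleon.docstring:

    import re

    # Simplified tokenizer in the spirit of pydoctor's type parser;
    # the real Tokenizer handles many more edge cases.
    TOKEN_RE = re.compile(r'(\{[^}]*\}|"[^"]*"|,\s*|\s+|[^,\s]+)')

    def classify(token: str) -> str:
        if token in ('optional', 'default'):
            return 'CONTROL'    # rendered in italics
        if token.startswith(('{', '"')):
            return 'LITERAL'    # rendered like a string constant
        if not token.strip() or token.strip() in (',', ':'):
            return 'DELIMITER'  # kept as plain text
        return 'OBJ'            # candidate for a cross-reference link

    spec = 'str : {"html", "json", "xml"}, optional'
    print([(tok, classify(tok)) for tok in TOKEN_RE.findall(spec)])
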
docs/tests/test.py

Lines changed: 0 additions & 1 deletion
@@ -195,7 +195,6 @@ def test_search(query:str, expected:List[str], order_is_important:bool=True) ->
 to_stan_results = [
     'pydoctor.epydoc.markup.ParsedDocstring.to_stan',
     'pydoctor.epydoc.markup.plaintext.ParsedPlaintextDocstring.to_stan',
-    'pydoctor.epydoc.markup._types.ParsedTypeDocstring.to_stan',
     'pydoctor.epydoc.markup._pyval_repr.ColorizedPyvalRepr.to_stan',
 ]
 test_search('to_stan*', to_stan_results, order_is_important=False)

pydoctor/epydoc/docutils.py

Lines changed: 11 additions & 4 deletions
@@ -3,7 +3,10 @@
 """
 from __future__ import annotations
 
-from typing import Iterable, Iterator, Optional, TypeVar, cast
+from typing import Iterable, Iterator, Optional, TypeVar, cast, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Literal
 
 import optparse
 
@@ -14,11 +17,11 @@
 
 _DEFAULT_DOCUTILS_SETTINGS: Optional[optparse.Values] = None
 
-def new_document(source_path: str, settings: Optional[optparse.Values] = None) -> nodes.document:
+def new_document(source: Literal['docstring', 'code'], settings: Optional[optparse.Values] = None) -> nodes.document:
     """
     Create a new L{nodes.document} using the provided settings or cached default settings.
 
-    @returns: L{nodes.document}
+    @returns: L{nodes.document} with a C{source} attribute that matches the provided source.
     """
     global _DEFAULT_DOCUTILS_SETTINGS
     # If we have docutils >= 0.19 we use get_default_settings to calculate and cache
@@ -29,7 +32,7 @@ def new_document(source_path: str, settings: Optional[optparse.Values] = None) -> nodes.document:
 
         settings = _DEFAULT_DOCUTILS_SETTINGS
 
-    return utils.new_document(source_path, settings)
+    return utils.new_document(source, settings)
 
 def _set_nodes_parent(nodes: Iterable[nodes.Node], parent: nodes.Element) -> Iterator[nodes.Node]:
     """
@@ -145,7 +148,11 @@ def get_first_parent_lineno(_node: nodes.Element | None) -> int:
        return line
 
    if node.line:
+        # If the line is explicitly set, assume it's zero-based.
        line = node.line
+        # If docutils suddenly starts populating the line attribute for
+        # title_reference nodes, all RST xref warnings will be off by 1 :/
+
    else:
        line = get_first_parent_lineno(node.parent)
 

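A short usage sketch of the new signature; the assertions rely on docutils storing the first argument as the document's source attribute, which is what the updated @returns docstring promises:

    from pydoctor.epydoc.docutils import new_document

    # A document built from docstring markup:
    docstring_doc = new_document('docstring')
    # A document built from colorized source values or annotations:
    code_doc = new_document('code')

    # Downstream renderers can branch on the origin of the tree, e.g. to
    # decide whether a link still needs an enclosing <code> tag.
    assert docstring_doc['source'] == 'docstring'
    assert code_doc['source'] == 'code'
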
pydoctor/epydoc/markup/__init__.py

Lines changed: 7 additions & 7 deletions
@@ -88,7 +88,7 @@ def get_supported_docformats() -> Iterator[str]:
 
 def get_parser_by_name(docformat: str, objclass: ObjClass | None = None) -> ParserFunction:
     """
-    Get the C{parse_docstring(str, List[ParseError], bool) -> ParsedDocstring} function based on a parser name.
+    Get the C{parse_docstring(str, List[ParseError]) -> ParsedDocstring} function based on a parser name.
 
     @raises ImportError: If the parser could not be imported, probably meaning that your are missing a dependency
         or it could be that the docformat name do not match any know L{pydoctor.epydoc.markup} submodules.
@@ -113,7 +113,7 @@ def _processtypes(doc: 'ParsedDocstring', errs: List['ParseError']) -> None:
     for field in doc.fields:
         if field.tag() in ParsedTypeDocstring.FIELDS:
             body = ParsedTypeDocstring(field.body().to_node(), lineno=field.lineno)
-            append_warnings(body.warnings, errs, lineno=field.lineno+1)
+            append_warnings(body.warnings, errs, lineno=field.lineno)
             field.replace_body(body)
 
 def parse_and_processtypes(doc:str, errs:List['ParseError']) -> 'ParsedDocstring':
@@ -149,8 +149,8 @@ def __init__(self, fields: Sequence['Field']):
         """
         self._stan: Optional[Tag] = None
 
-    @property
-    @abc.abstractmethod
+    @property
+    @abc.abstractmethod
     def has_body(self) -> bool:
         """
         Does this docstring have a non-empty body?
@@ -168,7 +168,7 @@ def get_toc(self, depth: int) -> Optional['ParsedDocstring']:
         except NotImplementedError:
             return None
         contents = build_table_of_content(document, depth=depth)
-        docstring_toc = new_document('toc')
+        docstring_toc = new_document('docstring')
         if contents:
             docstring_toc.extend(contents)
         return ParsedRstDocstring(docstring_toc, ())
@@ -228,7 +228,7 @@ def get_summary(self) -> 'ParsedDocstring':
 
 def parsed_text(text: str,
                 klass: str | None = None,
-                source: str = 'docstring') -> ParsedDocstring:
+                source: Literal['docstring', 'code'] = 'docstring') -> ParsedDocstring:
     """
     Create a parsed representation of a simple text
     with a given class (or no class at all).
@@ -455,7 +455,7 @@ def visit_paragraph(self, node: nodes.paragraph) -> None:
             self.other_docs = True
             raise nodes.StopTraversal()
 
-        summary_doc = new_document('summary')
+        summary_doc = new_document('docstring')
         summary_pieces: list[nodes.Node] = []
 
         # Extract the first sentences from the first paragraph until maximum number

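The append_warnings change is the line-number half of the fix: Field.lineno is 0-based while docutils lines are 1-based, and the +1 shift now happens in exactly one place, when ParsedTypeDocstring attaches the tokens to the docutils tree (see _types.py below). A tiny illustration of the convention, under that reading of the diff:

    # Hypothetical type field on the 42nd line of a docstring:
    field_lineno = 41                    # Field.lineno is 0-based
    docutils_lineno = field_lineno + 1   # node.line in the docutils tree is 1-based

    # Warnings are now reported against the unshifted value, because the
    # tree itself carries the +1; shifting in both places skewed the
    # reported line numbers.
    # append_warnings(body.warnings, errs, lineno=field.lineno)
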
pydoctor/epydoc/markup/_pyval_repr.py

Lines changed: 1 addition & 1 deletion
@@ -339,7 +339,7 @@ def colorize(self, pyval: Any) -> ColorizedPyvalRepr:
             is_complete = True
 
         # Put it all together.
-        document = new_document('pyval_repr')
+        document = new_document('code')
         # This ensure the .parent and .document attributes of the child nodes are set correcly.
         set_node_attributes(document, children=[set_node_attributes(node, document=document) for node in state.result])
         return ColorizedPyvalRepr(document, is_complete, state.warnings)

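Colorized value representations are themselves code, so their document is now created as new_document('code') and the HTML translator will not wrap their cross-references in another <code> tag. A hedged usage sketch (the constructor arguments mirror how pydoctor's test suite builds a PyvalColorizer; treat the exact signature as an assumption):

    from pydoctor.epydoc.markup._pyval_repr import PyvalColorizer

    colorizer = PyvalColorizer(linelen=80, maxlines=5)
    parsed = colorizer.colorize(['html', 'json', 'xml'])
    # The underlying docutils document is tagged as code:
    print(parsed.to_node()['source'])   # expected: 'code'
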
pydoctor/epydoc/markup/_types.py

Lines changed: 84 additions & 126 deletions
@@ -5,66 +5,35 @@
 """
 from __future__ import annotations
 
-from typing import Any, Callable, Dict, List, Tuple, Union, cast
+from typing import Callable, Dict
 
-from pydoctor.epydoc.markup import DocstringLinker, ParseError, ParsedDocstring, get_parser_by_name
-from pydoctor.node2stan import node2stan
-from pydoctor.napoleon.docstring import TokenType, TypeDocstring
+from pydoctor.epydoc.markup import ParsedDocstring
+from pydoctor.epydoc.markup._pyval_repr import PyvalColorizer
+from pydoctor.napoleon.docstring import TokenType, ITokenizer, Tokenizer
+from pydoctor.epydoc.docutils import new_document, set_node_attributes, code
 
 from docutils import nodes
-from twisted.web.template import Tag, tags
 
-class ParsedTypeDocstring(TypeDocstring, ParsedDocstring):
+class NodeTokenizer(ITokenizer[nodes.document]):
     """
-    Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
-    allow to parse types from L{nodes.Node} objects, providing the C{--process-types} option.
+    A type tokenizer for annotation as docutils L{document <nodes.document>}.
     """
 
-    FIELDS = ('type', 'rtype', 'ytype', 'returntype', 'yieldtype')
+    def __init__(self, annotation: nodes.document, *,
+                 warns_on_unknown_tokens: bool) -> None:
+        # build tokens and warnings
+        self.warnings = warnings = []  # type: list[str]
+        raw_tokens = Tokenizer.recombine_sets(self.tokenize_document(annotation, warnings))
+        self.tokens = Tokenizer.build(raw_tokens, warnings, warns_on_unknown_tokens)
 
-    # yes this overrides the superclass type!
-    _tokens: list[tuple[str | nodes.Node, TokenType]]  # type: ignore
-
-    def __init__(self, annotation: Union[nodes.document, str],
-                 warns_on_unknown_tokens: bool = False, lineno: int = 0) -> None:
-        ParsedDocstring.__init__(self, ())
-        if isinstance(annotation, nodes.document):
-            TypeDocstring.__init__(self, '', warns_on_unknown_tokens)
-
-            _tokens = self._tokenize_node_type_spec(annotation)
-            self._tokens = cast('list[tuple[str | nodes.Node, TokenType]]',
-                                self._build_tokens(_tokens))
-            self._trigger_warnings()
-        else:
-            TypeDocstring.__init__(self, annotation, warns_on_unknown_tokens)
-
-
-        # We need to store the line number because we need to pass it to DocstringLinker.link_xref
-        self._lineno = lineno
-
-    @property
-    def has_body(self) -> bool:
-        return len(self._tokens)>0
-
-    def to_node(self) -> nodes.document:
-        """
-        Not implemented at this time :/
-        """
-        #TODO: Fix this soon - PR https://github.com/twisted/pydoctor/pull/874
-        raise NotImplementedError()
-
-    def to_stan(self, docstring_linker: DocstringLinker) -> Tag:
-        """
-        Present the type as a stan tree.
-        """
-        return self._convert_type_spec_to_stan(docstring_linker)
-
-    def _tokenize_node_type_spec(self, spec: nodes.document) -> List[Union[str, nodes.Node]]:
+    @staticmethod
+    def tokenize_document(spec: nodes.document, warnings: list[str]) -> list[str | nodes.Node]:
        def _warn_not_supported(n:nodes.Node) -> None:
-            self.warnings.append(f"Unexpected element in type specification field: element '{n.__class__.__name__}'. "
-                                 "This value should only contain text or inline markup.")
+            warnings.append("Unexpected element in type specification field: "
+                            f"element '{n.__class__.__name__}'. This value should "
+                            "only contain text or inline markup.")
 
-        tokens: List[Union[str, nodes.Node]] = []
+        tokens: list[str | nodes.Node] = []
         # Determine if the content is nested inside a paragraph
         # this is generally the case, except for consolidated fields generate documents.
         if spec.children and isinstance(spec.children[0], nodes.paragraph):
@@ -77,99 +46,88 @@ def _warn_not_supported(n:nodes.Node) -> None:
         for child in children:
             if isinstance(child, nodes.Text):
                 # Tokenize the Text node with the same method TypeDocstring uses.
-                tokens.extend(TypeDocstring._tokenize_type_spec(child.astext()))
+                tokens.extend(Tokenizer.tokenize_str(child.astext()))
             elif isinstance(child, nodes.Inline):
                 tokens.append(child)
             else:
                 _warn_not_supported(child)
 
         return tokens
 
-    def _convert_obj_tokens_to_stan(self, tokens: List[Tuple[Any, TokenType]],
-                                    docstring_linker: DocstringLinker) -> list[tuple[Any, TokenType]]:
-        """
-        Convert L{TokenType.OBJ} and PEP 484 like L{TokenType.DELIMITER} type to stan, merge them together. Leave the rest untouched.
 
-        @param tokens: List of tuples: C{(token, type)}
-        """
+class ParsedTypeDocstring(ParsedDocstring):
+    """
+    Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
+    allow to parse types from L{nodes.Node} objects,
+    providing the C{--process-types} option.
+    """
 
-        combined_tokens: list[tuple[Any, TokenType]] = []
-
-        open_parenthesis = 0
-        open_square_braces = 0
-
-        for _token, _type in tokens:
-            # The actual type of_token is str | Tag | Node.
-
-            if (_type is TokenType.DELIMITER and _token in ('[', '(', ')', ']')) \
-               or _type is TokenType.OBJ:
-                if _token == "[": open_square_braces += 1
-                elif _token == "(": open_parenthesis += 1
-
-                if _type is TokenType.OBJ:
-                    _token = docstring_linker.link_xref(
-                        _token, _token, self._lineno)
-
-                if open_square_braces + open_parenthesis > 0:
-                    try: last_processed_token = combined_tokens[-1]
-                    except IndexError:
-                        combined_tokens.append((_token, _type))
-                    else:
-                        if last_processed_token[1] is TokenType.OBJ \
-                           and isinstance(last_processed_token[0], Tag):
-                            # Merge with last Tag
-                            if _type is TokenType.OBJ:
-                                assert isinstance(_token, Tag)
-                                last_processed_token[0](*_token.children)
-                            else:
-                                last_processed_token[0](_token)
-                        else:
-                            combined_tokens.append((_token, _type))
-                else:
-                    combined_tokens.append((_token, _type))
-
-                if _token == "]": open_square_braces -= 1
-                elif _token == ")": open_parenthesis -= 1
+    FIELDS = ('type', 'rtype', 'ytype', 'returntype', 'yieldtype')
 
-            else:
-                # the token will be processed in _convert_type_spec_to_stan() method.
-                combined_tokens.append((_token, _type))
+    def __init__(self, annotation: nodes.document,
+                 warns_on_unknown_tokens: bool = False,
+                 lineno: int = 0) -> None:
+        super().__init__(fields=())
+
+        tokenizer = NodeTokenizer(annotation,
+                                  warns_on_unknown_tokens=warns_on_unknown_tokens)
+        self._tokens = tokenizer.tokens
+        self.warnings = tokenizer.warnings
+        self._lineno = lineno
+        self._document = self._parse_tokens()
+
+    @property
+    def has_body(self) -> bool:
+        return len(self._tokens)>0
 
-        return combined_tokens
+    def to_node(self) -> nodes.document:
+        return self._document
+
+    _converters: Dict[TokenType, Callable[[str, int], nodes.Node]] = {
+        TokenType.LITERAL: lambda _token, _: nodes.inline(
+            # we're re-using the STRING_TAG css
+            # class for the whole literal token, it's the
+            # best approximation we have for now.
+            _token, _token, classes=[PyvalColorizer.STRING_TAG]),
+        TokenType.CONTROL: lambda _token, _: nodes.emphasis(_token, _token),
+        TokenType.OBJ: lambda _token, lineno: set_node_attributes(
+            nodes.title_reference(_token, _token), lineno=lineno),
+    }
 
-    def _convert_type_spec_to_stan(self, docstring_linker: DocstringLinker) -> Tag:
+    def _parse_tokens(self) -> nodes.document:
         """
-        Convert type to L{Tag} object.
+        Convert type to docutils document object.
         """
 
-        tokens = self._convert_obj_tokens_to_stan(self._tokens, docstring_linker)
-
-        warnings: List[ParseError] = []
-
-        converters: Dict[TokenType, Callable[[Union[str, Tag]], Union[str, Tag]]] = {
-            TokenType.LITERAL: lambda _token: tags.span(_token, class_="literal"),
-            TokenType.CONTROL: lambda _token: tags.em(_token),
-            # We don't use safe_to_stan() here, if these converter functions raise an exception,
-            # the whole type docstring will be rendered as plaintext.
-            # it does not crash on invalid xml entities
-            TokenType.REFERENCE: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
-            TokenType.UNKNOWN: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
-            TokenType.OBJ: lambda _token: _token,  # These convertions (OBJ and DELIMITER) are done in _convert_obj_tokens_to_stan().
-            TokenType.DELIMITER: lambda _token: _token,
-            TokenType.ANY: lambda _token: _token,
-        }
+        document = new_document('code')
 
-        for w in warnings:
-            self.warnings.append(w.descr())
+        converters = self._converters
+        lineno = self._lineno
 
-        converted = Tag('')
+        elements: list[nodes.Node] = []
+        default = lambda _token, _: nodes.Text(_token)
 
-        for token, type_ in tokens:
+        for _tok in self._tokens:
+            token, type_ = _tok.value, _tok.type
             assert token is not None
-            if isinstance(token, nodes.Node):
-                token = node2stan(token, docstring_linker)
-            assert isinstance(token, (str, Tag))
-            converted_token = converters[type_](token)
-            converted(converted_token)
+            converted_token: nodes.Node
+
+            if type_ is TokenType.ANY:
+                assert isinstance(token, nodes.Node)
+                converted_token = token
+            else:
+                assert isinstance(token, str)
+                converted_token = converters.get(type_, default)(token, lineno)
+
+            elements.append(set_node_attributes(converted_token,
+                                                document=document))
+
+        return set_node_attributes(document, children=[
+            set_node_attributes(code('', ''),
+                                children=elements,
+                                document=document,
+                                lineno=lineno+1)])
+            # the +1 here is coping with the fact that
+            # Field.lineno is 0-based but the docutils tree
            # is supposed to be 1-based
 
-        return converted

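Putting the refactored pieces together, here is a sketch of how a type field flows through the new code path, mirroring what _processtypes() does for the fields listed in FIELDS (the sample type string is made up for illustration):

    from pydoctor.epydoc.markup import get_parser_by_name
    from pydoctor.epydoc.markup._types import ParsedTypeDocstring

    errors = []  # collects ParseError instances
    # Parse the raw field body first, as the docstring parser would:
    parse = get_parser_by_name('restructuredtext')
    parsed = parse('list of `Stream`, optional', errors)

    # Re-interpret the resulting docutils tree as a type expression:
    typ = ParsedTypeDocstring(parsed.to_node(), lineno=7)
    print(typ.has_body)   # True
    print(typ.warnings)   # tokenizer warnings, reported once by the caller
    doc = typ.to_node()   # a document wrapping a single code element whose
                          # children are inline/emphasis/title_reference nodes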