15
15
import contextlib
16
16
import functools
17
17
import logging
18
+ from collections import defaultdict
18
19
from pathlib import PurePosixPath as URL # noqa: N814
19
20
from typing import TYPE_CHECKING , Any , Callable
20
21
from urllib .parse import urlsplit
24
25
from mkdocs .config .config_options import Type
25
26
from mkdocs .plugins import BasePlugin
26
27
from mkdocs .structure .pages import Page
28
+ from mkdocs .structure .files import Files
29
+ from mkdocs .structure .nav import Section
30
+ from jinja2 .environment import Environment
27
31
28
- from mkdocs_autorefs .references import AutorefsExtension , fix_refs , relative_url
32
+ from mkdocs_autorefs .references import AutorefsExtension , URLAndTitle , _find_backlinks , fix_refs , relative_url
29
33
30
34
if TYPE_CHECKING :
31
35
from collections .abc import Sequence
43
47
log = logging .getLogger (f"mkdocs.plugins.{ __name__ } " ) # type: ignore[assignment]
44
48
45
49
50
+ # TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
51
+ # to fetch all associated URLs: not all these URLs link to the cross-ref'd object.
52
+ # Also, don't store URLs + titles, only store URLs in maps, and store titles in a separate dict.
53
+ # Also also, backlinks should be fetched for all aliases of a given identifier,
54
+ # not just for this specific identifier. For example, mkdocstrings-python will create
55
+ # an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
56
+ # but if we don't render the object with this canonical path but instead `object.path`,
57
+ # then we won't find the backlinks for it.
58
+
46
59
class AutorefsConfig (Config ):
47
60
"""Configuration options for the `autorefs` plugin."""
48
61
@@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
76
89
"""
77
90
78
91
scan_toc : bool = True
79
- current_page : str | None = None
92
+ current_page : Page | None = None
80
93
# YORE: Bump 2: Remove line.
81
94
legacy_refs : bool = True
82
95
@@ -111,7 +124,9 @@ def __init__(self) -> None:
111
124
# This logic unfolds in `_get_item_url`.
112
125
self ._primary_url_map : dict [str , list [str ]] = {}
113
126
self ._secondary_url_map : dict [str , list [str ]] = {}
127
+ self ._title_map : dict [str , str ] = {}
114
128
self ._abs_url_map : dict [str , str ] = {}
129
+ self ._backlinks : dict [str , dict [str , set [str ]]] = defaultdict (lambda : defaultdict (set ))
115
130
# YORE: Bump 2: Remove line.
116
131
self ._get_fallback_anchor : Callable [[str ], tuple [str , ...]] | None = None
117
132
@@ -133,22 +148,68 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
133
148
stacklevel = 2 ,
134
149
)
135
150
136
- def register_anchor (self , page : str , identifier : str , anchor : str | None = None , * , primary : bool = True ) -> None :
151
def _record_backlink(self, identifier: str, backlink_type: str, backlink_url: str) -> None:
    """Store a backlink pointing at a registered identifier.

    Backlinks to identifiers that are in neither the primary nor the
    secondary URL map are ignored.

    Arguments:
        identifier: The identifier the backlink points to.
        backlink_type: The kind of backlink, used as the grouping key.
        backlink_url: The URL (page URL plus anchor) the backlink comes from.
    """
    is_registered = identifier in self._primary_url_map or identifier in self._secondary_url_map
    if not is_registered:
        return
    self._backlinks[identifier][backlink_type].add(backlink_url)
161
+
162
def get_relative_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
    """Return the backlinks to the given identifiers, relative to the current page.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the current page.

    Returns:
        A dictionary of backlinks, with the type of reference as key
        and a set of `(relative URL, title)` tuples as value.
        The title is `None` when no title was recorded for the backlink URL.
    """
    relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
    for identifier in identifiers:
        # `.get` (rather than indexing) avoids creating empty entries
        # in the `_backlinks` defaultdict for unknown identifiers.
        backlinks = self._backlinks.get(identifier, {})
        for backlink_type, backlink_urls in backlinks.items():
            for backlink_url in backlink_urls:
                # Look the title up the same way `get_item_url` does, so that a backlink
                # recorded without a title yields `None` instead of raising `KeyError`.
                title = self._title_map.get(backlink_url) or None
                relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), title))
    return relative_backlinks
179
+
180
def _breadcrumbs(self, page: Page | Section, title: str) -> str:
    """Build a breadcrumb string leading from the topmost ancestor down to `title`.

    Arguments:
        page: The page (or section) the title belongs to.
        title: The title to put at the end of the breadcrumbs.

    Returns:
        The titles of every ancestor of `page`, of `page` itself, and `title`,
        joined with the `❭` separator.
    """
    trail = [page.title, title]
    ancestor = page.parent
    # Walk up the navigation tree, prepending each ancestor's title.
    while ancestor:
        trail.insert(0, ancestor.title)
        ancestor = ancestor.parent
    return " ❭ ".join(trail)
186
+
187
def register_anchor(
    self,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Register that an anchor corresponding to an identifier was encountered when rendering the page.

    The anchor is registered against the URL of `self.current_page`.

    Arguments:
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional).
        primary: Whether this anchor is the primary one for the identifier.

    Raises:
        RuntimeError: When no current page is set.
    """
    page = self.current_page
    if page is None:
        # Fail with an explicit message rather than an opaque
        # `AttributeError: 'NoneType' object has no attribute 'url'`.
        raise RuntimeError("Cannot register an anchor: no current page is set")

    page_anchor = f"{page.url}#{anchor or identifier}"
    url_map = self._primary_url_map if primary else self._secondary_url_map
    urls = url_map.setdefault(identifier, [])
    if page_anchor not in urls:
        urls.append(page_anchor)

    # The first title registered for a given URL wins.
    if title and page_anchor not in self._title_map:
        self._title_map[page_anchor] = self._breadcrumbs(page, title)
152
213
153
214
def register_url (self , identifier : str , url : str ) -> None :
154
215
"""Register that the identifier should be turned into a link to this URL.
@@ -240,7 +301,7 @@ def get_item_url(
240
301
from_url : str | None = None ,
241
302
# YORE: Bump 2: Remove line.
242
303
fallback : Callable [[str ], Sequence [str ]] | None = None ,
243
- ) -> str :
304
+ ) -> URLAndTitle :
244
305
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.
245
306
246
307
Arguments:
@@ -252,11 +313,12 @@ def get_item_url(
252
313
"""
253
314
# YORE: Bump 2: Replace `, fallback` with `` within line.
254
315
url = self ._get_item_url (identifier , from_url , fallback )
316
+ title = self ._title_map .get (url ) or None
255
317
if from_url is not None :
256
318
parsed = urlsplit (url )
257
319
if not parsed .scheme and not parsed .netloc :
258
- return relative_url (from_url , url )
259
- return url
320
+ url = relative_url (from_url , url )
321
+ return url , title
260
322
261
323
def on_config (self , config : MkDocsConfig ) -> MkDocsConfig | None :
262
324
"""Instantiate our Markdown extension.
@@ -287,7 +349,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
287
349
The same Markdown. We only use this hook to keep a reference to the current page URL,
288
350
used during Markdown conversion by the anchor scanner tree processor.
289
351
"""
290
- self .current_page = page . url
352
+ self .current_page = page
291
353
return markdown
292
354
293
355
def on_page_content (self , html : str , page : Page , ** kwargs : Any ) -> str : # noqa: ARG002
@@ -306,13 +368,15 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
306
368
Returns:
307
369
The same HTML. We only use this hook to map anchors to URLs.
308
370
"""
371
+ self .current_page = page
372
+ # Collect `std`-domain URLs.
309
373
if self .scan_toc :
310
374
log .debug ("Mapping identifiers to URLs for page %s" , page .file .src_path )
311
375
for item in page .toc .items :
312
- self .map_urls (page . url , item )
376
+ self .map_urls (item )
313
377
return html
314
378
315
- def map_urls (self , base_url : str , anchor : AnchorLink ) -> None :
379
+ def map_urls (self , anchor : AnchorLink ) -> None :
316
380
"""Recurse on every anchor to map its ID to its absolute URL.
317
381
318
382
This method populates `self._primary_url_map` by side-effect.
@@ -321,9 +385,32 @@ def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
321
385
base_url: The base URL to use as a prefix for each anchor's relative URL.
322
386
anchor: The anchor to process and to recurse on.
323
387
"""
324
- self .register_anchor (base_url , anchor .id , primary = True )
388
+ self .register_anchor (anchor . id , title = anchor .title , primary = True )
325
389
for child in anchor .children :
326
- self .map_urls (base_url , child )
390
+ self .map_urls (child )
391
+
392
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
    """Collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    In this hook, we collect backlinks on every page. We do this now instead of in the `on_page_content` hook
    so that all anchors have been registered, and so we can use them to discard useless backlinks.

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object (unused).
        files: The list of files in the MkDocs project.

    Returns:
        The environment, unchanged.
    """
    for file in files:
        page = file.page
        # Skip files that are not pages, and pages without rendered content.
        if not (page and page.content):
            continue
        log.debug("Recording backlinks for page %s", page.file.src_path)
        for identifier, backlink_type, backlink_anchor in _find_backlinks(page.content):
            self._record_backlink(identifier, backlink_type, f"{page.url}#{backlink_anchor}")
    return env
327
414
328
415
def on_post_page (self , output : str , page : Page , ** kwargs : Any ) -> str : # noqa: ARG002
329
416
"""Fix cross-references.
@@ -348,6 +435,10 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa:
348
435
"""
349
436
log .debug ("Fixing references in page %s" , page .file .src_path )
350
437
438
+ self .current_page = page
439
+ # TODO: We could get rid of `from_url` parameters and use `self.current_page` instead.
440
+ # Then we could just make `fix_refs` a method of this class.
441
+
351
442
# YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
352
443
url_mapper = functools .partial (self .get_item_url , from_url = page .url , fallback = self .get_fallback_anchor )
353
444
# YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
0 commit comments