Skip to content

Commit f6cb786

Browse files
committed
wip
1 parent dcc7868 commit f6cb786

File tree

5 files changed

+230
-66
lines changed

5 files changed

+230
-66
lines changed

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ theme:
4040
- content.tooltips
4141
- navigation.footer
4242
- navigation.indexes
43+
- navigation.instant.preview
4344
- navigation.sections
4445
- navigation.tabs
4546
- navigation.tabs.sticky

src/mkdocs_autorefs/plugin.py

Lines changed: 104 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import contextlib
1616
import functools
1717
import logging
18+
from collections import defaultdict
1819
from pathlib import PurePosixPath as URL # noqa: N814
1920
from typing import TYPE_CHECKING, Any, Callable
2021
from urllib.parse import urlsplit
@@ -24,8 +25,11 @@
2425
from mkdocs.config.config_options import Type
2526
from mkdocs.plugins import BasePlugin
2627
from mkdocs.structure.pages import Page
28+
from mkdocs.structure.files import Files
29+
from mkdocs.structure.nav import Section
30+
from jinja2.environment import Environment
2731

28-
from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
32+
from mkdocs_autorefs.references import AutorefsExtension, URLAndTitle, _find_backlinks, fix_refs, relative_url
2933

3034
if TYPE_CHECKING:
3135
from collections.abc import Sequence
@@ -43,6 +47,15 @@
4347
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]
4448

4549

50+
# TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
51+
# to fetch all associated URLs: not all these URLs link to the cross-ref'd object.
52+
# Also, don't store URLs + titles, only store URLs in maps, and store titles in a separate dict.
53+
# Also also, backlinks should be fetched for all aliases of a given identifier,
54+
# not just for this specific identifier. For example, mkdocstrings-python will create
55+
# an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
56+
# but if we render the object under `object.path` instead of this canonical path,
57+
# then we won't find the backlinks for it.
58+
4659
class AutorefsConfig(Config):
4760
"""Configuration options for the `autorefs` plugin."""
4861

@@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
7689
"""
7790

7891
scan_toc: bool = True
79-
current_page: str | None = None
92+
current_page: Page | None = None
8093
# YORE: Bump 2: Remove line.
8194
legacy_refs: bool = True
8295

@@ -111,7 +124,9 @@ def __init__(self) -> None:
111124
# This logic unfolds in `_get_item_url`.
112125
self._primary_url_map: dict[str, list[str]] = {}
113126
self._secondary_url_map: dict[str, list[str]] = {}
127+
self._title_map: dict[str, str] = {}
114128
self._abs_url_map: dict[str, str] = {}
129+
self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
115130
# YORE: Bump 2: Remove line.
116131
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
117132

@@ -133,22 +148,68 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
133148
stacklevel=2,
134149
)
135150

136-
def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
151+
def _record_backlink(self, identifier: str, backlink_type: str, backlink_url: str) -> None:
152+
"""Record a backlink.
153+
154+
Arguments:
155+
identifier: The target identifier.
156+
backlink_type: The type of backlink.
157+
backlink_url: The URL of the backlink.
158+
"""
159+
if identifier in self._primary_url_map or identifier in self._secondary_url_map:
160+
self._backlinks[identifier][backlink_type].add(backlink_url)
161+
162+
def get_relative_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
163+
"""Return the backlinks to the given identifiers, relative to the given URL.
164+
165+
Arguments:
166+
*identifiers: The identifiers to get backlinks for.
167+
from_url: The URL of the current page.
168+
169+
Returns:
170+
A dictionary of backlinks, with the type of reference as key and a set of (relative URL, title) pairs as value.
171+
"""
172+
relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
173+
for identifier in identifiers:
174+
backlinks = self._backlinks.get(identifier, {})
175+
for backlink_type, backlink_urls in backlinks.items():
176+
for backlink_url in backlink_urls:
177+
relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), self._title_map[backlink_url]))
178+
return relative_backlinks
179+
180+
def _breadcrumbs(self, page: Page | Section, title: str) -> str:
181+
breadcrumbs = [title, page.title]
182+
while page.parent:
183+
page = page.parent
184+
breadcrumbs.append(page.title)
185+
return " ❭ ".join(reversed(breadcrumbs))
186+
187+
def register_anchor(
188+
self,
189+
identifier: str,
190+
anchor: str | None = None,
191+
*,
192+
title: str | None = None,
193+
primary: bool = True,
194+
) -> None:
137195
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
138196
139197
Arguments:
140-
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
141198
identifier: The identifier to register.
142199
anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
200+
title: The title of the anchor (optional).
143201
primary: Whether this anchor is the primary one for the identifier.
144202
"""
145-
page_anchor = f"{page}#{anchor or identifier}"
203+
page_anchor = f"{self.current_page.url}#{anchor or identifier}"
146204
url_map = self._primary_url_map if primary else self._secondary_url_map
147205
if identifier in url_map:
148206
if page_anchor not in url_map[identifier]:
149207
url_map[identifier].append(page_anchor)
150208
else:
151209
url_map[identifier] = [page_anchor]
210+
if title and page_anchor not in self._title_map:
211+
title = self._breadcrumbs(self.current_page, title) if self.current_page else title
212+
self._title_map[page_anchor] = title
152213

153214
def register_url(self, identifier: str, url: str) -> None:
154215
"""Register that the identifier should be turned into a link to this URL.
@@ -240,7 +301,7 @@ def get_item_url(
240301
from_url: str | None = None,
241302
# YORE: Bump 2: Remove line.
242303
fallback: Callable[[str], Sequence[str]] | None = None,
243-
) -> str:
304+
) -> URLAndTitle:
244305
"""Return a site-relative URL with anchor to the identifier (if it's present anywhere), and its title if one was registered.
245306
246307
Arguments:
@@ -252,11 +313,12 @@ def get_item_url(
252313
"""
253314
# YORE: Bump 2: Replace `, fallback` with `` within line.
254315
url = self._get_item_url(identifier, from_url, fallback)
316+
title = self._title_map.get(url) or None
255317
if from_url is not None:
256318
parsed = urlsplit(url)
257319
if not parsed.scheme and not parsed.netloc:
258-
return relative_url(from_url, url)
259-
return url
320+
url = relative_url(from_url, url)
321+
return url, title
260322

261323
def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
262324
"""Instantiate our Markdown extension.
@@ -287,7 +349,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
287349
The same Markdown. We only use this hook to keep a reference to the current page,
288350
used during Markdown conversion by the anchor scanner tree processor.
289351
"""
290-
self.current_page = page.url
352+
self.current_page = page
291353
return markdown
292354

293355
def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
@@ -306,13 +368,15 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
306368
Returns:
307369
The same HTML. We only use this hook to map anchors to URLs.
308370
"""
371+
self.current_page = page
372+
# Collect `std`-domain URLs.
309373
if self.scan_toc:
310374
log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
311375
for item in page.toc.items:
312-
self.map_urls(page.url, item)
376+
self.map_urls(item)
313377
return html
314378

315-
def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
379+
def map_urls(self, anchor: AnchorLink) -> None:
316380
"""Recurse on every anchor to map its ID to its absolute URL.
317381
318382
This method populates `self._primary_url_map` by side-effect.
@@ -321,9 +385,32 @@ def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
321385
base_url: The base URL to use as a prefix for each anchor's relative URL.
322386
anchor: The anchor to process and to recurse on.
323387
"""
324-
self.register_anchor(base_url, anchor.id, primary=True)
388+
self.register_anchor(anchor.id, title=anchor.title, primary=True)
325389
for child in anchor.children:
326-
self.map_urls(base_url, child)
390+
self.map_urls(child)
391+
392+
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
393+
"""Collect backlinks.
394+
395+
Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
396+
In this hook, we collect backlinks on every page. We do this now instead of in the `on_page_content` hook
397+
so that all anchors have been registered, and so we can use them to discard useless backlinks.
398+
399+
Arguments:
400+
env: The MkDocs environment.
401+
config: The MkDocs config object.
402+
files: The list of files in the MkDocs project.
403+
kwargs: Additional arguments passed by MkDocs.
404+
405+
Returns:
406+
The modified environment.
407+
"""
408+
for file in files:
409+
if file.page and file.page.content:
410+
log.debug("Recording backlinks for page %s", file.page.file.src_path)
411+
for identifier, backlink_type, backlink_anchor in _find_backlinks(file.page.content):
412+
self._record_backlink(identifier, backlink_type, f"{file.page.url}#{backlink_anchor}")
413+
return env
327414

328415
def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
329416
"""Fix cross-references.
@@ -348,6 +435,10 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa:
348435
"""
349436
log.debug("Fixing references in page %s", page.file.src_path)
350437

438+
self.current_page = page
439+
# TODO: We could get rid of `from_url` parameters and use `self.current_page` instead.
440+
# Then we could just make `fix_refs` a method of this class.
441+
351442
# YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
352443
url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
353444
# YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.

0 commit comments

Comments
 (0)