Skip to content

Commit 4c12cbd

Browse files
committed
Fix external links with anchors being rewritten as relative doc links
Protect <a href="https?://..."> links from linkify so identifiers in the URL fragment (e.g. #django.http.HttpResponseNotFound) or in the link text are not turned into relative documentation links. Mask such links with placeholders before linkify, then restore them after.
1 parent 16e00b6 commit 4c12cbd

2 files changed

Lines changed: 53 additions & 4 deletions

File tree

pdoc/render_helpers.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,31 @@ def module_candidates(identifier: str, current_module: str) -> Iterable[str]:
306306
yield identifier
307307

308308

309+
@contextmanager
310+
def shield_fragments(code: str, pattern: str | re.Pattern[str], flags: int = 0):
311+
"""
312+
Context manager that shields regex matches in `code` from being modified,
313+
then restores them afterward.
314+
315+
Yields a tuple ``(shielded_code, restore)`` where ``shielded_code`` is the
316+
input with all matches replaced by unique placeholders, and ``restore`` is a
317+
callable that replaces the placeholders back with the original matches.
318+
"""
319+
placeholders: list[str] = []
320+
321+
def save(m: re.Match[str]) -> str:
322+
placeholders.append(m.group(0))
323+
return f"\u200b\u200bPDOC_FRAGMENT_{len(placeholders) - 1}\u200b\u200b"
324+
325+
def restore(text: str) -> str:
326+
for i, original in enumerate(placeholders):
327+
text = text.replace(f"\u200b\u200bPDOC_FRAGMENT_{i}\u200b\u200b", original)
328+
return text
329+
330+
shielded = re.sub(pattern, save, code, flags=flags)
331+
yield shielded, restore
332+
333+
309334
@pass_context
310335
def linkify(
311336
context: Context, code: str, namespace: str = "", shorten: bool = True
@@ -319,7 +344,6 @@ def linkify(
319344
For example, replace "current_module.Foo" with "Foo". This is useful for annotations
320345
(which are verbose), but undesired for docstrings (where we want to preserve intent).
321346
"""
322-
323347
def linkify_repl(m: re.Match):
324348
"""
325349
Resolve `text` to the most suitable documentation object.
@@ -401,8 +425,10 @@ def linkify_repl(m: re.Match):
401425
# No matches found.
402426
return text
403427

404-
return Markup(
405-
re.sub(
428+
with shield_fragments(
429+
code, r'<a\s+href="https?://[^"]*"[^>]*>.*?</a>', re.DOTALL
430+
) as (code, restore):
431+
result = re.sub(
406432
r"""
407433
# Part 1: foo.bar or foo.bar() (without backticks)
408434
(?<![/=?#&\.]) # heuristic: not part of a URL
@@ -433,7 +459,7 @@ def linkify_repl(m: re.Match):
433459
code,
434460
flags=re.VERBOSE,
435461
)
436-
)
462+
return Markup(restore(result))
437463

438464

439465
@pass_context

test/test_render_helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from pdoc.render_helpers import edit_url
8+
from pdoc.render_helpers import linkify
89
from pdoc.render_helpers import module_candidates
910
from pdoc.render_helpers import possible_sources
1011
from pdoc.render_helpers import qualname_candidates
@@ -161,3 +162,25 @@ def test_mixed_toc():
161162
)
162163
def test_markdown_autolink(md, html):
163164
assert to_html(md) == html
165+
166+
167+
def test_external_link_with_anchor_preserved():
    """
    An external URL whose fragment looks like a dotted identifier must survive
    linkify unchanged instead of being turned into a relative documentation link.
    """
    expected_url = "https://docs.djangoproject.com/en/6.0/ref/request-response/#django.http.HttpResponseNotFound"
    rendered = to_html(f"See [HttpResponseNotFound]({expected_url}).")

    # Bare-bones stand-in for a documented module: it has a name and resolves
    # no identifiers at all.
    class FakeModule:
        modulename = "test"

        def get(self, name):
            return None

    # Smallest context linkify is given here: one module, no known modules.
    # NOTE(review): with nothing to link to, linkify may leave the URL alone
    # even without the shielding - confirm this test fails on the old code.
    context = {"module": FakeModule(), "all_modules": {}, "is_public": lambda doc: ""}
    linkified = str(linkify(context, rendered))

    assert expected_url in linkified

0 commit comments

Comments
 (0)