Skip to content

Commit c758c63

Browse files
committed
replace html5lib with nh3
We're only using the sanitizer part of html5lib, and it's being deprecated. It seems nh3 is the recommended replacement at this time. This change eliminates a series of Deprecation Warnings.
1 parent 8a5053e commit c758c63

4 files changed

Lines changed: 8 additions & 18 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ classifiers = [
2626
]
2727
dynamic = ["version",]
2828
dependencies = [
29-
'html5lib>=1.0.1',
29+
'nh3',
3030
'regex>1.0; implementation_name != "pypy"',
3131
]
3232
requires-python = '>=3.8'

tests/test_textile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def test_sanitize():
117117
assert result == expect
118118

119119
test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
120-
result = '<p style="">a paragraph of evil text</p>'
120+
result = '<p>a paragraph of evil text</p>'
121121
expect = textile.Textile().parse(test, sanitize=True)
122122
assert result == expect
123123

textile/core.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
import uuid
2121
from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote
2222
from collections import OrderedDict
23+
from nh3 import clean
2324

24-
from textile.tools import sanitizer, imagesize
25+
from textile.tools import imagesize
2526
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
2627
regex_snippets, syms_re_s, table_span_re_s)
2728
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
@@ -236,12 +237,12 @@ def parse(self, text, rel=None, sanitize=False):
236237

237238
if self.block_tags:
238239
if self.lite:
239-
self.blocktag_whitelist = ['bq', 'p']
240+
self.blocktag_allowlist = set(['bq', 'p', 'br'])
240241
text = self.block(text)
241242
else:
242-
self.blocktag_whitelist = ['bq', 'p', 'bc', 'notextile',
243+
self.blocktag_allowlist = set(['bq', 'p', 'br', 'bc', 'notextile',
243244
'pre', 'h[1-6]', 'fn{0}+'.format(
244-
regex_snippets['digit']), '###']
245+
regex_snippets['digit']), '###'])
245246
text = self.block(text)
246247
text = self.placeNoteLists(text)
247248
else:
@@ -263,7 +264,7 @@ def parse(self, text, rel=None, sanitize=False):
263264
text = text.replace('{0}:glyph:'.format(self.uid), '')
264265

265266
if sanitize:
266-
text = sanitizer.sanitize(text)
267+
text = clean(text, tags=self.blocktag_allowlist)
267268

268269
text = self.retrieveTags(text)
269270
text = self.retrieveURLs(text)

textile/tools/sanitizer.py

Lines changed: 0 additions & 11 deletions
This file was deleted.

0 commit comments

Comments
 (0)