Skip to content

Commit 8ecebbb

Browse files
committed
update to latest html5lib
1 parent b48cb55 commit 8ecebbb

3 files changed

Lines changed: 9 additions & 15 deletions

File tree

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
html5lib==0.999
1+
html5lib>=1.0b10
2 2
Pillow==3.0.0

tests/test_textile.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,16 +71,16 @@ def test_sanitize():
71 71
expect = textile.Textile().parse(test, sanitize=True)
72 72
assert result == expect
73 73

74-
test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
75-
result = '<p style="">a paragraph of evil text</p>'
74+
test = """<p onclick="alert('evil');">a paragraph of evil text</p>"""
75+
result = '<p>a paragraph of evil text</p>'
76 76
expect = textile.Textile().parse(test, sanitize=True)
77 77
assert result == expect
78 78

79 79
test = """<p>a paragraph of benign text<br />and more text</p>"""
80 80
result = '<p>a paragraph of benign text<br />\nand more text</p>'
81 81
expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
82 82
assert result == expect
83-
except Exception as e:
83+
except ImportError as e:
84 84
message = '{0}'.format(e)
85 85
assert "html5lib not available" in message
86 86

textile/tools/sanitizer.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,11 @@ def sanitize(string):
4 4
break the page.
5 5
"""
6 6
try:
7-
import html5lib
8-
from html5lib import sanitizer, serializer, treewalkers
7+
from html5lib import parseFragment, serialize
9 8
except ImportError:
10 9
raise Exception("html5lib not available")
11 10

12-
p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
13-
tree = p.parseFragment(string)
14-
15-
walker = treewalkers.getTreeWalker("etree")
16-
stream = walker(tree)
17-
18-
s = serializer.htmlserializer.HTMLSerializer(omit_optional_tags=False,
19-
quote_attr_values=True)
20-
return s.render(stream)
11+
parsed = parseFragment(string)
12+
clean = serialize(parsed, sanitize=True, omit_optional_tags=False,
13+
quote_attr_values='always')
14+
return clean

0 commit comments

Comments
 (0)