Skip to content

Commit f1a79fb

Browse files
committed
Update to make sure we don't drop the html tag when ditching elements
1 parent 46f0302 commit f1a79fb

1 file changed

Lines changed: 2 additions & 1 deletion

File tree

readability/readability.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -350,8 +350,9 @@ def remove_unlikely_candidates(self):
350350
if len(s) < 2:
351351
continue
352352
#self.debug(s)
353-
if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
353+
if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag not in ['html', 'body']:
354354
self.debug("Removing unlikely candidate - %s" % describe(elem))
355+
import ipdb; ipdb.set_trace()
355356
elem.drop_tree()
356357

357358
def transform_misused_divs_into_paragraphs(self):

0 commit comments

Comments
 (0)