Skip to content

Commit 3cdc3d6

Browse files
author
Jan Weiß
committed
Adding comment about oversight in transform_misused_divs_into_paragraphs().
1 parent 960f885 commit 3cdc3d6

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

readability/readability.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,8 @@ def remove_unlikely_candidates(self):
290290
def transform_misused_divs_into_paragraphs(self):
291291
for elem in self.tags(self.html, 'div'):
292292
# transform <div>s that do not contain other block elements into <p>s
293+
#FIXME: The current implementation ignores all descendants that are not direct children of elem
294+
# This yields incorrect results when, for example, an <img> is buried within an <a>.
293295
if not REGEXES['divToPElementsRe'].search(unicode(''.join(map(tostring, list(elem))))):
294296
#self.debug("Altering %s to p" % (describe(elem)))
295297
elem.tag = "p"

0 commit comments

Comments
 (0)