File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree
Original file line number Diff line number Diff line change
@@ -110,7 +110,14 @@ def _parse(self, input):
110110 doc = html_cleaner .clean_html (doc )
111111 base_href = self .url
112112 if base_href :
113- doc .make_links_absolute (base_href , resolve_base_href = True )
113+ # trying to guard against bad links like <a href="http://[http://...">
114+ try :
115+ # such support is added in lxml 3.3.0
116+ doc .make_links_absolute (base_href , resolve_base_href = True , handle_failures = 'discard' )
117+ except TypeError : #make_links_absolute() got an unexpected keyword argument 'handle_failures'
118+ # then we have lxml < 3.3.0
119+ # please upgrade to lxml >= 3.3.0 if you're failing here!
120+ doc .make_links_absolute (base_href , resolve_base_href = True )
114121 else :
115122 doc .resolve_base_href ()
116123 return doc
Original file line number Diff line number Diff line change
1414
1515setup (
1616 name = "readability-lxml" ,
17- version = "0.6.0.3 " ,
17+ version = "0.6.0.4 " ,
1818 author = "Yuri Baburov" ,
1919 author_email = "burchik@gmail.com" ,
2020 description = "fast python port of arc90's readability tool" ,
You can’t perform that action at this time.
0 commit comments