Skip to content

Commit 82ad64f

Browse files
committed
Add author to Document
1 parent 0d0503f commit 82ad64f

2 files changed

Lines changed: 14 additions & 0 deletions

File tree

readability/htmls.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,15 @@ def get_title(doc):
5858
return norm_title(title.text)
5959

6060

61+
def get_author(doc):
    """Return the author declared in the document's ``<meta name="author">``.

    Finds the first ``meta`` element whose ``name`` attribute is exactly
    ``author`` and returns its ``content`` attribute with surrounding
    whitespace removed.

    :param doc: parsed document element that supports ``find()`` with an
        ElementPath expression (presumably an lxml element -- confirm
        against ``build_doc``).
    :returns: the author string, or ``"[no-author]"`` when the tag is
        missing, has no ``content`` attribute, or its content is empty or
        whitespace-only.
    """
    author = doc.find(".//meta[@name='author']")
    if author is None:
        return "[no-author]"

    # ``get`` yields None for a missing attribute; fold that together with
    # "" and whitespace-only values so they all hit the same fallback.
    content = (author.get('content') or '').strip()
    return content or "[no-author]"
68+
69+
6170
def add_match(collection, text, orig):
6271
text = norm_title(text)
6372
if len(text.split()) >= 2 and len(text) >= 15:

readability/readability.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .htmls import build_doc
1616
from .htmls import get_body
1717
from .htmls import get_title
18+
from .htmls import get_author
1819
from .htmls import shorten_title
1920
from .compat import str_, bytes_, tostring_, pattern_type
2021
from .debug import describe, text_content
@@ -192,6 +193,10 @@ def title(self):
192193
"""Returns document title"""
193194
return get_title(self._html(True))
194195

196+
def author(self):
    """Return the document author.

    Passes the result of ``self._html(True)`` to ``get_author`` and
    returns whatever that helper produces.
    """
    parsed = self._html(True)
    return get_author(parsed)
199+
195200
def short_title(self):
196201
"""Returns cleaned up document title"""
197202
return shorten_title(self._html(True))

0 commit comments

Comments
 (0)