Skip to content

Commit 0899293

Browse files
authored
Merge pull request #166 from mattblaha/add-author
Add author
2 parents 11a53f9 + 3603c14 commit 0899293

3 files changed

Lines changed: 24 additions & 0 deletions

File tree

readability/htmls.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ def get_title(doc):
5858
return norm_title(title.text)
5959

6060

61+
def get_author(doc):
    """Return the author declared by the document's ``<meta name="author">``.

    Every ``meta`` element whose ``name`` attribute equals ``author`` is
    examined in document order; the first one with a non-blank ``content``
    attribute supplies the result, with surrounding whitespace stripped.

    :param doc: parsed document element exposing ``findall`` with
        ElementPath-style expressions (as lxml / ElementTree elements do).
    :returns: the author string, or the placeholder ``"[no-author]"`` when
        no such tag exists or none carries usable content.
    """
    for meta in doc.findall(".//meta[@name='author']"):
        # ``get`` yields None for a missing attribute; fold that together
        # with empty / whitespace-only values so all count as "no author".
        content = (meta.get('content') or '').strip()
        if content:
            return content

    return "[no-author]"
68+
69+
6170
def add_match(collection, text, orig):
6271
text = norm_title(text)
6372
if len(text.split()) >= 2 and len(text) >= 15:

readability/readability.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .htmls import build_doc
1616
from .htmls import get_body
1717
from .htmls import get_title
18+
from .htmls import get_author
1819
from .htmls import shorten_title
1920
from .compat import str_, bytes_, tostring_, pattern_type
2021
from .debug import describe, text_content
@@ -192,6 +193,10 @@ def title(self):
192193
"""Returns document title"""
193194
return get_title(self._html(True))
194195

196+
def author(self):
    """Return the document author as reported by ``get_author``."""
    # Same parsed-HTML accessor call the neighbouring title helpers use.
    parsed = self._html(True)
    return get_author(parsed)
199+
195200
def short_title(self):
196201
"""Returns cleaned up document title"""
197202
return shorten_title(self._html(True))

tests/test_article_only.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,13 @@ def test_utf8_kanji(self):
124124
sample = load_sample("utf-8-kanji.sample.html")
125125
doc = Document(sample)
126126
res = doc.summary()
127+
128+
def test_author_present(self):
    # The sample page is expected to yield this author name.
    html = load_sample("the-hurricane-rubin-carter-denzel-washington.html")
    extracted = Document(html).author()
    assert extracted == 'Alex von Tunzelmann'
132+
133+
def test_author_absent(self):
    # With no author available, the placeholder string is expected.
    html = load_sample("si-game.sample.html")
    extracted = Document(html).author()
    assert extracted == '[no-author]'

0 commit comments

Comments
 (0)