Skip to content

Commit 4840c86

Browse files
committed
add text summarizer
1 parent 8969dc4 commit 4840c86

6 files changed

Lines changed: 294 additions & 9 deletions

File tree

project/app/api/crud.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
async def post(payload: SummaryPayloadSchema) -> int:
11-
summary = TextSummary(url=payload.url, summary="dummy summary",)
11+
summary = TextSummary(url=payload.url, summary="")
1212
await summary.save()
1313
return summary.id
1414

project/app/api/summaries.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from typing import List
55

6-
from fastapi import APIRouter, HTTPException, Path
6+
from fastapi import APIRouter, HTTPException, Path, BackgroundTasks
77

88
from app.api import crud
99
from app.models.pydantic import (
@@ -12,15 +12,20 @@
1212
SummaryUpdatePayloadSchema,
1313
)
1414
from app.models.tortoise import SummarySchema
15+
from app.summarizer import generate_summary
1516

1617

1718
router = APIRouter()
1819

1920

2021
@router.post("/", response_model=SummaryResponseSchema, status_code=201)
21-
async def create_summary(payload: SummaryPayloadSchema) -> SummaryResponseSchema:
22+
async def create_summary(
23+
payload: SummaryPayloadSchema, background_tasks: BackgroundTasks
24+
) -> SummaryResponseSchema:
2225
summary_id = await crud.post(payload)
2326

27+
background_tasks.add_task(generate_summary, summary_id, payload.url)
28+
2429
response_object = {"id": summary_id, "url": payload.url}
2530
return response_object
2631

project/app/summarizer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# project/app/summarizer.py
2+
3+
4+
import nltk
5+
from newspaper import Article
6+
7+
from app.models.tortoise import TextSummary
8+
9+
10+
def _extract_summary(url: str) -> str:
    """Download the article at *url* and return newspaper's generated summary.

    This helper is fully synchronous (network download, parsing, NLP), so it
    is meant to be called from a worker thread rather than from the event loop.
    """
    article = Article(url)
    article.download()
    article.parse()

    # Make sure the NLTK "punkt" tokenizer data is available before running
    # article.nlp(); fetch it the first time it is found to be missing.
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

    article.nlp()
    return article.summary


async def generate_summary(summary_id: int, url: str) -> None:
    """Summarize the article at *url* and store it on an existing record.

    The download/parse/NLP pipeline is synchronous, so it is pushed to the
    event loop's default executor; running it inline in this coroutine would
    stall every other request until the article had been processed.

    Args:
        summary_id: Primary key of the ``TextSummary`` row to update.
        url: Address of the article to summarize.
    """
    import asyncio  # local import: keeps this block self-contained

    loop = asyncio.get_running_loop()
    summary = await loop.run_in_executor(None, _extract_summary, url)

    await TextSummary.filter(id=summary_id).update(summary=summary)

project/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ fastapi==0.55.1
44
flake8===3.8.2
55
gunicorn==20.0.4
66
isort==4.3.21
7+
newspaper3k
78
pytest==5.4.2
89
pytest-cov==2.9.0
10+
pytest-xdist==1.32.0
911
requests==2.23.0
1012
tortoise-orm==0.16.12
1113
uvicorn==0.11.5

project/tests/test_summaries.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55

66
import pytest
77

8+
from app.api import summaries
9+
10+
11+
def test_create_summary(test_app_with_db, monkeypatch):
12+
def mock_generate_summary(summary_id, url):
13+
return None
14+
15+
monkeypatch.setattr(summaries, "generate_summary", mock_generate_summary)
816

9-
def test_create_summary(test_app_with_db):
1017
response = test_app_with_db.post(
1118
"/summaries/", data=json.dumps({"url": "https://foo.bar"})
1219
)
@@ -33,7 +40,12 @@ def test_create_summaries_invalid_json(test_app):
3340
assert response.json()["detail"][0]["msg"] == "URL scheme not permitted"
3441

3542

36-
def test_read_summary(test_app_with_db):
43+
def test_read_summary(test_app_with_db, monkeypatch):
44+
def mock_generate_summary(summary_id, url):
45+
return None
46+
47+
monkeypatch.setattr(summaries, "generate_summary", mock_generate_summary)
48+
3749
response = test_app_with_db.post(
3850
"/summaries/", data=json.dumps({"url": "https://foo.bar"})
3951
)
@@ -45,7 +57,6 @@ def test_read_summary(test_app_with_db):
4557
response_dict = response.json()
4658
assert response_dict["id"] == summary_id
4759
assert response_dict["url"] == "https://foo.bar"
48-
assert response_dict["summary"]
4960
assert response_dict["created_at"]
5061

5162

@@ -68,7 +79,12 @@ def test_read_summary_incorrect_id(test_app_with_db):
6879
}
6980

7081

71-
def test_read_all_summaries(test_app_with_db):
82+
def test_read_all_summaries(test_app_with_db, monkeypatch):
83+
def mock_generate_summary(summary_id, url):
84+
return None
85+
86+
monkeypatch.setattr(summaries, "generate_summary", mock_generate_summary)
87+
7288
response = test_app_with_db.post(
7389
"/summaries/", data=json.dumps({"url": "https://foo.bar"})
7490
)
@@ -81,7 +97,12 @@ def test_read_all_summaries(test_app_with_db):
8197
assert len(list(filter(lambda d: d["id"] == summary_id, response_list))) == 1
8298

8399

84-
def test_remove_summary(test_app_with_db):
100+
def test_remove_summary(test_app_with_db, monkeypatch):
101+
def mock_generate_summary(summary_id, url):
102+
return None
103+
104+
monkeypatch.setattr(summaries, "generate_summary", mock_generate_summary)
105+
85106
response = test_app_with_db.post(
86107
"/summaries/", data=json.dumps({"url": "https://foo.bar"})
87108
)
@@ -111,7 +132,12 @@ def test_remove_summary_incorrect_id(test_app_with_db):
111132
}
112133

113134

114-
def test_update_summary(test_app_with_db):
135+
def test_update_summary(test_app_with_db, monkeypatch):
136+
def mock_generate_summary(summary_id, url):
137+
return None
138+
139+
monkeypatch.setattr(summaries, "generate_summary", mock_generate_summary)
140+
115141
response = test_app_with_db.post(
116142
"/summaries/", data=json.dumps({"url": "https://foo.bar"})
117143
)
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
# project/tests/test_summaries_unit.py
2+
3+
4+
import json
5+
from datetime import datetime
6+
7+
import pytest
8+
9+
from app.api import crud, summaries
10+
11+
12+
def test_create_summary(test_app, monkeypatch):
    """POST /summaries/ answers 201 with the new id and the submitted URL.

    ``crud.post`` and ``summaries.generate_summary`` are both replaced with
    stubs for the duration of the test.
    """
    request_body = {"url": "https://foo.bar"}
    expected_body = {"id": 1, "url": "https://foo.bar"}

    async def fake_post(payload):
        return 1

    def fake_generate_summary(summary_id, url):
        return None

    monkeypatch.setattr(crud, "post", fake_post)
    monkeypatch.setattr(summaries, "generate_summary", fake_generate_summary)

    response = test_app.post("/summaries/", data=json.dumps(request_body))

    assert response.status_code == 201
    assert response.json() == expected_body
30+
31+
32+
def test_create_summaries_invalid_json(test_app):
    """POST /summaries/ answers 422 for an empty body and for a bad URL scheme."""
    missing_url_error = {
        "loc": ["body", "payload", "url"],
        "msg": "field required",
        "type": "value_error.missing",
    }

    # Empty JSON object: the "url" field is reported as missing.
    empty_body_response = test_app.post("/summaries/", data=json.dumps({}))
    assert empty_body_response.status_code == 422
    assert empty_body_response.json() == {"detail": [missing_url_error]}

    # A URL whose scheme is not accepted is rejected as well.
    bad_scheme_body = json.dumps({"url": "invalid://url"})
    bad_scheme_response = test_app.post("/summaries/", data=bad_scheme_body)
    assert bad_scheme_response.status_code == 422
    first_error = bad_scheme_response.json()["detail"][0]
    assert first_error["msg"] == "URL scheme not permitted"
48+
49+
50+
def test_read_summary(test_app, monkeypatch):
    """GET /summaries/1/ returns the record supplied by the stubbed ``crud.get``."""
    created_at = datetime.utcnow().isoformat()
    stored_summary = {
        "id": 1,
        "url": "https://foo.bar",
        "summary": "summary",
        "created_at": created_at,
    }

    async def fake_get(id):
        return stored_summary

    monkeypatch.setattr(crud, "get", fake_get)

    response = test_app.get("/summaries/1/")

    assert response.status_code == 200
    assert response.json() == stored_summary
66+
67+
68+
def test_read_summary_incorrect_id(test_app, monkeypatch):
    """GET on an id for which ``crud.get`` yields ``None`` answers 404."""

    async def fake_get(id):
        return None

    monkeypatch.setattr(crud, "get", fake_get)

    response = test_app.get("/summaries/999/")
    body = response.json()

    assert response.status_code == 404
    assert body["detail"] == "Summary not found"
77+
78+
79+
def test_read_all_summaries(test_app, monkeypatch):
    """GET /summaries/ returns the list supplied by the stubbed ``crud.get_all``."""
    id_url_pairs = ((1, "https://foo.bar"), (2, "https://testdrivenn.io"))
    stored_summaries = [
        {
            "id": summary_id,
            "url": url,
            "summary": "summary",
            "created_at": datetime.utcnow().isoformat(),
        }
        for summary_id, url in id_url_pairs
    ]

    async def fake_get_all():
        return stored_summaries

    monkeypatch.setattr(crud, "get_all", fake_get_all)

    response = test_app.get("/summaries/")

    assert response.status_code == 200
    assert response.json() == stored_summaries
103+
104+
105+
def test_remove_summary(test_app, monkeypatch):
    """DELETE /summaries/1/ answers 200 with the id and URL of the record.

    ``crud.get`` and ``crud.delete`` are both stubbed for the test.
    """

    async def fake_get(id):
        record = {
            "id": 1,
            "url": "https://foo.bar",
            "summary": "summary",
            "created_at": datetime.utcnow().isoformat(),
        }
        return record

    async def fake_delete(id):
        return id

    monkeypatch.setattr(crud, "get", fake_get)
    monkeypatch.setattr(crud, "delete", fake_delete)

    response = test_app.delete("/summaries/1/")

    assert response.status_code == 200
    assert response.json() == {"id": 1, "url": "https://foo.bar"}
124+
125+
126+
def test_remove_summary_incorrect_id(test_app, monkeypatch):
    """DELETE on an id for which ``crud.get`` yields ``None`` answers 404."""

    async def fake_get(id):
        return None

    monkeypatch.setattr(crud, "get", fake_get)

    response = test_app.delete("/summaries/999/")
    body = response.json()

    assert response.status_code == 404
    assert body["detail"] == "Summary not found"
135+
136+
137+
def test_update_summary(test_app, monkeypatch):
    """PUT /summaries/1/ returns the record supplied by the stubbed ``crud.put``."""
    request_body = {"url": "https://foo.bar", "summary": "updated"}
    created_at = datetime.utcnow().isoformat()
    updated_record = {
        "id": 1,
        "url": "https://foo.bar",
        "summary": "summary",
        "created_at": created_at,
    }

    async def fake_put(id, payload):
        return updated_record

    monkeypatch.setattr(crud, "put", fake_put)

    response = test_app.put("/summaries/1/", data=json.dumps(request_body))

    assert response.status_code == 200
    assert response.json() == updated_record
154+
155+
156+
def _missing_field_error(field):
    """Build the validation-error entry expected for an absent body field."""
    return {
        "loc": ["body", "payload", field],
        "msg": "field required",
        "type": "value_error.missing",
    }


# Complete, valid update body reused by the cases that fail on the id alone.
_FULL_UPDATE_BODY = {"url": "https://foo.bar", "summary": "updated!"}


@pytest.mark.parametrize(
    "summary_id, payload, status_code, detail",
    [
        # Valid body, but the stubbed crud.put reports no such record.
        (999, _FULL_UPDATE_BODY, 404, "Summary not found"),
        # Path id of 0 fails the "greater than 0" constraint.
        (
            0,
            _FULL_UPDATE_BODY,
            422,
            [
                {
                    "loc": ["path", "id"],
                    "msg": "ensure this value is greater than 0",
                    "type": "value_error.number.not_gt",
                    "ctx": {"limit_value": 0},
                }
            ],
        ),
        # Empty body: both required fields are reported.
        (
            1,
            {},
            422,
            [_missing_field_error("url"), _missing_field_error("summary")],
        ),
        # Only the summary is missing.
        (
            1,
            {"url": "https://foo.bar"},
            422,
            [_missing_field_error("summary")],
        ),
    ],
)
def test_update_summary_invalid(
    test_app, monkeypatch, summary_id, payload, status_code, detail
):
    """PUT with an unknown id, an invalid id or an incomplete body is rejected.

    ``crud.put`` is stubbed to return ``None`` for every call.
    """

    async def fake_put(id, payload):
        return None

    monkeypatch.setattr(crud, "put", fake_put)

    target = f"/summaries/{summary_id}/"
    response = test_app.put(target, data=json.dumps(payload))

    assert response.status_code == status_code
    assert response.json()["detail"] == detail
220+
221+
222+
def test_update_summary_invalid_url(test_app):
    """PUT with a URL whose scheme is not accepted answers 422."""
    request_body = json.dumps({"url": "invalid://url", "summary": "updated!"})

    response = test_app.put("/summaries/1/", data=request_body)

    assert response.status_code == 422
    first_error = response.json()["detail"][0]
    assert first_error["msg"] == "URL scheme not permitted"

0 commit comments

Comments
 (0)