Skip to content

Commit a16333a

Browse files
author
Tomasz-Kluczkowski
committed
Add amend_domain_values and basic test.
1 parent a9c9f24 commit a16333a

2 files changed

Lines changed: 77 additions & 6 deletions

File tree

data_extractor.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,28 @@ class DataExtractor:
88
def __init__(self):
    """
    Set up the extractor's working data.

    ``self.data`` is bound directly to the ``WEBSITES`` object; no copy is made here.
    """
    self.data = WEBSITES
1010

11-
def find_items(self, value):
11+
def find_items(self, value=4):
    """
    Find and return a new list of items where key "value" is greater than or equal to parameter value.

    Items that have no "value" key, or whose "value" is None or another empty non-numeric
    object, are skipped. A numeric 0 is a real value and is compared against the threshold
    like any other number.

    :param value: threshold that an item's "value" has to reach to be included. Default = 4.
    :return: list(dict), list of dictionaries matching the above filtering rule.
    """
    matches = []
    for item in self.data:
        item_value = item.get('value')
        # A plain truthiness test would treat a stored 0 as "missing" and drop the record even
        # when the threshold is 0 or negative, so numeric zero is recognised explicitly.
        # bool is excluded because False is not a meaningful score.
        is_numeric_zero = (
            isinstance(item_value, (int, float))
            and not isinstance(item_value, bool)
            and item_value == 0
        )
        if (item_value or is_numeric_zero) and item_value >= value:
            matches.append(item)
    return matches
1717

18+
def amend_domain_values(self, prefix='www.'):
    """
    Return the web records with missing domain prefixes filled in.

    Every record in self.data appears in the result, in the same order. A record whose
    "domain" is non-empty and does not already start with the prefix gets the prefix
    prepended; all other records keep their content unchanged. The result is built from
    shallow copies, so neither self.data nor the dictionaries it holds are modified.

    :param prefix: str, prefix to add to the domain name. Default = 'www.'.
    :return: amended: list(dict), amended list of web records.
    """
    amended = []
    for item in self.data:
        # Copy first: the source records are shared, and rewriting them in place would leak
        # the amended domains into every other user of the same data.
        record = dict(item)
        domain = record.get('domain')
        if domain and not domain.startswith(prefix):
            record['domain'] = f"{prefix}{domain}"
        amended.append(record)
    return amended
30+
1831

19-
# data_extractor = DataExtractor()
20-
# print(data_extractor.find_items(4))
21-
# print(len(data_extractor.find_items(4)))
32+
# NOTE: these statements run whenever the module is executed or imported.
data_extractor = DataExtractor()
# print(data_extractor.amend_domain_values())
# Show the records that reach the threshold of 4, followed by how many there are.
_matching_items = data_extractor.find_items(4)
print(_matching_items)
print(len(_matching_items))

tests/test_data_extractor.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,61 @@ def test_find_items(self):
2929
'value': 5
3030
}
3131
]
32-
assert data_extractor.find_items(4) == expected
32+
assert data_extractor.find_items() == expected
33+
34+
def test_amend_domain_values(self):
    """Every record comes back from amend_domain_values() with a 'www.'-prefixed domain."""
    expected_records = [
        {
            'name': 'Google',
            'url': 'https://www.google.co.uk',
            'domain': 'www.google.co.uk',
            'secure': True,
            'value': 5,
        },
        {
            'name': 'Facebook',
            'url': 'https://developers.facebook.com/blog/post/2018/10/02/facebook-login-update/',
            'domain': 'www.facebook.com',
            'secure': True,
            'value': 4,
        },
        {
            'name': 'Bing',
            'url': 'https://www.bing.com/search?q=athlete&qs=n&form=QBLH&sp=-1&pq=athlete&sc=8-7&sk=&cvid=53830DD7FB2E47B7A5D9CF27F106BC9A',
            'domain': 'www.bing.com',
            'secure': False,
            'value': 3,
        },
        {
            'name': 'Ask',
            'url': 'https://uk.ask.com/web?o=0&l=dir&qo=serpSearchTopBox&q=jupiter',
            'domain': 'www.ask.com',
            'secure': False,
            'value': 1,
        },
        {
            'name': 'Duck Duck Go',
            'url': 'http://duckduckgo.com/?q=plane&t=h_&ia=web',
            'domain': 'www.duckduckgo.com',
            'secure': True,
            'value': 2,
        },
        {
            'name': 'Vimeo',
            'url': 'https://vimeo.com/53812885',
            'domain': 'www.vimeo.com',
            'secure': False,
            'value': 2,
        },
        {
            'name': 'YouTube',
            'url': 'https://www.youtube.com/watch?v=09Cd7NKKvDc',
            'domain': 'www.youtube.com',
            'secure': True,
            'value': 5,
        },
        {
            'name': 'Daily Motion',
            'url': 'http://www.dailymotion.com/search/football',
            'domain': 'www.dailymotion.com',
            'secure': True,
            'value': 1,
        },
    ]
    amended_records = data_extractor.amend_domain_values()
    assert amended_records == expected_records

0 commit comments

Comments
 (0)