Skip to content

Commit f20e7ba

Browse files
committed
Add class ReplitScrapper and corresponding tests
1 parent 2c482ae commit f20e7ba

2 files changed

Lines changed: 161 additions & 40 deletions

File tree

funcs/replit_scrapper.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from playwright.sync_api import sync_playwright
2+
from playwright_stealth import stealth_sync
3+
4+
5+
class ReplitScrapper():
    """Scrape a Replit repl with Playwright: log in, open the repl page and
    download the whole repo as a zip archive under ./screen-shots/.

    NOTE(review): the 'Scrapper' spelling is kept — it is the public class
    name that callers (and the test suite) import.
    """

    # Browser user agent sent with every request. HTTP header values must be
    # a single line; the original triple-quoted string embedded newlines and
    # leading indentation, producing an invalid User-Agent value. Implicit
    # string concatenation keeps it readable while yielding one line.
    ua = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 "
        "Safari/537.36 "
        "Edg/116.0.1938.81"
    )

    def __init__(self, login_name, login_password):
        """Store Replit credentials; no browser work happens until run()."""
        self.__login_name = login_name
        self.__login_password = login_password
        self._replit_url = None
        self._downloaded_filename = None

    def set_replit_url(self, replit_url) -> None:
        """Set the repl URL to download.

        Raises ValueError if replit_url is None. (Message added for
        consistency with the other validators in this class; the exception
        type callers catch is unchanged.)
        """
        if replit_url is None:
            raise ValueError("ReplitScrapper.set_replit_url() argument is None")
        self._replit_url = replit_url

    def get_replit_url(self) -> str:
        """Return the repl URL; raises ValueError if it was never set."""
        if self._replit_url is None:
            raise ValueError("Missing replit_url")
        return self._replit_url

    def _set_downloaded_filename(self, filename) -> None:
        """Record the filename the browser suggested for the download."""
        if filename is None:
            raise ValueError("ReplitScrapper._set_downloaded_filename() argument is None")
        self._downloaded_filename = filename

    def get_downloaded_filename(self) -> str:
        """Return the downloaded zip's filename.

        Raises ValueError if no download has completed yet.
        """
        if self._downloaded_filename is None:
            raise ValueError("Missing downloaded_filename")
        return self._downloaded_filename

    def _visit_replit_repo(self, page) -> None:
        """Navigate to the configured repl URL.

        Raises ValueError on any non-200 response (404 is reported as an
        invalid URL, anything else as an unexpected status).
        """
        response = page.goto(self.get_replit_url(), wait_until="domcontentloaded")
        if response.status != 200:
            print(f"response.status = {response.status}")
            if response.status == 404:
                raise ValueError("Invalid replit_url")
            raise ValueError("ReplitScrapper._visit_replit_repo() something other than 404 happened")

    def _login_replit(self, page) -> None:
        """Log in to Replit with the stored credentials.

        The login outcome is read from the Google identitytoolkit XHR that
        Replit's login form fires, not from the page DOM. Raises ValueError
        when the credentials are rejected (HTTP 400) or on any other
        non-200 auth response.
        """
        page.goto('https://replit.com/login', wait_until="domcontentloaded")
        page.screenshot(path="./screen-shots/replit.png")
        url_init = "https://identitytoolkit.googleapis.com/v1/accounts"
        with page.expect_response(lambda response: url_init in response.url) as response_info:
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[1]/input"
            ).fill(self.__login_name)
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[2]/div/input"
            ).fill(self.__login_password)
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[3]/button"
            ).click()
        response = response_info.value
        if response.status != 200:
            print(response)
            print(f"response.status = {response.status}")
            if response.status == 400:
                raise ValueError("Invalid login credentials")
            # Message fixed: the branch above checks for 400, not 401.
            raise ValueError("ReplitScrapper._login_replit() something other than 400 happened")
        page.wait_for_url("https://replit.com/~")
        page.screenshot(path="./screen-shots/replit_after_login.png")

    def _download_as_zip(self, page) -> None:
        """Download the currently-open repl as a zip into ./screen-shots/.

        Raises TimeoutError if the workspace never finishes loading (the
        original polled in an unbounded while-loop and could hang forever).
        """
        # Wait for the workspace sidebar control to exist at all.
        page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[3]/div/div[1]/button/div/span"
        ).wait_for()
        # The header button reads "Run" only once the workspace is fully
        # loaded; poll for it, but bounded (~2 minutes at 2 s per poll).
        run_button = page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/header/div[2]/button"
        )
        for _ in range(60):
            if run_button.text_content() == "Run":
                break
            print(run_button.text_content())
            page.wait_for_timeout(2000)
        else:
            raise TimeoutError("ReplitScrapper._download_as_zip() workspace never became ready")
        page.screenshot(path="./screen-shots/target_page.png")

        # Open the repl's menu, then click the "Download as zip" item and
        # capture the resulting browser download.
        page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[2]/div[1]/div[1]/div/button[3]"
        ).click()
        with page.expect_download() as download_info:
            page.locator(
                "xpath=/html/body/div[@class='css-1o92kwk']//div[@id='item-4']//div[@class='css-1l2rn59']"
            ).click()
        download = download_info.value
        self._set_downloaded_filename(download.suggested_filename)
        download.save_as(f"./screen-shots/{download.suggested_filename}")

    def run(self):
        """Full workflow: start a stealth browser, log in, open the repl and
        download it as a zip.

        The context and browser are now always closed via try/finally; the
        original leaked them whenever any intermediate step raised.
        """
        with sync_playwright() as p:
            # For interactive debugging use:
            #   p.chromium.launch(headless=False, slow_mo=50)
            browser = p.chromium.launch(slow_mo=50)
            context = None
            try:
                context = browser.new_context(user_agent=ReplitScrapper.ua)
                page = context.new_page()
                stealth_sync(page)

                # Login replit
                self._login_replit(page)

                # Download repo files as zip
                self._visit_replit_repo(page)
                self._download_as_zip(page)
            finally:
                # Clean-up runs even when a step above raises.
                if context is not None:
                    context.close()
                browser.close()

tests/test_replit_scrapper.py

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,45 @@
11
import unittest
22
from funcs.replit_scrapper import ReplitScrapper
3-
from selenium.webdriver.support.wait import WebDriverWait
4-
from selenium.webdriver.support import expected_conditions as EC
5-
from selenium.webdriver.common.by import By
3+
import os
4+
from dotenv import load_dotenv
5+
load_dotenv()
66

77

88
class Test(unittest.TestCase):
    """Tests for ReplitScrapper.

    The download test drives a real browser against replit.com and needs
    EMAIL and PASSWORD in the environment (loaded from .env by load_dotenv).
    """

    def test_scrapper_raise_value_error_when_replit_url_not_set(self):
        # get_replit_url() must fail loudly before any URL has been set.
        scrapper = ReplitScrapper(login_name=None, login_password=None)
        with self.assertRaises(ValueError) as ctx_manager:
            scrapper.get_replit_url()
        self.assertEqual(str(ctx_manager.exception), 'Missing replit_url')

    def test_scrapper_return_replit_url(self):
        # Round-trip: the setter stores exactly what the getter returns.
        test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"

        scrapper = ReplitScrapper(login_name=None, login_password=None)
        scrapper.set_replit_url(test_url)
        self.assertEqual(scrapper.get_replit_url(), test_url)

    # Commented out to avoid account freezes (repeated failed logins can
    # lock the Replit account used for testing).
    # def test_scrapper_login_with_invalid_credentials(self):
    #     scrapper = ReplitScrapper(login_name = os.environ['EMAIL'], login_password = "ThisIsNotTheCorrectPassword")
    #     with self.assertRaises(ValueError) as ctx_manager:
    #         scrapper.run()
    #     self.assertEqual(str(ctx_manager.exception), 'Invalid login credentials')

    def test_scrapper_download_repo_as_zip(self):
        # End-to-end: log in, download the repo zip, check it landed on disk.
        test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"
        target_zip_name = "SlipperyGargantuanDebuggers.zip"
        # The scraper saves to "./screen-shots/" relative to the current
        # working directory, so anchor the expected path there. (The original
        # os.path.abspath(os.path.dirname(__name__)) only resolved to the cwd
        # by accident, because dirname of a dotless module name is "".)
        WDIR = os.getcwd()
        full_target_file_path = os.path.join(WDIR, "screen-shots", target_zip_name)
        print(full_target_file_path)

        scrapper = ReplitScrapper(login_name=os.environ['EMAIL'], login_password=os.environ['PASSWORD'])
        scrapper.set_replit_url(test_url)
        scrapper.run()

        print(scrapper.get_downloaded_filename())
        self.assertTrue(os.path.exists(full_target_file_path))
4743

4844

4945
if __name__ == "__main__":

0 commit comments

Comments
 (0)