Skip to content

Commit d12b61a

Browse files
committed
Staged current progress
1 parent 0d0a9f0 commit d12b61a

2 files changed

Lines changed: 106 additions & 0 deletions

File tree

funcs/replit_scrapper.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
from dotenv import load_dotenv
2+
load_dotenv()
3+
import os
4+
from selenium import webdriver
5+
from selenium.webdriver.chrome.options import Options
6+
from selenium.webdriver.support.wait import WebDriverWait
7+
from selenium.webdriver.support import expected_conditions as EC
8+
from selenium.webdriver.common.by import By
9+
import time
10+
import random
11+
from selenium.webdriver.common.action_chains import ActionChains
12+
13+
class ReplitScrapper():
    """Selenium-driven Chrome session used to read the file tree of a Replit page.

    Constructing an instance launches Chrome immediately; call ``cleanup()``
    when done so the browser process is shut down.
    """

    def __init__(self):
        """Launch Chrome with the scraper's options and keep it as ``self.driver``."""
        chrome_options = Options()
        chrome_options.add_argument('incognito')
        chrome_options.add_argument("--window-size=1920,1080")
        # chrome_options.add_argument("--headless")
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
        chrome_options.add_argument(f'user-agent={user_agent}')
        self.driver = webdriver.Chrome(options=chrome_options)

    # Disabled login flow, kept for reference.
    # NOTE(review): the ActionChains sequence below is never executed because
    # it lacks a final .perform() call -- add it before re-enabling.
    # def login(self) -> None:
    #     self.driver.get(os.environ['LOGINURL'])
    #     wait = WebDriverWait(self.driver, 10)
    #     email = wait.until(EC.visibility_of_element_located((By.ID, "1val-input")))
    #     time.sleep(random.randint(2, 5))
    #     email.send_keys(os.environ['EMAIL'])
    #     password = wait.until(EC.visibility_of_element_located((By.ID, "2val-input")))
    #     time.sleep(random.randint(2, 5))
    #     password.send_keys(os.environ['PASSWORD'])
    #     submit_button = wait.until(EC.visibility_of_element_located((By.XPATH, "//button[@type='submit']")))
    #     time.sleep(random.randint(2, 5))
    #     action = ActionChains(self.driver)
    #     action.move_to_element(submit_button)
    #     action.click()

    def get_file_list(self) -> list:
        """Return the titles of the file-tree entries on the currently loaded page.

        Clicks the "show code" button, waits for the file-tree entries to
        appear, and collects the ``title`` attribute of the first ``div``
        nested inside each entry.

        Returns:
            A list of ``title`` attribute values, or an empty list when no
            file-tree entry appears within the 10 second wait.

        Raises:
            selenium.common.exceptions.TimeoutException: if the show-code
                button does not become visible within 10 seconds.
        """
        # Function-scope import keeps this method self-contained; the module
        # header does not import TimeoutException.
        from selenium.common.exceptions import TimeoutException

        wait = WebDriverWait(self.driver, 10)
        showcode_button = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@class='css-148day7']/span[@class='css-36v8q4']/button[@type='button']")))
        # Random pause before interacting with the page.
        time.sleep(random.randint(2, 5))
        # ActionChains only queues actions; nothing happens until perform().
        ActionChains(self.driver).move_to_element(showcode_button).click().perform()

        try:
            # The plural condition returns a list of elements; the singular
            # one returns a single WebElement, which cannot be iterated.
            files = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@role='treeitem']/preceding-sibling::div")))
        except TimeoutException:
            # wait.until() raises instead of returning an empty list, so the
            # "nothing found" case has to be handled here.
            print("File list not found.")
            return []

        # WebElement has no `.div` attribute; look the nested div up explicitly.
        return [
            entry.find_element(By.TAG_NAME, "div").get_attribute('title')
            for entry in files
        ]

    def cleanup(self) -> None:
        """Quit the browser session held in ``self.driver``."""
        self.driver.quit()

tests/test_replit_scrapper.py

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
import unittest
2+
from funcs.replit_scrapper import ReplitScrapper
3+
from selenium.webdriver.support.wait import WebDriverWait
4+
from selenium.webdriver.support import expected_conditions as EC
5+
from selenium.webdriver.common.by import By
6+
7+
8+
class Test(unittest.TestCase):
    """Browser-level tests for ``ReplitScrapper``.

    Each test constructs a ``ReplitScrapper`` and drives its ``driver``
    against live URLs.
    """

    # The tests below are currently disabled; they are kept for reference.

    # def test_scrapper_quit(self):
    #     scrapper = ReplitScrapper()
    #     scrapper.driver.get('https://www.google.com/')
    #     scrapper.cleanup()
    #     self.assertFalse(scrapper.driver.service.is_connectable())

    # def test_scrapper_login_replit_homepage(self):
    #     scrapper = ReplitScrapper()
    #     scrapper.login()
    #     WebDriverWait(scrapper.driver, 10).until(
    #         EC.presence_of_element_located((By.XPATH, "//div[@data-cy='home-page']"))
    #     )
    #     self.assertEqual(scrapper.driver.current_url, 'https://replit.com/~')
    #     scrapper.cleanup()

    # def test_scrapper_get_given_url_after_login(self):
    #     scrapper = ReplitScrapper()
    #     scrapper.login()
    #     scrapper.driver.get('https://replit.com/@JustCallMeRay/Group2-Aug-23')
    #     self.assertEqual(scrapper.driver.current_url, 'https://replit.com/@JustCallMeRay/Group2-Aug-23')
    #     scrapper.cleanup()

    # def test_scrapper_returns_list_given_empty_input(self):
    #     scrapper = ReplitScrapper()
    #     scrapper.login()
    #     file_list = scrapper.get_file_list()
    #     self.assertIsInstance(file_list, list)
    #     scrapper.cleanup()

    def test_scrapper_returns_file_list_given_non_empty_input(self):
        """``get_file_list()`` on the sample repl page should return ``['main.py']``."""
        scrapper = ReplitScrapper()
        # Register cleanup immediately so the browser is closed even when the
        # scrape raises or the assertion fails; a trailing cleanup() call
        # would be skipped in both cases.
        self.addCleanup(scrapper.cleanup)
        # scrapper.login()
        scrapper.driver.get('https://replit.com/@JustCallMeRay/Group2-Aug-23')
        file_list = scrapper.get_file_list()
        expected = ['main.py']
        self.assertListEqual(file_list, expected)
47+
48+
49+
if __name__ == "__main__":
    # Run the test cases in this module when it is executed as a script.
    unittest.main()

0 commit comments

Comments
 (0)