Skip to content

Commit 7fdcf98

Browse files
Copilot and gkorland committed
feat: cache Playwright seed repository clones
Co-authored-by: gkorland <753206+gkorland@users.noreply.github.com>
1 parent 966467f commit 7fdcf98

3 files changed

Lines changed: 107 additions & 6 deletions

File tree

.github/workflows/playwright.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@ jobs:
4444
with:
4545
version: "latest"
4646

47+
- name: Resolve seed repository SHAs
  id: seed-repo-shas
  run: |
    # Fail the step if ls-remote fails anywhere in a pipeline; the default
    # run shell does not enable pipefail.
    set -euo pipefail
    # Assign before echoing: a failure inside "echo $(...)" is swallowed and
    # would silently write an empty SHA into the cache key.
    graphrag_sdk_sha="$(git ls-remote https://github.com/FalkorDB/GraphRAG-SDK HEAD | cut -f1)"
    flask_sha="$(git ls-remote https://github.com/pallets/flask HEAD | cut -f1)"
    if [ -z "$graphrag_sdk_sha" ] || [ -z "$flask_sha" ]; then
      echo "Failed to resolve HEAD SHA for one or more seed repositories" >&2
      exit 1
    fi
    echo "graphrag_sdk=$graphrag_sdk_sha" >> "$GITHUB_OUTPUT"
    echo "flask=$flask_sha" >> "$GITHUB_OUTPUT"
52+
53+
- name: Cache seeded repository clones
  uses: actions/cache@v4
  with:
    path: repositories
    # No restore-keys on purpose: the seed script reuses any directory that
    # already contains .git, so a prefix fallback would restore a clone from
    # an older upstream SHA and then re-save it under the new SHA's key.
    key: ${{ runner.os }}-seed-repos-${{ hashFiles('e2e/seed_test_data.py') }}-${{ steps.seed-repo-shas.outputs.graphrag_sdk }}-${{ steps.seed-repo-shas.outputs.flask }}
60+
4761
- name: Install backend dependencies
4862
run: uv sync
4963

e2e/seed_test_data.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
#!/usr/bin/env python3
22
"""Seed FalkorDB with test data for Playwright e2e tests."""
33

4-
import os
5-
import sys
64
import logging
5+
import os
6+
import shutil
7+
import subprocess
8+
from pathlib import Path
9+
from urllib.parse import urlparse
10+
11+
from api.project import Project
12+
from falkordb import FalkorDB
713

814
logging.basicConfig(
915
level=logging.INFO,
1016
format="%(asctime)s - %(levelname)s - %(message)s",
1117
)
1218
logger = logging.getLogger(__name__)
1319

14-
from falkordb import FalkorDB
15-
from api.project import Project
16-
1720
REPOS = [
1821
"https://github.com/FalkorDB/GraphRAG-SDK",
1922
"https://github.com/pallets/flask",
@@ -25,6 +28,39 @@
2528
("import_data", "add_node"),
2629
]
2730

31+
# Root directory that holds one clone per seeded repository. It is resolved
# against the working directory at import time, so the location depends on
# where the script is started from.
# NOTE(review): presumably meant to match the workflow's cached `repositories`
# path, which requires running from the repository root - confirm.
REPOSITORIES_DIR = Path.cwd() / "repositories"
32+
33+
34+
def repo_name_from_url(url: str) -> str:
    """Return the repository directory name encoded in a clone URL.

    The name is the last path segment of ``url`` with trailing slashes and a
    trailing ``.git`` suffix removed, e.g.
    ``https://github.com/pallets/flask.git`` -> ``flask``.

    Args:
        url: Repository URL to derive the name from.

    Returns:
        The bare repository name.

    Raises:
        ValueError: If the URL yields no usable name (empty, ``.`` or ``..``).
    """
    name = urlparse(url).path.rstrip("/").split("/")[-1].removesuffix(".git")
    # The name is joined onto the clone cache directory, and that path can be
    # removed before cloning, so it must never resolve to the cache root
    # itself or to its parent.
    if name in ("", ".", ".."):
        raise ValueError(f"Cannot derive a repository name from URL: {url!r}")
    return name
36+
37+
38+
def clone_repository(url: str, path: Path) -> Path:
    """Shallow-clone ``url`` into ``path``, replacing anything already there.

    Args:
        url: Repository location handed to ``git clone``.
        path: Destination directory; removed first if it already exists.

    Returns:
        ``path``, once the clone has completed.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
    """
    if path.exists():
        shutil.rmtree(path)

    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        subprocess.run(
            ["git", "clone", "--depth", "1", url, str(path)],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        # Output is captured and CalledProcessError's message omits stderr,
        # so log it here or git's actual diagnosis is lost.
        logging.getLogger(__name__).error(
            "git clone of %s failed (exit %s): %s",
            url,
            exc.returncode,
            (exc.stderr or "").strip(),
        )
        raise

    return path
51+
52+
53+
def load_project(url: str) -> Project:
    """Build a ``Project`` for ``url`` from a clone under ``REPOSITORIES_DIR``.

    A directory that already contains ``.git`` is reused as-is; otherwise the
    repository is cloned into place first.
    """
    checkout = REPOSITORIES_DIR / repo_name_from_url(url)
    has_cached_clone = (checkout / ".git").exists()

    if not has_cached_clone:
        logger.info("Cloning repository into cache at %s", checkout)
        clone_repository(url, checkout)
    else:
        logger.info("Using cached repository clone at %s", checkout)

    return Project.from_local_repository(checkout)
63+
2864

2965
def ensure_calls_edges(graph_name: str) -> None:
3066
"""Ensure required CALLS edges exist for E2E tests.
@@ -63,7 +99,7 @@ def ensure_calls_edges(graph_name: str) -> None:
6399
def main():
64100
for url in REPOS:
65101
logger.info("Seeding %s ...", url)
66-
proj = Project.from_git_repository(url)
102+
proj = load_project(url)
67103
proj.analyze_sources()
68104
logger.info("Done seeding %s", url)
69105

tests/test_seed_test_data.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from pathlib import Path
2+
3+
import e2e.seed_test_data as seed_test_data
4+
5+
6+
def test_load_project_uses_cached_clone(monkeypatch, tmp_path):
    """A directory that already contains ``.git`` is loaded without cloning."""
    repo_path = tmp_path / "GraphRAG-SDK"
    (repo_path / ".git").mkdir(parents=True)

    calls = []
    clone_calls = []

    class FakeProject:
        @staticmethod
        def from_local_repository(path):
            calls.append(path)
            return path

    def fake_clone(url: str, path: Path) -> Path:
        # Recorded instead of executed, so a regression that clones anyway
        # fails the assertion below rather than running a real git clone.
        clone_calls.append((url, path))
        return path

    monkeypatch.setattr(seed_test_data, "REPOSITORIES_DIR", tmp_path)
    monkeypatch.setattr(seed_test_data, "Project", FakeProject)
    monkeypatch.setattr(seed_test_data, "clone_repository", fake_clone)

    project = seed_test_data.load_project("https://github.com/FalkorDB/GraphRAG-SDK")

    assert project == repo_path
    assert calls == [repo_path]
    assert clone_calls == []
25+
26+
27+
def test_load_project_clones_into_cache(monkeypatch, tmp_path):
    """A missing clone is created through ``clone_repository`` and then loaded."""
    expected_path = tmp_path / "flask"
    cloned = []
    loaded = []

    def record_clone(url: str, path: Path) -> Path:
        # Stand-in for the real clone: note the call and create the marker dir.
        cloned.append((url, path))
        (path / ".git").mkdir(parents=True)
        return path

    class StubProject:
        @staticmethod
        def from_local_repository(path):
            loaded.append(path)
            return path

    replacements = (
        ("REPOSITORIES_DIR", tmp_path),
        ("Project", StubProject),
        ("clone_repository", record_clone),
    )
    for attribute, replacement in replacements:
        monkeypatch.setattr(seed_test_data, attribute, replacement)

    result = seed_test_data.load_project("https://github.com/pallets/flask")

    assert result == expected_path
    assert cloned == [("https://github.com/pallets/flask", expected_path)]
    assert loaded == [expected_path]

0 commit comments

Comments
 (0)