Skip to content

Commit b74bd5a

Browse files
committed
fix: paginate PR files and reviews API calls to fetch complete data
get_pull_request_files() and get_pull_request_reviews() were not passing per_page or handling pagination. GitHub defaults to 30 items per page, so PRs with 31+ files would silently receive an incomplete file list — causing MaxPrLocCondition (and every other condition reading changed_files) to undercount LOC and potentially pass when it should fail.

- Set per_page=100 (GitHub max) and loop until all pages fetched
- Gracefully return partial results if a later page errors
- Add 6 tests covering multi-page, single-page, per_page param, reviews pagination, and error-on-page-2 scenarios
1 parent 92140b4 commit b74bd5a

2 files changed

Lines changed: 155 additions & 24 deletions

File tree

src/integrations/github/api.py

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,11 @@ async def get_check_runs(self, repo: str, sha: str, installation_id: int) -> lis
471471
return []
472472

473473
async def get_pull_request_reviews(self, repo: str, pr_number: int, installation_id: int) -> list[dict[str, Any]]:
474-
"""Get reviews for a pull request."""
474+
"""Get reviews for a pull request.
475+
476+
Paginates through all pages to ensure the full review list is returned.
477+
GitHub defaults to 30 reviews per page; max is 100.
478+
"""
475479
try:
476480
token = await self.get_installation_access_token(installation_id)
477481
if not token:
@@ -480,20 +484,34 @@ async def get_pull_request_reviews(self, repo: str, pr_number: int, installation
480484

481485
headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"}
482486

483-
url = f"{config.github.api_base_url}/repos/{repo}/pulls/{pr_number}/reviews"
487+
all_reviews: list[dict[str, Any]] = []
488+
page = 1
489+
per_page = 100
484490

485491
session = await self._get_session()
486-
async with session.get(url, headers=headers) as response:
487-
if response.status == 200:
492+
while True:
493+
url = (
494+
f"{config.github.api_base_url}/repos/{repo}/pulls/{pr_number}"
495+
f"/reviews?per_page={per_page}&page={page}"
496+
)
497+
async with session.get(url, headers=headers) as response:
498+
if response.status != 200:
499+
error_text = await response.text()
500+
logger.error(
501+
f"Failed to get reviews for PR #{pr_number} in {repo}. "
502+
f"Status: {response.status}, Response: {error_text}"
503+
)
504+
break
488505
result = await response.json()
489-
logger.info(f"Retrieved {len(result)} reviews for PR #{pr_number} in {repo}")
490-
return cast("list[dict[str, Any]]", result)
491-
else:
492-
error_text = await response.text()
493-
logger.error(
494-
f"Failed to get reviews for PR #{pr_number} in {repo}. Status: {response.status}, Response: {error_text}"
495-
)
496-
return []
506+
if not result:
507+
break
508+
all_reviews.extend(result)
509+
if len(result) < per_page:
510+
break
511+
page += 1
512+
513+
logger.info(f"Retrieved {len(all_reviews)} reviews for PR #{pr_number} in {repo}")
514+
return all_reviews
497515
except Exception as e:
498516
logger.error(f"Error getting reviews for PR #{pr_number} in {repo}: {e}")
499517
return []
@@ -555,7 +573,12 @@ async def get_pull_request_review_threads(
555573
return []
556574

557575
async def get_pull_request_files(self, repo: str, pr_number: int, installation_id: int) -> list[dict[str, Any]]:
558-
"""Get files changed in a pull request."""
576+
"""Get files changed in a pull request.
577+
578+
Paginates through all pages to ensure the full file list is returned.
579+
GitHub defaults to 30 files per page; max is 100. PRs with more than
580+
3,000 files are truncated by the API regardless of pagination.
581+
"""
559582
try:
560583
token = await self.get_installation_access_token(installation_id)
561584
if not token:
@@ -564,20 +587,34 @@ async def get_pull_request_files(self, repo: str, pr_number: int, installation_i
564587

565588
headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"}
566589

567-
url = f"{config.github.api_base_url}/repos/{repo}/pulls/{pr_number}/files"
590+
all_files: list[dict[str, Any]] = []
591+
page = 1
592+
per_page = 100
568593

569594
session = await self._get_session()
570-
async with session.get(url, headers=headers) as response:
571-
if response.status == 200:
595+
while True:
596+
url = (
597+
f"{config.github.api_base_url}/repos/{repo}/pulls/{pr_number}"
598+
f"/files?per_page={per_page}&page={page}"
599+
)
600+
async with session.get(url, headers=headers) as response:
601+
if response.status != 200:
602+
error_text = await response.text()
603+
logger.error(
604+
f"Failed to get files for PR #{pr_number} in {repo}. "
605+
f"Status: {response.status}, Response: {error_text}"
606+
)
607+
break
572608
result = await response.json()
573-
logger.info(f"Retrieved {len(result)} files for PR #{pr_number} in {repo}")
574-
return cast("list[dict[str, Any]]", result)
575-
else:
576-
error_text = await response.text()
577-
logger.error(
578-
f"Failed to get files for PR #{pr_number} in {repo}. Status: {response.status}, Response: {error_text}"
579-
)
580-
return []
609+
if not result:
610+
break
611+
all_files.extend(result)
612+
if len(result) < per_page:
613+
break
614+
page += 1
615+
616+
logger.info(f"Retrieved {len(all_files)} files for PR #{pr_number} in {repo}")
617+
return all_files
581618
except Exception as e:
582619
logger.error(f"Error getting files for PR #{pr_number} in {repo}: {e}")
583620
return []

tests/unit/integrations/github/test_api.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,3 +222,97 @@ async def test_list_pull_requests_success(github_client, mock_aiohttp_session):
222222
prs = await github_client.list_pull_requests("owner/repo", installation_id=123)
223223

224224
assert prs == [{"number": 1}]
225+
226+
227+
@pytest.mark.asyncio
228+
async def test_get_pull_request_files_paginates(github_client, mock_aiohttp_session):
229+
"""Files endpoint should fetch all pages when results fill a page."""
230+
mock_token_response = mock_aiohttp_session.create_mock_response(201, json_data={"token": "access_token"})
231+
mock_aiohttp_session.post.return_value = mock_token_response
232+
233+
# Page 1: full page (100 items) triggers fetching page 2
234+
page1 = [{"filename": f"file_{i}.py"} for i in range(100)]
235+
page2 = [{"filename": f"file_{i}.py"} for i in range(100, 135)]
236+
237+
mock_resp_page1 = mock_aiohttp_session.create_mock_response(200, json_data=page1)
238+
mock_resp_page2 = mock_aiohttp_session.create_mock_response(200, json_data=page2)
239+
240+
mock_aiohttp_session.get.side_effect = [mock_resp_page1, mock_resp_page2]
241+
242+
files = await github_client.get_pull_request_files("owner/repo", 1, installation_id=123)
243+
244+
assert len(files) == 135
245+
assert files[0]["filename"] == "file_0.py"
246+
assert files[-1]["filename"] == "file_134.py"
247+
assert mock_aiohttp_session.get.call_count == 2
248+
249+
250+
@pytest.mark.asyncio
251+
async def test_get_pull_request_files_single_page(github_client, mock_aiohttp_session):
252+
"""Files endpoint should not paginate when results don't fill a page."""
253+
mock_token_response = mock_aiohttp_session.create_mock_response(201, json_data={"token": "access_token"})
254+
mock_aiohttp_session.post.return_value = mock_token_response
255+
256+
page1 = [{"filename": f"file_{i}.py"} for i in range(30)]
257+
mock_resp = mock_aiohttp_session.create_mock_response(200, json_data=page1)
258+
mock_aiohttp_session.get.return_value = mock_resp
259+
260+
files = await github_client.get_pull_request_files("owner/repo", 1, installation_id=123)
261+
262+
assert len(files) == 30
263+
assert mock_aiohttp_session.get.call_count == 1
264+
265+
266+
@pytest.mark.asyncio
267+
async def test_get_pull_request_files_uses_per_page_100(github_client, mock_aiohttp_session):
268+
"""Files endpoint should request per_page=100."""
269+
mock_token_response = mock_aiohttp_session.create_mock_response(201, json_data={"token": "access_token"})
270+
mock_aiohttp_session.post.return_value = mock_token_response
271+
272+
mock_resp = mock_aiohttp_session.create_mock_response(200, json_data=[])
273+
mock_aiohttp_session.get.return_value = mock_resp
274+
275+
await github_client.get_pull_request_files("owner/repo", 1, installation_id=123)
276+
277+
call_args = mock_aiohttp_session.get.call_args
278+
url = call_args[0][0]
279+
assert "per_page=100" in url
280+
assert "page=1" in url
281+
282+
283+
@pytest.mark.asyncio
284+
async def test_get_pull_request_reviews_paginates(github_client, mock_aiohttp_session):
285+
"""Reviews endpoint should fetch all pages when results fill a page."""
286+
mock_token_response = mock_aiohttp_session.create_mock_response(201, json_data={"token": "access_token"})
287+
mock_aiohttp_session.post.return_value = mock_token_response
288+
289+
page1 = [{"id": i, "state": "APPROVED"} for i in range(100)]
290+
page2 = [{"id": i, "state": "CHANGES_REQUESTED"} for i in range(100, 110)]
291+
292+
mock_resp_page1 = mock_aiohttp_session.create_mock_response(200, json_data=page1)
293+
mock_resp_page2 = mock_aiohttp_session.create_mock_response(200, json_data=page2)
294+
295+
mock_aiohttp_session.get.side_effect = [mock_resp_page1, mock_resp_page2]
296+
297+
reviews = await github_client.get_pull_request_reviews("owner/repo", 1, installation_id=123)
298+
299+
assert len(reviews) == 110
300+
assert mock_aiohttp_session.get.call_count == 2
301+
302+
303+
@pytest.mark.asyncio
304+
async def test_get_pull_request_files_error_on_page2(github_client, mock_aiohttp_session):
305+
"""Files endpoint should return partial results if a later page errors."""
306+
mock_token_response = mock_aiohttp_session.create_mock_response(201, json_data={"token": "access_token"})
307+
mock_aiohttp_session.post.return_value = mock_token_response
308+
309+
page1 = [{"filename": f"file_{i}.py"} for i in range(100)]
310+
mock_resp_page1 = mock_aiohttp_session.create_mock_response(200, json_data=page1)
311+
mock_resp_page2 = mock_aiohttp_session.create_mock_response(500, text_data="Internal Server Error")
312+
313+
mock_aiohttp_session.get.side_effect = [mock_resp_page1, mock_resp_page2]
314+
315+
files = await github_client.get_pull_request_files("owner/repo", 1, installation_id=123)
316+
317+
# Should return the first page's results even if page 2 fails
318+
assert len(files) == 100

0 commit comments

Comments (0)