From 2a79417c3e7d040592bd0ea02f278ee4a8c91fa2 Mon Sep 17 00:00:00 2001 From: Varin Nair Date: Wed, 28 Jan 2026 11:00:58 -0800 Subject: [PATCH 1/6] fix: fix fetch DROID_COMMENT_ID bug (#22) --- src/entrypoints/update-comment-link.ts | 36 +-- .../comments/fetch-droid-comment.ts | 94 ++++++ test/fetch-droid-comment.test.ts | 291 ++++++++++++++++++ 3 files changed, 394 insertions(+), 27 deletions(-) create mode 100644 src/github/operations/comments/fetch-droid-comment.ts create mode 100644 test/fetch-droid-comment.test.ts diff --git a/src/entrypoints/update-comment-link.ts b/src/entrypoints/update-comment-link.ts index ca9344f..64559ed 100644 --- a/src/entrypoints/update-comment-link.ts +++ b/src/entrypoints/update-comment-link.ts @@ -14,6 +14,7 @@ import { import { GITHUB_SERVER_URL } from "../github/api/config"; import { updateDroidComment } from "../github/operations/comments/update-droid-comment"; +import { fetchDroidComment } from "../github/operations/comments/fetch-droid-comment"; async function run() { try { @@ -39,33 +40,14 @@ async function run() { let isPRReviewComment = false; try { - // GitHub has separate ID namespaces for review comments and issue comments - // We need to use the correct API based on the event type - if (isPullRequestReviewCommentEvent(context)) { - // For PR review comments, use the pulls API - console.log(`Fetching PR review comment ${commentId}`); - const { data: prComment } = await octokit.rest.pulls.getReviewComment({ - owner, - repo, - comment_id: commentId, - }); - comment = prComment; - isPRReviewComment = true; - console.log("Successfully fetched as PR review comment"); - } - - // For all other event types, use the issues API - if (!comment) { - console.log(`Fetching issue comment ${commentId}`); - const { data: issueComment } = await octokit.rest.issues.getComment({ - owner, - repo, - comment_id: commentId, - }); - comment = issueComment; - isPRReviewComment = false; - console.log("Successfully fetched as issue comment"); - } + const result = await fetchDroidComment(octokit, { + owner, + repo, + commentId, + isPullRequestReviewCommentEvent: isPullRequestReviewCommentEvent(context), + }); + comment = result.comment; + isPRReviewComment = result.isPRReviewComment; } catch (finalError) { // If all attempts fail, try to determine more information about the comment console.error("Failed to fetch comment. Debug info:"); diff --git a/src/github/operations/comments/fetch-droid-comment.ts b/src/github/operations/comments/fetch-droid-comment.ts new file mode 100644 index 0000000..98eb071 --- /dev/null +++ b/src/github/operations/comments/fetch-droid-comment.ts @@ -0,0 +1,94 @@ +import type { Octokits } from "../../api/client"; + +export interface FetchDroidCommentParams { + owner: string; + repo: string; + commentId: number; + isPullRequestReviewCommentEvent: boolean; +} + +export interface FetchDroidCommentResult { + comment: { body: string | null }; + isPRReviewComment: boolean; +} + +/** + * Fetches a comment from GitHub, trying both issue comment and PR review comment APIs. + * + * GitHub uses separate ID namespaces for review comments and issue comments. + * The comment type doesn't always match the event type (e.g., a PR review comment + * could have been created in a previous step, but now we're in a pull_request event). + * This function tries both endpoints to handle all cases. + */ +export async function fetchDroidComment( + octokit: Octokits, + params: FetchDroidCommentParams, +): Promise { + const { owner, repo, commentId, isPullRequestReviewCommentEvent } = params; + + let comment: { body: string | null } | undefined; + let isPRReviewComment = false; + + // First, try the endpoint matching the event type for efficiency + if (isPullRequestReviewCommentEvent) { + try { + console.log(`Fetching PR review comment ${commentId}`); + const { data: prComment } = await octokit.rest.pulls.getReviewComment({ + owner, + repo, + comment_id: commentId, + }); + comment = { body: prComment.body ?? null }; + isPRReviewComment = true; + console.log("Successfully fetched as PR review comment"); + } catch { + // Fall through to try issue comment API + console.log("PR review comment fetch failed, trying issue comment API"); + } + } + + // Try issue comment API + if (!comment) { + try { + console.log(`Fetching issue comment ${commentId}`); + const { data: issueComment } = await octokit.rest.issues.getComment({ + owner, + repo, + comment_id: commentId, + }); + comment = { body: issueComment.body ?? null }; + isPRReviewComment = false; + console.log("Successfully fetched as issue comment"); + } catch (issueError) { + // If event wasn't a PR review comment event, try review comment API as fallback + if (!isPullRequestReviewCommentEvent) { + console.log( + "Issue comment fetch failed, trying PR review comment API", + ); + try { + const { data: prComment } = await octokit.rest.pulls.getReviewComment( + { + owner, + repo, + comment_id: commentId, + }, + ); + comment = { body: prComment.body ?? null }; + isPRReviewComment = true; + console.log("Successfully fetched as PR review comment"); + } catch { + // Both APIs failed, throw the original issue error + throw issueError; + } + } else { + throw issueError; + } + } + } + + if (!comment) { + throw new Error("Failed to fetch comment"); + } + + return { comment, isPRReviewComment }; +} diff --git a/test/fetch-droid-comment.test.ts b/test/fetch-droid-comment.test.ts new file mode 100644 index 0000000..eeb67e5 --- /dev/null +++ b/test/fetch-droid-comment.test.ts @@ -0,0 +1,291 @@ +import { describe, test, expect, jest, beforeEach } from "bun:test"; +import type { Octokits } from "../src/github/api/client"; +import { + fetchDroidComment, + type FetchDroidCommentParams, +} from "../src/github/operations/comments/fetch-droid-comment"; + +describe("fetchDroidComment", () => { + let mockOctokit: Octokits; + + beforeEach(() => { + mockOctokit = { + rest: { + issues: { + getComment: jest.fn(), + }, + pulls: { + getReviewComment: jest.fn(), + }, + }, + } as unknown as Octokits; + }); + + test("should fetch issue comment successfully when event is not PR review comment", async () => { + const mockIssueComment = { + data: { + id: 123456, + body: "Test issue comment", + html_url: + "https://github.com/owner/repo/issues/1#issuecomment-123456", + }, + }; + + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest + .fn() + .mockResolvedValue(mockIssueComment); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 123456, + isPullRequestReviewCommentEvent: false, + }; + + const result = await fetchDroidComment(mockOctokit, params); + + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalledWith({ + owner: "testowner", + repo: "testrepo", + comment_id: 123456, + }); + expect(result.comment.body).toBe("Test issue comment"); + expect(result.isPRReviewComment).toBe(false); + }); + + test("should fetch PR review comment successfully when event is PR review comment", async () => { + const mockPRReviewComment = { + data: { + id: 789012, + body: "Test PR review comment", + html_url: + "https://github.com/owner/repo/pull/1#discussion_r789012", + }, + }; + + // @ts-expect-error Mock implementation + mockOctokit.rest.pulls.getReviewComment = jest + .fn() + .mockResolvedValue(mockPRReviewComment); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 789012, + isPullRequestReviewCommentEvent: true, + }; + + const result = await fetchDroidComment(mockOctokit, params); + + expect(mockOctokit.rest.pulls.getReviewComment).toHaveBeenCalledWith({ + owner: "testowner", + repo: "testrepo", + comment_id: 789012, + }); + expect(result.comment.body).toBe("Test PR review comment"); + expect(result.isPRReviewComment).toBe(true); + }); + + test("should fallback to PR review comment API when issue comment API returns 404 (non-PR-review-comment event)", async () => { + // This is the key test case: event is pull_request (not pull_request_review_comment) + // but the comment ID is actually a PR review comment ID + const mockError = new Error("Not Found") as Error & { status: number }; + mockError.status = 404; + + const mockPRReviewComment = { + data: { + id: 3812927863, + body: "Test PR review comment fetched via fallback", + html_url: + "https://github.com/owner/repo/pull/8179#discussion_r3812927863", + }, + }; + + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest.fn().mockRejectedValue(mockError); + // @ts-expect-error Mock implementation + mockOctokit.rest.pulls.getReviewComment = jest + .fn() + .mockResolvedValue(mockPRReviewComment); + + const params: FetchDroidCommentParams = { + owner: "trywingman", + repo: "stringsai", + commentId: 3812927863, + isPullRequestReviewCommentEvent: false, // Event is pull_request, not pull_request_review_comment + }; + + const result = await fetchDroidComment(mockOctokit, params); + + // Should have tried issue comment API first + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalledWith({ + owner: "trywingman", + repo: "stringsai", + comment_id: 3812927863, + }); + + // Then should have fallen back to PR review comment API + expect(mockOctokit.rest.pulls.getReviewComment).toHaveBeenCalledWith({ + owner: "trywingman", + repo: "stringsai", + comment_id: 3812927863, + }); + + expect(result.comment.body).toBe( + "Test PR review comment fetched via fallback", + ); + expect(result.isPRReviewComment).toBe(true); + }); + + test("should fallback to issue comment API when PR review comment API fails for PR review comment event", async () => { + const mockError = new Error("Not Found") as Error & { status: number }; + mockError.status = 404; + + const mockIssueComment = { + data: { + id: 456789, + body: "Test issue comment fetched via fallback", + html_url: + "https://github.com/owner/repo/issues/1#issuecomment-456789", + }, + }; + + // @ts-expect-error Mock implementation + mockOctokit.rest.pulls.getReviewComment = jest + .fn() + .mockRejectedValue(mockError); + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest + .fn() + .mockResolvedValue(mockIssueComment); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 456789, + isPullRequestReviewCommentEvent: true, + }; + + const result = await fetchDroidComment(mockOctokit, params); + + // Should have tried PR review comment API first + expect(mockOctokit.rest.pulls.getReviewComment).toHaveBeenCalledWith({ + owner: "testowner", + repo: "testrepo", + comment_id: 456789, + }); + + // Then should have fallen back to issue comment API + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalledWith({ + owner: "testowner", + repo: "testrepo", + comment_id: 456789, + }); + + expect(result.comment.body).toBe( + "Test issue comment fetched via fallback", + ); + expect(result.isPRReviewComment).toBe(false); + }); + + test("should throw error when both APIs fail for non-PR-review-comment event", async () => { + const issueError = new Error("Issue comment not found") as Error & { + status: number; + }; + issueError.status = 404; + + const prReviewError = new Error("PR review comment not found") as Error & { + status: number; + }; + prReviewError.status = 404; + + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest + .fn() + .mockRejectedValue(issueError); + // @ts-expect-error Mock implementation + mockOctokit.rest.pulls.getReviewComment = jest + .fn() + .mockRejectedValue(prReviewError); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 999999, + isPullRequestReviewCommentEvent: false, + }; + + // Should throw the original issue error after both APIs fail + await expect(fetchDroidComment(mockOctokit, params)).rejects.toThrow( + "Issue comment not found", + ); + + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalled(); + expect(mockOctokit.rest.pulls.getReviewComment).toHaveBeenCalled(); + }); + + test("should throw error when both APIs fail for PR review comment event", async () => { + const prReviewError = new Error("PR review comment not found") as Error & { + status: number; + }; + prReviewError.status = 404; + + const issueError = new Error("Issue comment not found") as Error & { + status: number; + }; + issueError.status = 404; + + // @ts-expect-error Mock implementation + mockOctokit.rest.pulls.getReviewComment = jest + .fn() + .mockRejectedValue(prReviewError); + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest + .fn() + .mockRejectedValue(issueError); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 888888, + isPullRequestReviewCommentEvent: true, + }; + + // Should throw the issue error (from the fallback attempt) + await expect(fetchDroidComment(mockOctokit, params)).rejects.toThrow( + "Issue comment not found", + ); + + expect(mockOctokit.rest.pulls.getReviewComment).toHaveBeenCalled(); + expect(mockOctokit.rest.issues.getComment).toHaveBeenCalled(); + }); + + test("should handle null body in comment", async () => { + const mockIssueComment = { + data: { + id: 111222, + body: null, + html_url: + "https://github.com/owner/repo/issues/1#issuecomment-111222", + }, + }; + + // @ts-expect-error Mock implementation + mockOctokit.rest.issues.getComment = jest + .fn() + .mockResolvedValue(mockIssueComment); + + const params: FetchDroidCommentParams = { + owner: "testowner", + repo: "testrepo", + commentId: 111222, + isPullRequestReviewCommentEvent: false, + }; + + const result = await fetchDroidComment(mockOctokit, params); + + expect(result.comment.body).toBeNull(); + expect(result.isPRReviewComment).toBe(false); + }); +}); From 3bfa39ca63dd86bf2b013851e678e6e2e418e246 Mon Sep 17 00:00:00 2001 From: Varin Nair Date: Thu, 29 Jan 2026 11:38:20 -0800 Subject: [PATCH 2/6] fix: handle deleted tracking comment gracefully in update-comment-link (#23) --- src/entrypoints/update-comment-link.ts | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/entrypoints/update-comment-link.ts b/src/entrypoints/update-comment-link.ts index 64559ed..ad57b16 100644 --- a/src/entrypoints/update-comment-link.ts +++ b/src/entrypoints/update-comment-link.ts @@ -49,7 +49,25 @@ async function run() { comment = result.comment; isPRReviewComment = result.isPRReviewComment; } catch (finalError) { - // If all attempts fail, try to determine more information about the comment + // Check if this is a 404 error (comment was deleted) + const is404 = + finalError instanceof Error && + "status" in finalError && + (finalError as { status: number }).status === 404; + + if (is404) { + // Comment was deleted (possibly by user, spam filter, or another automation) + // This is not a critical failure - the main review/action likely completed + console.log( + `⚠️ Comment ${commentId} no longer exists (404). Skipping update.`, + ); + console.log( + "This can happen if the comment was deleted by a user, spam filter, or another automation.", + ); + process.exit(0); + } + + // For non-404 errors, log debug info and fail console.error("Failed to fetch comment. Debug info:"); console.error(`Comment ID: ${commentId}`); console.error(`Event name: ${context.eventName}`); From 7e4b9ade9d2dd35cff9c9b0b83502697253aee51 Mon Sep 17 00:00:00 2001 From: User Date: Fri, 30 Jan 2026 13:45:55 -0800 Subject: [PATCH 3/6] fix: reduce workflow permissions from contents:write to contents:read --- .github/workflows/droid-review.yml | 2 +- .github/workflows/droid.yml | 2 +- README.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/droid-review.yml b/.github/workflows/droid-review.yml index 051fff7..59f9c58 100644 --- a/.github/workflows/droid-review.yml +++ b/.github/workflows/droid-review.yml @@ -9,7 +9,7 @@ jobs: if: github.event.pull_request.draft == false runs-on: ubuntu-latest permissions: - contents: write + contents: read pull-requests: write issues: write id-token: write diff --git a/.github/workflows/droid.yml b/.github/workflows/droid.yml index 81c757f..ae439c4 100644 --- a/.github/workflows/droid.yml +++ b/.github/workflows/droid.yml @@ -22,7 +22,7 @@ jobs: (github.event_name == 'pull_request' && (contains(github.event.pull_request.body, '@droid') || contains(github.event.pull_request.title, '@droid'))) runs-on: ubuntu-latest permissions: - contents: write + contents: read pull-requests: write issues: write id-token: write diff --git a/README.md b/README.md index b3cfb4d..ce3ae2d 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ jobs: (github.event_name == 'pull_request' && (contains(github.event.pull_request.body, '@droid') || contains(github.event.pull_request.title, '@droid'))) runs-on: ubuntu-latest permissions: - contents: write + contents: read pull-requests: write issues: write id-token: write @@ -82,7 +82,7 @@ jobs: if: github.event.pull_request.draft == false runs-on: ubuntu-latest permissions: - contents: write + contents: read pull-requests: write issues: write id-token: write From 03b9825026e7a1a3bca8e45d22fa529ce9f6d3af Mon Sep 17 00:00:00 2001 From: Varin Nair Date: Thu, 19 Feb 2026 19:20:47 -0800 Subject: [PATCH 4/6] Release/2026 02 19 (#38) * fix: pre-compute review artifacts and deduplicate into shared module, remove security review from workflow (#34) * fix: pre-compute review artifacts and deduplicate into shared module, remove security review from workflow * test * fix: use review-candidates prompt and two-pass validator for automatic review flow * add gpt-5.2 default model * fix: default to gpt-5.2 with high reasoning and simplify model config * fix: include MCP tools in --enabled-tools for droid exec * add debugging * test with --list-tools * fix: resolve MCP server paths relative to repo root instead of sub-action directory * add model and reasoning effort * refactor: remove combine action and related prompt generation files * add --tag flags to droid exec calls * fix: reuse pre-computed review artifacts in validator step instead of redundant checkout * feat: update tracking comment after review completes (#35) * update tag * feat: update tracking comment after review completes in review sub-action Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: make tracking comment update non-blocking with continue-on-error Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: copy ~/.factory to temp dir before upload-artifact (tilde not expanded by Node.js glob) * fix: add include-hidden-files to upload-artifact and copy .factory via /Users/user --------- Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * update to v3 (#36) * update to v3 * test dev * feat: update droid-review and droid workflows to streamline review process and enhance permissions * fix * update to v3 --------- Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- .factory/droids/file-group-reviewer.md | 6 +- .github/workflows/droid-review.yml | 121 +--------- .github/workflows/droid.yml | 4 +- README.md | 10 +- action.yml | 17 +- combine/action.yml | 81 ------- review/action.yml | 75 +++++- security/action.yml | 1 + src/create-prompt/index.ts | 6 + src/create-prompt/templates/combine-prompt.ts | 101 --------- src/create-prompt/templates/review-prompt.ts | 34 ++- .../templates/security-review-prompt.ts | 2 +- src/create-prompt/types.ts | 1 + src/entrypoints/combine-reviews.ts | 213 ------------------ src/entrypoints/generate-combine-prompt.ts | 112 --------- src/entrypoints/generate-review-prompt.ts | 115 ++++++++-- src/entrypoints/prepare-validator.ts | 4 +- src/entrypoints/update-comment-link.ts | 3 +- src/github/data/review-artifacts.ts | 169 ++++++++++++++ .../comments/fetch-droid-comment.ts | 4 +- src/mcp/install-mcp-server.ts | 31 ++- src/tag/commands/fill.ts | 1 + src/tag/commands/review-validator.ts | 150 ++---------- src/tag/commands/review.ts | 154 ++----------- .../templates/review-prompt.test.ts | 21 ++ .../templates/security-review-prompt.test.ts | 21 ++ test/entrypoints/prepare-validator.test.ts | 2 +- test/fetch-droid-comment.test.ts | 16 +- test/github/data/review-artifacts.test.ts | 204 +++++++++++++++++ test/integration/review-flow.test.ts | 41 ++-- test/modes/tag/review-command.test.ts | 29 +-- 31 files changed, 763 insertions(+), 986 deletions(-) delete mode 100644 combine/action.yml delete mode 100644 src/create-prompt/templates/combine-prompt.ts delete mode 100644 src/entrypoints/combine-reviews.ts delete mode 100644 src/entrypoints/generate-combine-prompt.ts create mode 100644 src/github/data/review-artifacts.ts create mode 100644 test/github/data/review-artifacts.test.ts diff --git a/.factory/droids/file-group-reviewer.md b/.factory/droids/file-group-reviewer.md index 7ba489b..7da7902 100644 --- a/.factory/droids/file-group-reviewer.md +++ b/.factory/droids/file-group-reviewer.md @@ -10,6 +10,7 @@ You are a senior staff software engineer and expert code reviewer. Your task: Review the assigned files from the PR and generate a JSON array of **high-confidence, actionable** review comments that pinpoint genuine issues. + - You are currently checked out to the PR branch. - Review ALL files assigned to you thoroughly. - Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems. @@ -24,7 +25,7 @@ Your task: Review the assigned files from the PR and generate a JSON array of ** - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches) - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics) - Only flag issues you are confident about—avoid speculative or stylistic nitpicks. - + 1. Read each assigned file in full to understand the context @@ -57,6 +58,7 @@ Return your findings as a JSON array (no wrapper object, just the array): If no issues found, return an empty array: `[]` Field definitions: + - `path`: Relative file path (must match exactly as provided in your assignment) - `body`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation - P0: Critical bugs (crashes, security vulnerabilities, data loss) @@ -65,7 +67,7 @@ Field definitions: - `line`: Target line number (single-line) or end line number (multi-line). Must be ≥ 0. - `startLine`: `null` for single-line comments, or start line number for multi-line comments - `side`: "RIGHT" for new/modified code (default), "LEFT" only for commenting on removed code - + - Output ONLY the JSON array—no additional commentary or markdown formatting around it. diff --git a/.github/workflows/droid-review.yml b/.github/workflows/droid-review.yml index 44a8a6f..31a1f6d 100644 --- a/.github/workflows/droid-review.yml +++ b/.github/workflows/droid-review.yml @@ -4,36 +4,14 @@ on: pull_request: types: [opened, ready_for_review, reopened] +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: - prepare: + droid-review: if: github.event.pull_request.draft == false runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write - issues: write - id-token: write - outputs: - comment_id: ${{ steps.prepare.outputs.comment_id }} - run_code_review: ${{ steps.prepare.outputs.run_code_review }} - run_security_review: ${{ steps.prepare.outputs.run_security_review }} - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - fetch-depth: 1 - - - name: Prepare - id: prepare - uses: Factory-AI/droid-action/prepare@v2 - with: - factory_api_key: ${{ secrets.FACTORY_API_KEY }} - automatic_review: true - - code-review: - needs: prepare - if: needs.prepare.outputs.run_code_review == 'true' - runs-on: ubuntu-latest permissions: contents: write pull-requests: write @@ -46,91 +24,8 @@ jobs: with: fetch-depth: 1 - - name: Run Code Review - uses: Factory-AI/droid-action/review@v2 + - name: Run Droid Auto Review + uses: Factory-AI/droid-action@v3 with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} - tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} - output_file: ${{ runner.temp }}/code-review-results.json - - - name: Upload Results - uses: actions/upload-artifact@v4 - with: - name: code-review-results - path: ${{ runner.temp }}/code-review-results.json - if-no-files-found: ignore - - security-review: - needs: prepare - if: needs.prepare.outputs.run_security_review == 'true' - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - issues: write - id-token: write - actions: read - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - fetch-depth: 1 - - - name: Run Security Review - uses: Factory-AI/droid-action/security@v2 - with: - factory_api_key: ${{ secrets.FACTORY_API_KEY }} - tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} - security_severity_threshold: medium - output_file: ${{ runner.temp }}/security-results.json - - - name: Upload Results - uses: actions/upload-artifact@v4 - with: - name: security-results - path: ${{ runner.temp }}/security-results.json - if-no-files-found: ignore - - combine: - needs: [prepare, code-review, security-review] - # Run combine when EITHER code review OR security review was executed - if: | - always() && - (needs.prepare.outputs.run_code_review == 'true' || - needs.prepare.outputs.run_security_review == 'true') - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - issues: write - id-token: write - actions: read - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - fetch-depth: 1 - - - name: Download Code Review Results - uses: actions/download-artifact@v4 - with: - name: code-review-results - path: ${{ runner.temp }} - continue-on-error: true - - - name: Download Security Results - uses: actions/download-artifact@v4 - with: - name: security-results - path: ${{ runner.temp }} - continue-on-error: true - - - name: Combine Results - uses: Factory-AI/droid-action/combine@v2 - with: - factory_api_key: ${{ secrets.FACTORY_API_KEY }} - tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} - code_review_results: ${{ runner.temp }}/code-review-results.json - security_results: ${{ runner.temp }}/security-results.json - code_review_status: ${{ needs.code-review.result }} - security_review_status: ${{ needs.security-review.result }} + automatic_review: true diff --git a/.github/workflows/droid.yml b/.github/workflows/droid.yml index 36c7be8..8b55471 100644 --- a/.github/workflows/droid.yml +++ b/.github/workflows/droid.yml @@ -22,7 +22,7 @@ jobs: (github.event_name == 'pull_request' && (contains(github.event.pull_request.body, '@droid') || contains(github.event.pull_request.title, '@droid'))) runs-on: ubuntu-latest permissions: - contents: read + contents: write pull-requests: write issues: write id-token: write @@ -34,6 +34,6 @@ jobs: fetch-depth: 1 - name: Run Droid Exec - uses: Factory-AI/droid-action@v2 + uses: Factory-AI/droid-action@v3 with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} diff --git a/README.md b/README.md index 40285f7..1a7138e 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ jobs: fetch-depth: 1 - name: Run Droid Exec - uses: Factory-AI/droid-action@v2 + uses: Factory-AI/droid-action@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} ``` @@ -104,7 +104,7 @@ jobs: - name: Prepare id: prepare - uses: Factory-AI/droid-action/prepare@v2 + uses: Factory-AI/droid-action/prepare@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} automatic_review: true @@ -127,7 +127,7 @@ jobs: fetch-depth: 1 - name: Run Code Review - uses: Factory-AI/droid-action/review@v2 + uses: Factory-AI/droid-action/review@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} @@ -157,7 +157,7 @@ jobs: fetch-depth: 1 - name: Run Security Review - uses: Factory-AI/droid-action/security@v2 + uses: Factory-AI/droid-action/security@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} @@ -206,7 +206,7 @@ jobs: continue-on-error: true - name: Combine Results - uses: Factory-AI/droid-action/combine@v2 + uses: Factory-AI/droid-action/combine@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} tracking_comment_id: ${{ needs.prepare.outputs.comment_id }} diff --git a/action.yml b/action.yml index 457b734..c716538 100644 --- a/action.yml +++ b/action.yml @@ -98,7 +98,7 @@ inputs: reasoning_effort: description: "Override reasoning effort for review flows (passed to Droid Exec as --reasoning-effort). If empty and review_model is also empty, the action defaults internally to gpt-5.2 at high reasoning." required: false - default: "" + default: "high" review_use_validator: description: "Enable two-pass review: generate candidate comments to JSON, then validate and post only approved ones." required: false @@ -373,7 +373,6 @@ runs: DETAILED_PERMISSION_MESSAGES: "1" FACTORY_API_KEY: ${{ inputs.factory_api_key }} - - name: Update comment with job link if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.droid_comment_id && always() shell: bash @@ -396,16 +395,22 @@ runs: TRACK_PROGRESS: ${{ inputs.track_progress }} AUTOMATIC_REVIEW: ${{ inputs.automatic_review }} + - name: Collect .factory debug files + if: always() && steps.prepare.outputs.contains_trigger == 'true' + shell: bash + run: | + if [ -d "$HOME/.factory" ]; then + cp -r "$HOME/.factory" "${{ runner.temp }}/.factory" + fi + - name: Upload debug artifacts if: always() && steps.prepare.outputs.contains_trigger == 'true' uses: actions/upload-artifact@v4 with: name: droid-review-debug-${{ github.run_id }} path: | - ~/.factory/logs/droid-log-single.log - ~/.factory/logs/console.log - ~/.factory/sessions/* - ~/.factory/droids/* + ${{ runner.temp }}/.factory/** ${{ runner.temp }}/droid-prompts/** + include-hidden-files: true if-no-files-found: ignore retention-days: 7 diff --git a/combine/action.yml b/combine/action.yml deleted file mode 100644 index e35eabc..0000000 --- a/combine/action.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: "Droid Combine Results" -description: "Combine code review and security review results, post inline comments, update summary" - -inputs: - factory_api_key: - description: "Factory API key" - required: true - tracking_comment_id: - description: "ID of the tracking comment to update" - required: true - code_review_results: - description: "Path to code review results JSON (optional)" - required: false - default: "" - security_results: - description: "Path to security results JSON (optional)" - required: false - default: "" - code_review_status: - description: "Code review job status (success/failure/skipped)" - required: false - default: "skipped" - security_review_status: - description: "Security review job status (success/failure/skipped)" - required: false - default: "skipped" - -runs: - using: "composite" - steps: - - name: Install Bun - uses: oven-sh/setup-bun@735343b667d3e6f658f44d0eca948eb6282f2b76 - with: - bun-version: 1.2.11 - - - name: Install Dependencies - shell: bash - run: | - cd ${{ github.action_path }}/.. - bun install - cd ${{ github.action_path }}/../base-action - bun install - - - name: Install Droid CLI - shell: bash - run: | - curl --retry 5 --retry-delay 2 --retry-all-errors -fsSL https://app.factory.ai/cli | sh - echo "$HOME/.local/bin" >> "$GITHUB_PATH" - "$HOME/.local/bin/droid" --version - - - name: Get GitHub Token - id: token - shell: bash - run: | - bun run ${{ github.action_path }}/../src/entrypoints/get-token.ts - env: - FACTORY_API_KEY: ${{ inputs.factory_api_key }} - - - name: Generate Combine Prompt - id: prompt - shell: bash - run: | - bun run ${{ github.action_path }}/../src/entrypoints/generate-combine-prompt.ts - env: - GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} - DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} - CODE_REVIEW_RESULTS: ${{ inputs.code_review_results }} - SECURITY_RESULTS: ${{ inputs.security_results }} - CODE_REVIEW_STATUS: ${{ inputs.code_review_status }} - SECURITY_REVIEW_STATUS: ${{ inputs.security_review_status }} - - - name: Run Combine - shell: bash - run: | - bun run ${{ github.action_path }}/../base-action/src/index.ts - env: - INPUT_PROMPT_FILE: ${{ runner.temp }}/droid-prompts/droid-prompt.txt - INPUT_DROID_ARGS: ${{ steps.prompt.outputs.droid_args }} - INPUT_MCP_TOOLS: ${{ steps.prompt.outputs.mcp_tools }} - FACTORY_API_KEY: ${{ inputs.factory_api_key }} - GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} diff --git a/review/action.yml b/review/action.yml index 257ad9c..6d8e620 100644 --- a/review/action.yml +++ b/review/action.yml @@ -15,11 +15,27 @@ inputs: review_model: description: "Model to use for review" required: false - default: "" + default: "gpt-5.2" + reasoning_effort: + description: "Reasoning effort for review (passed to Droid Exec as --reasoning-effort)" + required: false + default: "high" output_file: description: "Path to write review results JSON" required: false default: "" + review_use_validator: + description: "Enable two-pass review: generate candidate comments, then validate and post only approved ones." + required: false + default: "true" + review_candidates_path: + description: "Path to write review candidates JSON (pass 1)." + required: false + default: "${{ runner.temp }}/droid-prompts/review_candidates.json" + review_validated_path: + description: "Path to write review validated JSON (pass 2)." + required: false + default: "${{ runner.temp }}/droid-prompts/review_validated.json" outputs: conclusion: @@ -68,6 +84,19 @@ runs: DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} REVIEW_MODEL: ${{ inputs.review_model }} REVIEW_TYPE: "code" + REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} + REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} + REASONING_EFFORT: ${{ inputs.reasoning_effort }} + DROID_OUTPUT_FILE: ${{ inputs.output_file }} + + - name: Setup Custom Droids + shell: bash + run: | + mkdir -p ~/.factory/droids + if [ -d "${{ github.action_path }}/../.factory/droids" ]; then + cp -r ${{ github.action_path }}/../.factory/droids/* ~/.factory/droids/ + echo "Copied custom droids to ~/.factory/droids/" + fi - name: Run Code Review id: review @@ -81,3 +110,47 @@ runs: FACTORY_API_KEY: ${{ inputs.factory_api_key }} GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} DROID_OUTPUT_FILE: ${{ inputs.output_file }} + + - name: Prepare Validator + id: prepare_validator + if: steps.prompt.outputs.review_use_validator == 'true' + shell: bash + run: | + bun run ${{ github.action_path }}/../src/entrypoints/prepare-validator.ts + env: + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} + REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} + REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} + REVIEW_MODEL: ${{ inputs.review_model }} + REASONING_EFFORT: ${{ inputs.reasoning_effort }} + DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} + + - name: Run Validator + id: validator + if: steps.prompt.outputs.review_use_validator == 'true' + shell: bash + run: | + bun run ${{ github.action_path }}/../base-action/src/index.ts + env: + INPUT_PROMPT_FILE: ${{ runner.temp }}/droid-prompts/droid-prompt.txt + INPUT_DROID_ARGS: ${{ steps.prepare_validator.outputs.droid_args }} + INPUT_MCP_TOOLS: ${{ steps.prepare_validator.outputs.mcp_tools }} + FACTORY_API_KEY: ${{ inputs.factory_api_key }} + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + + - name: Update tracking comment + if: always() && inputs.tracking_comment_id + continue-on-error: true + shell: bash + run: | + bun run ${{ github.action_path }}/../src/entrypoints/update-comment-link.ts + env: + DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + OUTPUT_FILE: ${{ inputs.output_file }} + TRIGGER_USERNAME: ${{ github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} + PREPARE_SUCCESS: "true" + DROID_SUCCESS: ${{ (steps.prompt.outputs.review_use_validator == 'true' && steps.validator.outcome == 'success') || (steps.prompt.outputs.review_use_validator != 'true' && steps.review.outcome == 'success') }} diff --git a/security/action.yml b/security/action.yml index 84655d4..45e1c4e 100644 --- a/security/action.yml +++ b/security/action.yml @@ -115,6 +115,7 @@ runs: SECURITY_BLOCK_ON_CRITICAL: ${{ inputs.security_block_on_critical }} SECURITY_BLOCK_ON_HIGH: ${{ inputs.security_block_on_high }} REVIEW_TYPE: "security" + DROID_OUTPUT_FILE: ${{ inputs.output_file }} - name: Run Security Review id: review diff --git a/src/create-prompt/index.ts b/src/create-prompt/index.ts index 0cd8d22..415013f 100644 --- a/src/create-prompt/index.ts +++ b/src/create-prompt/index.ts @@ -303,6 +303,7 @@ export type PromptCreationOptions = { disallowedTools?: string[]; includeActionsTools?: boolean; reviewArtifacts?: ReviewArtifacts; + outputFilePath?: string; }; export async function createPrompt({ @@ -316,6 +317,7 @@ export async function createPrompt({ disallowedTools = [], includeActionsTools = false, reviewArtifacts, + outputFilePath, }: PromptCreationOptions) { try { const droidCommentId = commentId?.toString(); @@ -328,6 +330,10 @@ export async function createPrompt({ reviewArtifacts, ); + if (outputFilePath) { + preparedContext.outputFilePath = outputFilePath; + } + await mkdir(`${process.env.RUNNER_TEMP || "/tmp"}/droid-prompts`, { recursive: true, }); diff --git a/src/create-prompt/templates/combine-prompt.ts b/src/create-prompt/templates/combine-prompt.ts deleted file mode 100644 index 73d2de5..0000000 --- a/src/create-prompt/templates/combine-prompt.ts +++ /dev/null @@ -1,101 +0,0 @@ -import type { PreparedContext } from "../types"; - -export function generateCombinePrompt( - context: PreparedContext, - codeReviewResultsPath: string, - securityResultsPath: string, -): string { - const prNumber = context.eventData.isPR - ? context.eventData.prNumber - : context.githubContext && "entityNumber" in context.githubContext - ? String(context.githubContext.entityNumber) - : "unknown"; - - const repoFullName = context.repository; - - return `You are combining code review and security review results for PR #${prNumber} in ${repoFullName}. -The gh CLI is installed and authenticated via GH_TOKEN. - -## Context -- Repo: ${repoFullName} -- PR Number: ${prNumber} -- Code Review Results: ${codeReviewResultsPath} -- Security Review Results: ${securityResultsPath} - -## Task - -1. Read the results files (if they exist): - - ${codeReviewResultsPath} - Code review findings - - ${securityResultsPath} - Security review findings - -2. Combine and deduplicate findings: - - Merge findings from both reviews - - Remove duplicates (same file + line + similar description) - - Prioritize security findings over code review findings for overlaps - -3. Post inline comments for all unique findings using github_inline_comment___create_inline_comment: - - Use side="RIGHT" for new/modified code - - Include severity, description, and suggested fix where available - - For security findings, include CWE reference - -4. Analyze the PR diff to generate: - - A concise 1-2 sentence summary of what the PR does - - 3-5 key changes extracted from the diff - - The most important files changed (up to 5-7 files) - -5. Update the tracking comment with combined summary using github_comment___update_droid_comment: - -IMPORTANT: Do NOT use github_pr___submit_review. Only update the tracking comment and post inline comments. -The tracking comment IS the summary - do not create any other summary comments. - -\`\`\`markdown -## Code review completed - -### Summary -{Brief 1-2 sentence description of what this PR does} - -### Key Changes -- {Change 1} -- {Change 2} -- {Change 3} - -### Important Files Changed -- \`path/to/file1.ts\` - {Brief description of changes} -- \`path/to/file2.ts\` - {Brief description of changes} - -### Code Review -| Type | Count | -|------|-------| -| 🐛 Bugs | X | -| ⚠️ Issues | X | -| 💡 Suggestions | X | - -### Security Review -| Severity | Count | -|----------|-------| -| 🚨 CRITICAL | X | -| 🔴 HIGH | X | -| 🟡 MEDIUM | X | -| 🟢 LOW | X | - -### Findings -| ID | Type | Severity | File | Description | -|----|------|----------|------|-------------| -| ... | ... | ... | ... | ... | - -[View workflow run](link) -\`\`\` - -## Available Tools -- github_comment___update_droid_comment - Update tracking comment (this is the ONLY place for the summary) -- github_inline_comment___create_inline_comment - Post inline comments on specific lines -- Read, Grep, Glob, LS, Execute - File operations - -DO NOT use github_pr___submit_review - it creates duplicate summary comments. - -## Important -- If no results files exist or they're empty, report "No issues found" -- Maximum 10 inline comments total -- Deduplicate findings that appear in both reviews -`; -} diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts index 2d53eb8..5863ab2 100644 --- a/src/create-prompt/templates/review-prompt.ts +++ b/src/create-prompt/templates/review-prompt.ts @@ -353,5 +353,37 @@ In the submitted review body: * State whether the changes are correct or incorrect * Provide a 1-3 sentence overall assessment -`; +${ + context.outputFilePath + ? ` +--- + +## Output File (REQUIRED) + +After completing your review, you MUST write your findings to \`${context.outputFilePath}\` as a JSON file with this structure: + +\`\`\`json +{ + "type": "code-review", + "findings": [ + { + "id": "CR-001", + "severity": "P0|P1|P2|P3", + "file": "path/to/file.ts", + "line": 55, + "side": "RIGHT", + "description": "Brief description of the issue", + "suggestion": "Optional suggested fix" + } + ], + "summary": "Brief overall summary of the review" +} +\`\`\` + +If no issues were found, write: \`{"type": "code-review", "findings": [], "summary": "No issues found"}\` + +This file is required for downstream processing of review results. +` + : "" +}`; } diff --git a/src/create-prompt/templates/security-review-prompt.ts b/src/create-prompt/templates/security-review-prompt.ts index f71e3dc..7e4425f 100644 --- a/src/create-prompt/templates/security-review-prompt.ts +++ b/src/create-prompt/templates/security-review-prompt.ts @@ -109,7 +109,7 @@ You have access to these Factory security skills (installed in ~/.factory/skills IMPORTANT: Do NOT post inline comments directly. Instead, write findings to a JSON file. The finalize step will post all inline comments to avoid overlapping with code review comments. -1. Write findings to \`security-review-results.json\` with this structure: +1. Write findings to \`${context.outputFilePath || "security-review-results.json"}\` with this structure: \`\`\`json { "type": "security", diff --git a/src/create-prompt/types.ts b/src/create-prompt/types.ts index edcfdd0..cf110c3 100644 --- a/src/create-prompt/types.ts +++ b/src/create-prompt/types.ts @@ -117,4 +117,5 @@ export type PreparedContext = CommonFields & { headRefOid: string; }; reviewArtifacts?: ReviewArtifacts; + outputFilePath?: string; }; diff --git a/src/entrypoints/combine-reviews.ts b/src/entrypoints/combine-reviews.ts deleted file mode 100644 index fe8933f..0000000 --- a/src/entrypoints/combine-reviews.ts +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env bun - -/** - * Combine results from code review and security review into a single summary - * Updates the tracking comment with the combined results - */ - -import * as core from "@actions/core"; -import { readFile } from "fs/promises"; -import { createOctokit } from "../github/api/client"; -import { parseGitHubContext } from "../github/context"; -import { GITHUB_SERVER_URL } from "../github/api/config"; - -interface ReviewFinding { - id: string; - type: string; - severity?: string; - file: string; - line: number; - description: string; - cwe?: string; -} - -interface ReviewResults { - type: "code" | "security"; - findings: ReviewFinding[]; - summary?: string; -} - -async function loadResults(filePath: string): Promise { - if (!filePath || filePath === "") { - return null; - } - - try { - const content = await readFile(filePath, "utf-8"); - return JSON.parse(content); - } catch (error) { - console.warn(`Could not load results from ${filePath}:`, error); - return null; - } -} - -function generateCombinedSummary( - codeResults: ReviewResults | null, - securityResults: ReviewResults | null, - codeStatus: string, - securityStatus: string, - jobUrl: string, -): string { - const sections: string[] = []; - - // Header - sections.push("## 🔍 PR Review Summary\n"); - - // Status overview - const statusTable = ["| Review Type | Status |", "|-------------|--------|"]; - - if (codeStatus !== "skipped") { - const codeIcon = codeStatus === "success" ? "✅" : "❌"; - statusTable.push(`| Code Review | ${codeIcon} ${codeStatus} |`); - } - - if (securityStatus !== "skipped") { - const securityIcon = securityStatus === "success" ? "✅" : "❌"; - statusTable.push(`| Security Review | ${securityIcon} ${securityStatus} |`); - } - - if (statusTable.length > 2) { - sections.push(statusTable.join("\n")); - sections.push(""); - } - - // Code Review Section - if (codeResults && codeResults.findings.length > 0) { - sections.push("### 📝 Code Review Findings\n"); - sections.push("| ID | Type | File | Line | Description |"); - sections.push("|----|------|------|------|-------------|"); - - for (const finding of codeResults.findings.slice(0, 10)) { - sections.push( - `| ${finding.id} | ${finding.type} | \`${finding.file}\` | ${finding.line} | ${finding.description} |`, - ); - } - - if (codeResults.findings.length > 10) { - sections.push( - `\n*...and ${codeResults.findings.length - 10} more findings*`, - ); - } - sections.push(""); - } else if (codeStatus === "success") { - sections.push("### 📝 Code Review\n"); - sections.push("✅ No code quality issues found.\n"); - } - - // Security Review Section - if (securityResults && securityResults.findings.length > 0) { - sections.push("### 🔐 Security Review Findings\n"); - - // Severity counts - const severityCounts = { - CRITICAL: 0, - HIGH: 0, - MEDIUM: 0, - LOW: 0, - }; - - for (const finding of securityResults.findings) { - const sev = (finding.severity?.toUpperCase() || - "MEDIUM") as keyof typeof severityCounts; - if (sev in severityCounts) { - severityCounts[sev]++; - } - } - - sections.push("| Severity | Count |"); - sections.push("|----------|-------|"); - if (severityCounts.CRITICAL > 0) - sections.push(`| 🚨 CRITICAL | ${severityCounts.CRITICAL} |`); - if (severityCounts.HIGH > 0) - sections.push(`| 🔴 HIGH | ${severityCounts.HIGH} |`); - if (severityCounts.MEDIUM > 0) - sections.push(`| 🟡 MEDIUM | ${severityCounts.MEDIUM} |`); - if (severityCounts.LOW > 0) - sections.push(`| 🟢 LOW | ${severityCounts.LOW} |`); - sections.push(""); - - // Findings table - sections.push("| ID | Severity | Type | File | Line | Reference |"); - sections.push("|----|----------|------|------|------|-----------|"); - - for (const finding of securityResults.findings.slice(0, 10)) { - const cweLink = finding.cwe - ? `[${finding.cwe}](https://cwe.mitre.org/data/definitions/${finding.cwe.replace("CWE-", "")}.html)` - : "-"; - sections.push( - `| ${finding.id} | ${finding.severity || "MEDIUM"} | ${finding.type} | \`${finding.file}\` | ${finding.line} | ${cweLink} |`, - ); - } - - if (securityResults.findings.length > 10) { - sections.push( - `\n*...and ${securityResults.findings.length - 10} more findings*`, - ); - } - sections.push(""); - } else if (securityStatus === "success") { - sections.push("### 🔐 Security Review\n"); - sections.push("✅ No security vulnerabilities found.\n"); - } - - // Footer with job link - sections.push(`---\n[View job run](${jobUrl})`); - - return sections.join("\n"); -} - -async function run() { - try { - const githubToken = process.env.GITHUB_TOKEN!; - const commentId = parseInt(process.env.DROID_COMMENT_ID || "0"); - const codeResultsPath = process.env.CODE_REVIEW_RESULTS || ""; - const securityResultsPath = process.env.SECURITY_RESULTS || ""; - const codeStatus = process.env.CODE_REVIEW_STATUS || "skipped"; - const securityStatus = process.env.SECURITY_REVIEW_STATUS || "skipped"; - const runId = process.env.GITHUB_RUN_ID || ""; - - if (!commentId) { - throw new Error("DROID_COMMENT_ID is required"); - } - - const context = parseGitHubContext(); - const { owner, repo } = context.repository; - const octokit = createOctokit(githubToken); - - // Load results from artifacts - const codeResults = await loadResults(codeResultsPath); - const securityResults = await loadResults(securityResultsPath); - - // Generate job URL - const jobUrl = `${GITHUB_SERVER_URL}/${owner}/${repo}/actions/runs/${runId}`; - - // Generate combined summary - const summary = generateCombinedSummary( - codeResults, - securityResults, - codeStatus, - securityStatus, - jobUrl, - ); - - // Update the tracking comment - await octokit.rest.issues.updateComment({ - owner, - repo, - comment_id: commentId, - body: summary, - }); - - console.log( - `✅ Updated tracking comment ${commentId} with combined summary`, - ); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - core.setFailed(`Combine reviews failed: ${errorMessage}`); - process.exit(1); - } -} - -if (import.meta.main) { - run(); -} diff --git a/src/entrypoints/generate-combine-prompt.ts b/src/entrypoints/generate-combine-prompt.ts deleted file mode 100644 index bcafe32..0000000 --- a/src/entrypoints/generate-combine-prompt.ts +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env bun - -/** - * Generate combine prompt for finalizing parallel reviews - */ - -import * as core from "@actions/core"; -import { createOctokit } from "../github/api/client"; -import { parseGitHubContext, isEntityContext } from "../github/context"; -import { fetchPRBranchData } from "../github/data/pr-fetcher"; -import { createPrompt } from "../create-prompt"; -import { prepareMcpTools } from "../mcp/install-mcp-server"; -import { generateCombinePrompt } from "../create-prompt/templates/combine-prompt"; -import { normalizeDroidArgs, parseAllowedTools } from "../utils/parse-tools"; - -async function run() { - try { - const githubToken = process.env.GITHUB_TOKEN!; - const commentId = parseInt(process.env.DROID_COMMENT_ID || "0"); - const codeReviewResults = process.env.CODE_REVIEW_RESULTS || ""; - const securityResults = process.env.SECURITY_RESULTS || ""; - - const context = parseGitHubContext(); - - if (!isEntityContext(context)) { - throw new Error("Combine requires entity context (PR or issue)"); - } - - if (!context.isPR) { - throw new Error("Combine is only supported on pull requests"); - } - - const octokit = createOctokit(githubToken); - - const prData = await fetchPRBranchData({ - octokits: octokit, - repository: context.repository, - prNumber: context.entityNumber, - }); - - // Generate combine prompt with paths to result files - await createPrompt({ - githubContext: context, - commentId, - baseBranch: prData.baseRefName, - prBranchData: { - headRefName: prData.headRefName, - headRefOid: prData.headRefOid, - }, - generatePrompt: (ctx) => - generateCombinePrompt(ctx, codeReviewResults, securityResults), - }); - - core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-combine"); - - const rawUserArgs = process.env.DROID_ARGS || ""; - const normalizedUserArgs = normalizeDroidArgs(rawUserArgs); - const userAllowedMCPTools = parseAllowedTools(normalizedUserArgs).filter( - (tool) => tool.startsWith("github_") && tool.includes("___"), - ); - - // Combine step has tools for inline comments and tracking comment update - // NO github_pr___submit_review - it creates duplicate summary comments - const baseTools = [ - "Read", - "Grep", - "Glob", - "LS", - "Execute", - "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", - ]; - - const allowedTools = Array.from( - new Set([...baseTools, ...userAllowedMCPTools]), - ); - - const mcpTools = await prepareMcpTools({ - githubToken, - owner: context.repository.owner, - repo: context.repository.repo, - droidCommentId: commentId.toString(), - allowedTools, - mode: "tag", - context, - }); - - const droidArgParts: string[] = []; - // Only include built-in tools in --enabled-tools - const builtInTools = allowedTools.filter((t) => !t.includes("___")); - if (builtInTools.length > 0) { - droidArgParts.push(`--enabled-tools "${builtInTools.join(",")}"`); - } - - if (normalizedUserArgs) { - droidArgParts.push(normalizedUserArgs); - } - - core.setOutput("droid_args", droidArgParts.join(" ").trim()); - core.setOutput("mcp_tools", mcpTools); - - console.log(`Generated combine prompt`); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - core.setFailed(`Generate combine prompt failed: ${errorMessage}`); - process.exit(1); - } -} - -if (import.meta.main) { - run(); -} diff --git a/src/entrypoints/generate-review-prompt.ts b/src/entrypoints/generate-review-prompt.ts index 0f74e89..0db78d0 100644 --- a/src/entrypoints/generate-review-prompt.ts +++ b/src/entrypoints/generate-review-prompt.ts @@ -5,12 +5,15 @@ */ import * as core from "@actions/core"; +import { execSync } from "child_process"; import { createOctokit } from "../github/api/client"; import { parseGitHubContext, isEntityContext } from "../github/context"; import { fetchPRBranchData } from "../github/data/pr-fetcher"; +import { computeReviewArtifacts } from "../github/data/review-artifacts"; import { createPrompt } from "../create-prompt"; import { prepareMcpTools } from "../mcp/install-mcp-server"; import { generateReviewPrompt } from "../create-prompt/templates/review-prompt"; +import { generateReviewCandidatesPrompt } from "../create-prompt/templates/review-candidates-prompt"; import { generateSecurityReviewPrompt } from "../create-prompt/templates/security-review-prompt"; import { normalizeDroidArgs, parseAllowedTools } from "../utils/parse-tools"; @@ -18,6 +21,9 @@ async function run() { try { const githubToken = process.env.GITHUB_TOKEN!; const reviewType = process.env.REVIEW_TYPE || "code"; + const reviewUseValidator = + reviewType === "code" && + (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; const commentId = parseInt(process.env.DROID_COMMENT_ID || "0"); if (!commentId) { @@ -47,11 +53,55 @@ async function run() { currentBranch: prData.headRefName, }; - // Select prompt generator based on review type + // Pre-compute review artifacts (diff, existing comments, PR description) + // so the Droid can read them directly instead of fetching via gh CLI + const tempDir = process.env.RUNNER_TEMP || "/tmp"; + + // Checkout the PR branch before computing diff to ensure HEAD points + // to the PR head commit, not the merge commit + console.log( + `Checking out PR #${context.entityNumber} branch for diff computation...`, + ); + try { + execSync("git reset --hard HEAD", { encoding: "utf8", stdio: "pipe" }); + execSync(`gh pr checkout ${context.entityNumber}`, { + encoding: "utf8", + stdio: "pipe", + env: { ...process.env, GH_TOKEN: githubToken }, + }); + console.log( + `Successfully checked out PR branch: ${execSync("git rev-parse --abbrev-ref HEAD", { encoding: "utf8" }).trim()}`, + ); + } catch (e) { + console.error(`Failed to checkout PR branch: ${e}`); + throw new Error( + `Failed to checkout PR #${context.entityNumber} branch for review`, + ); + } + + const reviewArtifacts = await computeReviewArtifacts({ + baseRef: prData.baseRefName, + tempDir, + octokit, + owner: context.repository.owner, + repo: context.repository.repo, + prNumber: context.entityNumber, + title: prData.title, + body: prData.body, + githubToken, + }); + + // Select prompt generator based on review type and validator mode const generatePrompt = reviewType === "security" ? generateSecurityReviewPrompt - : generateReviewPrompt; + : reviewUseValidator + ? generateReviewCandidatesPrompt + : generateReviewPrompt; + + // Pass the output file path so the prompt can instruct the Droid + // to write structured findings for the combine step + const outputFilePath = process.env.DROID_OUTPUT_FILE || undefined; await createPrompt({ githubContext: context, @@ -62,6 +112,8 @@ async function run() { headRefOid: prData.headRefOid, }, generatePrompt, + reviewArtifacts, + outputFilePath, }); // Set run type @@ -75,22 +127,47 @@ async function run() { (tool) => tool.startsWith("github_") && tool.includes("___"), ); - // Base tools for analysis only - NO inline comment tools - // Inline comments will be posted by the finalize step to avoid overlaps + // Base tools for analysis const baseTools = [ "Read", "Grep", "Glob", "LS", "Execute", + "Edit", + "Create", + "ApplyPatch", "github_comment___update_droid_comment", ]; - // Review tools for reading existing comments only - const reviewTools = ["github_pr___list_review_comments"]; + // Task tool is needed for parallel subagent reviews in candidate generation phase. + // FetchUrl is needed to fetch linked tickets from the PR description. + const candidateGenerationTools = reviewUseValidator + ? ["Task", "FetchUrl"] + : []; + + // When validator is enabled, the candidate generation phase should NOT + // have access to PR mutation tools. When disabled, allow them. + const reviewTools = reviewUseValidator + ? [] + : ["github_pr___list_review_comments"]; + + const safeUserAllowedMCPTools = reviewUseValidator + ? userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ) + : userAllowedMCPTools; const allowedTools = Array.from( - new Set([...baseTools, ...reviewTools, ...userAllowedMCPTools]), + new Set([ + ...baseTools, + ...candidateGenerationTools, + ...reviewTools, + ...safeUserAllowedMCPTools, + ]), ); const mcpTools = await prepareMcpTools({ @@ -104,21 +181,20 @@ async function run() { }); const droidArgParts: string[] = []; - // Only include built-in tools in --enabled-tools - // MCP tools are discovered dynamically from registered servers - const builtInTools = allowedTools.filter((t) => !t.includes("___")); - if (builtInTools.length > 0) { - droidArgParts.push(`--enabled-tools "${builtInTools.join(",")}"`); - } + droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); + droidArgParts.push('--tag "code-review"'); - // Add model override if specified - const model = + const reviewModel = reviewType === "security" ? process.env.SECURITY_MODEL?.trim() || process.env.REVIEW_MODEL?.trim() : process.env.REVIEW_MODEL?.trim(); + const reasoningEffort = process.env.REASONING_EFFORT?.trim(); - if (model) { - droidArgParts.push(`--model "${model}"`); + if (reviewModel) { + droidArgParts.push(`--model "${reviewModel}"`); + } + if (reasoningEffort) { + droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); } if (normalizedUserArgs) { @@ -128,8 +204,11 @@ async function run() { // Output for next step - use core.setOutput which handles GITHUB_OUTPUT internally core.setOutput("droid_args", droidArgParts.join(" ").trim()); core.setOutput("mcp_tools", mcpTools); + core.setOutput("review_use_validator", reviewUseValidator.toString()); - console.log(`Generated ${reviewType} review prompt`); + console.log( + `Generated ${reviewType} review prompt (validator=${reviewUseValidator})`, + ); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); core.setFailed(`Generate prompt failed: ${errorMessage}`); diff --git a/src/entrypoints/prepare-validator.ts b/src/entrypoints/prepare-validator.ts index 52c18f7..b27d3a7 100644 --- a/src/entrypoints/prepare-validator.ts +++ b/src/entrypoints/prepare-validator.ts @@ -16,7 +16,9 @@ async function run() { // This entrypoint only makes sense when the workflow input is enabled. if ((process.env.REVIEW_USE_VALIDATOR ?? "true").trim() === "false") { - throw new Error("reviewUseValidator must be true to run prepare-validator"); + throw new Error( + "reviewUseValidator must be true to run prepare-validator", + ); } const githubToken = await setupGitHubToken(); diff --git a/src/entrypoints/update-comment-link.ts b/src/entrypoints/update-comment-link.ts index ad57b16..b066a8a 100644 --- a/src/entrypoints/update-comment-link.ts +++ b/src/entrypoints/update-comment-link.ts @@ -44,7 +44,8 @@ async function run() { owner, repo, commentId, - isPullRequestReviewCommentEvent: isPullRequestReviewCommentEvent(context), + isPullRequestReviewCommentEvent: + isPullRequestReviewCommentEvent(context), }); comment = result.comment; isPRReviewComment = result.isPRReviewComment; diff --git a/src/github/data/review-artifacts.ts b/src/github/data/review-artifacts.ts new file mode 100644 index 0000000..c7d7a4f --- /dev/null +++ b/src/github/data/review-artifacts.ts @@ -0,0 +1,169 @@ +import { execSync } from "child_process"; +import { writeFile, mkdir } from "fs/promises"; +import type { Octokits } from "../api/client"; +import type { ReviewArtifacts } from "../../create-prompt/types"; + +const DIFF_MAX_BUFFER = 50 * 1024 * 1024; // 50MB buffer for large diffs + +/** + * Compute the PR diff and store it on disk. + * + * Tries git merge-base first (requires sufficient history). When that + * fails (e.g. shallow clone without unshallow support) it falls back + * to `gh pr diff` which always works. + */ +export async function computeAndStoreDiff( + baseRef: string, + tempDir: string, + options?: { githubToken?: string; prNumber?: number }, +): Promise { + const promptsDir = `${tempDir}/droid-prompts`; + await mkdir(promptsDir, { recursive: true }); + + let diff: string; + try { + // Unshallow the repo if it's a shallow clone (needed for merge-base) + try { + execSync("git rev-parse --is-shallow-repository", { + encoding: "utf8", + stdio: "pipe", + }).trim() === "true" && + execSync("git fetch --unshallow", { + encoding: "utf8", + stdio: "pipe", + }); + console.log("Unshallowed repository"); + } catch { + console.log("Repository already has full history"); + } + + // Fetch the base branch (it may not exist locally yet) + try { + execSync(`git fetch origin ${baseRef}:refs/remotes/origin/${baseRef}`, { + encoding: "utf8", + stdio: "pipe", + }); + console.log(`Fetched base branch: ${baseRef}`); + } catch { + console.log(`Base branch fetch skipped (may already exist): ${baseRef}`); + } + + const mergeBase = execSync( + `git merge-base HEAD refs/remotes/origin/${baseRef}`, + { encoding: "utf8" }, + ).trim(); + + diff = execSync(`git --no-pager diff ${mergeBase}..HEAD`, { + encoding: "utf8", + maxBuffer: DIFF_MAX_BUFFER, + }); + } catch { + // Fallback: use gh CLI to get the diff (works even with shallow clones) + if (options?.githubToken && options?.prNumber) { + console.log( + "Git merge-base failed, falling back to gh pr diff for PR diff", + ); + diff = execSync(`gh pr diff ${options.prNumber}`, { + encoding: "utf8", + maxBuffer: DIFF_MAX_BUFFER, + env: { ...process.env, GH_TOKEN: options.githubToken }, + }); + } else { + throw new Error( + "Git merge-base failed and no fallback credentials provided", + ); + } + } + + const diffPath = `${promptsDir}/pr.diff`; + await writeFile(diffPath, diff); + console.log(`Stored PR diff (${diff.length} bytes) at ${diffPath}`); + return diffPath; +} + +export async function fetchAndStoreComments( + octokit: Octokits, + owner: string, + repo: string, + prNumber: number, + tempDir: string, +): Promise { + const promptsDir = `${tempDir}/droid-prompts`; + await mkdir(promptsDir, { recursive: true }); + + const [issueComments, reviewComments] = await Promise.all([ + octokit.rest.issues.listComments({ + owner, + repo, + issue_number: prNumber, + per_page: 100, + }), + octokit.rest.pulls.listReviewComments({ + owner, + repo, + pull_number: prNumber, + per_page: 100, + }), + ]); + + const comments = { + issueComments: issueComments.data, + reviewComments: reviewComments.data, + }; + + const commentsPath = `${promptsDir}/existing_comments.json`; + await writeFile(commentsPath, JSON.stringify(comments, null, 2)); + console.log( + `Stored existing comments (${issueComments.data.length} issue, ${reviewComments.data.length} review) at ${commentsPath}`, + ); + return commentsPath; +} + +export async function storeDescription( + title: string, + body: string, + tempDir: string, +): Promise { + const promptsDir = `${tempDir}/droid-prompts`; + await mkdir(promptsDir, { recursive: true }); + + const content = `# ${title}\n\n${body}`; + const descriptionPath = `${promptsDir}/pr_description.txt`; + await writeFile(descriptionPath, content); + console.log( + `Stored PR description (${content.length} bytes) at ${descriptionPath}`, + ); + return descriptionPath; +} + +/** + * Pre-compute all review artifacts (diff, comments, description) in parallel. + */ +export async function computeReviewArtifacts(opts: { + baseRef: string; + tempDir: string; + octokit: Octokits; + owner: string; + repo: string; + prNumber: number; + title: string; + body: string; + githubToken?: string; +}): Promise { + const [diffPath, commentsPath, descriptionPath] = await Promise.all([ + computeAndStoreDiff(opts.baseRef, opts.tempDir, { + githubToken: opts.githubToken, + prNumber: opts.prNumber, + }), + fetchAndStoreComments( + opts.octokit, + opts.owner, + opts.repo, + opts.prNumber, + opts.tempDir, + ), + storeDescription(opts.title, opts.body, opts.tempDir), + ]); + + return { diffPath, commentsPath, descriptionPath }; +} diff --git a/src/github/operations/comments/fetch-droid-comment.ts b/src/github/operations/comments/fetch-droid-comment.ts index 98eb071..bbc1b25 100644 --- a/src/github/operations/comments/fetch-droid-comment.ts +++ b/src/github/operations/comments/fetch-droid-comment.ts @@ -62,9 +62,7 @@ export async function fetchDroidComment( } catch (issueError) { // If event wasn't a PR review comment event, try review comment API as fallback if (!isPullRequestReviewCommentEvent) { - console.log( - "Issue comment fetch failed, trying PR review comment API", - ); + console.log("Issue comment fetch failed, trying PR review comment API"); try { const { data: prComment } = await octokit.rest.pulls.getReviewComment( { diff --git a/src/mcp/install-mcp-server.ts b/src/mcp/install-mcp-server.ts index 0714523..da6ff80 100644 --- a/src/mcp/install-mcp-server.ts +++ b/src/mcp/install-mcp-server.ts @@ -1,4 +1,6 @@ import * as core from "@actions/core"; +import { existsSync } from "fs"; +import path from "path"; import { GITHUB_API_URL, GITHUB_SERVER_URL } from "../github/api/config"; import type { GitHubContext } from "../github/context"; import { isEntityContext } from "../github/context"; @@ -61,6 +63,15 @@ export async function prepareMcpTools( try { const allowedToolsList = allowedTools || []; + // GITHUB_ACTION_PATH points to either: + // - The repo root when using the monolithic action (Factory-AI/droid-action@dev) + // - A sub-action directory (e.g., review/) when using sub-actions + // Detect which case by checking if src/ exists at the action path directly. + const actionPath = process.env.GITHUB_ACTION_PATH || "."; + const repoRoot = existsSync(path.join(actionPath, "src")) + ? actionPath + : path.resolve(actionPath, ".."); + const hasGitHubMcpTools = allowedToolsList.some((tool) => tool.startsWith("github___"), ); @@ -79,10 +90,7 @@ export async function prepareMcpTools( baseMcpTools.mcpServers.github_comment = { command: "bun", - args: [ - "run", - `${process.env.GITHUB_ACTION_PATH}/src/mcp/github-comment-server.ts`, - ], + args: ["run", `${repoRoot}/src/mcp/github-comment-server.ts`], env: { GITHUB_TOKEN: githubToken, REPO_OWNER: owner, @@ -101,10 +109,7 @@ export async function prepareMcpTools( ) { baseMcpTools.mcpServers.github_inline_comment = { command: "bun", - args: [ - "run", - `${process.env.GITHUB_ACTION_PATH}/src/mcp/github-inline-comment-server.ts`, - ], + args: ["run", `${repoRoot}/src/mcp/github-inline-comment-server.ts`], env: { GITHUB_TOKEN: githubToken, REPO_OWNER: owner, @@ -136,10 +141,7 @@ export async function prepareMcpTools( } baseMcpTools.mcpServers.github_ci = { command: "bun", - args: [ - "run", - `${process.env.GITHUB_ACTION_PATH}/src/mcp/github-actions-server.ts`, - ], + args: ["run", `${repoRoot}/src/mcp/github-actions-server.ts`], env: { // Use workflow github token, not app token GITHUB_TOKEN: process.env.DEFAULT_WORKFLOW_TOKEN, @@ -154,10 +156,7 @@ export async function prepareMcpTools( if (isEntityContext(context) && context.isPR && hasGitHubPRTools) { baseMcpTools.mcpServers.github_pr = { command: "bun", - args: [ - "run", - `${process.env.GITHUB_ACTION_PATH}/src/mcp/github-pr-server.ts`, - ], + args: ["run", `${repoRoot}/src/mcp/github-pr-server.ts`], env: { GITHUB_TOKEN: githubToken, REPO_OWNER: owner, diff --git a/src/tag/commands/fill.ts b/src/tag/commands/fill.ts index 1cc7e31..3ae1a5a 100644 --- a/src/tag/commands/fill.ts +++ b/src/tag/commands/fill.ts @@ -91,6 +91,7 @@ export async function prepareFillMode({ const droidArgParts: string[] = []; droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); + droidArgParts.push('--tag "droid-fill"'); // Add model override if specified const fillModel = process.env.FILL_MODEL?.trim(); diff --git a/src/tag/commands/review-validator.ts b/src/tag/commands/review-validator.ts index 9d574eb..4e9148d 100644 --- a/src/tag/commands/review-validator.ts +++ b/src/tag/commands/review-validator.ts @@ -4,103 +4,11 @@ import { isEntityContext } from "../../github/context"; import type { Octokits } from "../../github/api/client"; import { fetchPRBranchData } from "../../github/data/pr-fetcher"; import { createPrompt } from "../../create-prompt"; -import type { ReviewArtifacts } from "../../create-prompt"; +import type { ReviewArtifacts } from "../../create-prompt/types"; import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; import type { PrepareResult } from "../../prepare/types"; import { generateReviewValidatorPrompt } from "../../create-prompt/templates/review-validator-prompt"; -import { execSync } from "child_process"; -import { mkdir, writeFile } from "fs/promises"; - -const DIFF_MAX_BUFFER = 50 * 1024 * 1024; - -async function computeAndStoreDiff( - baseRef: string, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - try { - execSync("git fetch --unshallow", { encoding: "utf8", stdio: "pipe" }); - console.log("Unshallowed repository"); - } catch { - console.log("Repository already has full history"); - } - - try { - execSync(`git fetch origin ${baseRef}:refs/remotes/origin/${baseRef}`, { - encoding: "utf8", - stdio: "pipe", - }); - console.log(`Fetched base branch: ${baseRef}`); - } catch (e) { - console.error(`Failed to fetch base branch ${baseRef}: ${e}`); - throw new Error(`Failed to fetch base branch ${baseRef} for review`); - } - - const mergeBase = execSync(`git merge-base HEAD origin/${baseRef}`, { - encoding: "utf8", - stdio: "pipe", - }).trim(); - - const diff = execSync(`git --no-pager diff ${mergeBase}..HEAD`, { - encoding: "utf8", - stdio: "pipe", - maxBuffer: DIFF_MAX_BUFFER, - }); - - const diffPath = `${promptsDir}/pr.diff`; - await writeFile(diffPath, diff); - console.log(`Stored PR diff (${diff.length} bytes) at ${diffPath}`); - - return diffPath; -} - -async function fetchAndStoreComments( - octokit: Octokits, - owner: string, - repo: string, - prNumber: number, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - const [issueComments, reviewComments] = await Promise.all([ - octokit.rest.issues.listComments({ owner, repo, issue_number: prNumber }), - octokit.rest.pulls.listReviewComments({ owner, repo, pull_number: prNumber }), - ]); - - const commentsPath = `${promptsDir}/existing_comments.json`; - const payload = { - issueComments: issueComments.data, - reviewComments: reviewComments.data, - }; - await writeFile(commentsPath, JSON.stringify(payload, null, 2)); - console.log( - `Stored existing comments (${issueComments.data.length} issue, ${reviewComments.data.length} review) at ${commentsPath}`, - ); - - return commentsPath; -} - -async function storeDescription( - title: string, - body: string, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - const content = `# ${title}\n\n${body}`; - const descriptionPath = `${promptsDir}/pr_description.txt`; - await writeFile(descriptionPath, content); - console.log( - `Stored PR description (${content.length} bytes) at ${descriptionPath}`, - ); - return descriptionPath; -} export async function prepareReviewValidatorMode({ context, @@ -119,41 +27,23 @@ export async function prepareReviewValidatorMode({ const prData = await fetchPRBranchData({ octokits: octokit, - repository: { owner: context.repository.owner, repo: context.repository.repo }, + repository: { + owner: context.repository.owner, + repo: context.repository.repo, + }, prNumber: context.entityNumber, }); + // The PR branch is already checked out and review artifacts (diff, + // comments, description) were already computed by the generate-review-prompt + // step earlier in this job. Reuse them from disk instead of recomputing. const tempDir = process.env.RUNNER_TEMP || "/tmp"; - - // Ensure PR branch is checked out (same as review mode) - try { - execSync("git reset --hard HEAD", { encoding: "utf8", stdio: "pipe" } as any); - execSync(`gh pr checkout ${context.entityNumber}`, { - encoding: "utf8", - stdio: "pipe", - env: { ...process.env, GH_TOKEN: githubToken }, - }); - console.log( - `Successfully checked out PR branch: ${execSync("git rev-parse --abbrev-ref HEAD", { encoding: "utf8" } as any).trim()}`, - ); - } catch (e) { - console.error(`Failed to checkout PR branch: ${e}`); - throw new Error(`Failed to checkout PR #${context.entityNumber} branch for review`); - } - - const [diffPath, commentsPath, descriptionPath] = await Promise.all([ - computeAndStoreDiff(prData.baseRefName, tempDir), - fetchAndStoreComments( - octokit, - context.repository.owner, - context.repository.repo, - context.entityNumber, - tempDir, - ), - storeDescription(prData.title, prData.body, tempDir), - ]); - - const reviewArtifacts: ReviewArtifacts = { diffPath, commentsPath, descriptionPath }; + const promptsDir = `${tempDir}/droid-prompts`; + const reviewArtifacts: ReviewArtifacts = { + diffPath: `${promptsDir}/pr.diff`, + commentsPath: `${promptsDir}/existing_comments.json`, + descriptionPath: `${promptsDir}/pr_description.txt`, + }; await createPrompt({ githubContext: context, @@ -207,16 +97,16 @@ export async function prepareReviewValidatorMode({ const droidArgParts: string[] = []; droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); + droidArgParts.push('--tag "code-review"'); const reviewModel = process.env.REVIEW_MODEL?.trim(); const reasoningEffort = process.env.REASONING_EFFORT?.trim(); - if (!reviewModel && !reasoningEffort) { - droidArgParts.push(`--model "gpt-5.2"`); - droidArgParts.push(`--reasoning-effort "high"`); - } else { - if (reviewModel) droidArgParts.push(`--model "${reviewModel}"`); - if (reasoningEffort) droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); + if (reviewModel) { + droidArgParts.push(`--model "${reviewModel}"`); + } + if (reasoningEffort) { + droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); } if (normalizedUserArgs) { diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts index a69ce85..4fabb33 100644 --- a/src/tag/commands/review.ts +++ b/src/tag/commands/review.ts @@ -1,10 +1,9 @@ import * as core from "@actions/core"; import { execSync } from "child_process"; -import { writeFile, mkdir } from "fs/promises"; import type { GitHubContext } from "../../github/context"; import { fetchPRBranchData } from "../../github/data/pr-fetcher"; +import { computeReviewArtifacts } from "../../github/data/review-artifacts"; import { createPrompt } from "../../create-prompt"; -import type { ReviewArtifacts } from "../../create-prompt"; import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { createInitialComment } from "../../github/operations/comments/create-initial"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; @@ -14,107 +13,6 @@ import { generateReviewCandidatesPrompt } from "../../create-prompt/templates/re import type { Octokits } from "../../github/api/client"; import type { PrepareResult } from "../../prepare/types"; -const DIFF_MAX_BUFFER = 50 * 1024 * 1024; // 50MB buffer for large diffs - -async function computeAndStoreDiff( - baseRef: string, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - // Unshallow the repo if it's a shallow clone (needed for merge-base to work) - try { - execSync("git fetch --unshallow", { encoding: "utf8", stdio: "pipe" }); - console.log("Unshallowed repository"); - } catch (e) { - // Already unshallowed or not a shallow clone, continue - console.log("Repository already has full history"); - } - - // Fetch the base branch (it may not exist locally yet) - try { - execSync( - `git fetch origin ${baseRef}:refs/remotes/origin/${baseRef}`, - { encoding: "utf8", stdio: "pipe" }, - ); - console.log(`Fetched base branch: ${baseRef}`); - } catch (e) { - // Branch might already exist, continue - console.log(`Base branch fetch skipped (may already exist): ${baseRef}`); - } - - const mergeBase = execSync( - `git merge-base HEAD refs/remotes/origin/${baseRef}`, - { encoding: "utf8" }, - ).trim(); - - const diff = execSync(`git --no-pager diff ${mergeBase}..HEAD`, { - encoding: "utf8", - maxBuffer: DIFF_MAX_BUFFER, - }); - - const diffPath = `${promptsDir}/pr.diff`; - await writeFile(diffPath, diff); - console.log(`Stored PR diff (${diff.length} bytes) at ${diffPath}`); - return diffPath; -} - -async function fetchAndStoreComments( - octokit: Octokits, - owner: string, - repo: string, - prNumber: number, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - const [issueComments, reviewComments] = await Promise.all([ - octokit.rest.issues.listComments({ - owner, - repo, - issue_number: prNumber, - per_page: 100, - }), - octokit.rest.pulls.listReviewComments({ - owner, - repo, - pull_number: prNumber, - per_page: 100, - }), - ]); - - const comments = { - issueComments: issueComments.data, - reviewComments: reviewComments.data, - }; - - const commentsPath = `${promptsDir}/existing_comments.json`; - await writeFile(commentsPath, JSON.stringify(comments, null, 2)); - console.log( - `Stored existing comments (${issueComments.data.length} issue, ${reviewComments.data.length} review) at ${commentsPath}`, - ); - return commentsPath; -} - -async function storeDescription( - title: string, - body: string, - tempDir: string, -): Promise { - const promptsDir = `${tempDir}/droid-prompts`; - await mkdir(promptsDir, { recursive: true }); - - const content = `# ${title}\n\n${body}`; - const descriptionPath = `${promptsDir}/pr_description.txt`; - await writeFile(descriptionPath, content); - console.log( - `Stored PR description (${content.length} bytes) at ${descriptionPath}`, - ); - return descriptionPath; -} - type ReviewCommandOptions = { context: GitHubContext; octokit: Octokits; @@ -175,23 +73,17 @@ export async function prepareReviewMode({ // Pre-compute review artifacts (diff, existing comments, and PR description) const tempDir = process.env.RUNNER_TEMP || "/tmp"; - const [diffPath, commentsPath, descriptionPath] = await Promise.all([ - computeAndStoreDiff(prData.baseRefName, tempDir), - fetchAndStoreComments( - octokit, - context.repository.owner, - context.repository.repo, - context.entityNumber, - tempDir, - ), - storeDescription(prData.title, prData.body, tempDir), - ]); - - const reviewArtifacts: ReviewArtifacts = { - diffPath, - commentsPath, - descriptionPath, - }; + const reviewArtifacts = await computeReviewArtifacts({ + baseRef: prData.baseRefName, + tempDir, + octokit, + owner: context.repository.owner, + repo: context.repository.repo, + prNumber: context.entityNumber, + title: prData.title, + body: prData.body, + githubToken, + }); const reviewUseValidator = (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; @@ -232,7 +124,9 @@ export async function prepareReviewMode({ // Task tool is needed for parallel subagent reviews in candidate generation phase. // FetchUrl is needed to fetch linked tickets from the PR description. - const candidateGenerationTools = reviewUseValidator ? ["Task", "FetchUrl"] : []; + const candidateGenerationTools = reviewUseValidator + ? ["Task", "FetchUrl"] + : []; const reviewTools = reviewUseValidator ? [] @@ -276,22 +170,16 @@ export async function prepareReviewMode({ const droidArgParts: string[] = []; droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); + droidArgParts.push('--tag "code-review"'); const reviewModel = process.env.REVIEW_MODEL?.trim(); const reasoningEffort = process.env.REASONING_EFFORT?.trim(); - // action.yml defaults review_model to gpt-5.2, so reviewModel is - // normally always set. The fallback keeps things working outside the action. - if (!reviewModel && !reasoningEffort) { - droidArgParts.push(`--model "gpt-5.2"`); - droidArgParts.push(`--reasoning-effort "high"`); - } else { - if (reviewModel) { - droidArgParts.push(`--model "${reviewModel}"`); - } - if (reasoningEffort) { - droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); - } + if (reviewModel) { + droidArgParts.push(`--model "${reviewModel}"`); + } + if (reasoningEffort) { + droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); } if (normalizedUserArgs) { diff --git a/test/create-prompt/templates/review-prompt.test.ts b/test/create-prompt/templates/review-prompt.test.ts index 7bcb90c..3898c6b 100644 --- a/test/create-prompt/templates/review-prompt.test.ts +++ b/test/create-prompt/templates/review-prompt.test.ts @@ -155,4 +155,25 @@ describe("generateReviewPrompt", () => { expect(prompt).toContain("pr_description.txt"); }); + + it("includes output file instructions when outputFilePath is set", () => { + const context = createBaseContext({ + outputFilePath: "/tmp/results/code-review-results.json", + }); + + const prompt = generateReviewPrompt(context); + + expect(prompt).toContain("## Output File (REQUIRED)"); + expect(prompt).toContain("/tmp/results/code-review-results.json"); + expect(prompt).toContain('"type": "code-review"'); + expect(prompt).toContain("downstream processing of review results"); + }); + + it("does not include output file section when outputFilePath is not set", () => { + const context = createBaseContext(); + + const prompt = generateReviewPrompt(context); + + expect(prompt).not.toContain("## Output File (REQUIRED)"); + }); }); diff --git a/test/create-prompt/templates/security-review-prompt.test.ts b/test/create-prompt/templates/security-review-prompt.test.ts index 5014fcd..a888d60 100644 --- a/test/create-prompt/templates/security-review-prompt.test.ts +++ b/test/create-prompt/templates/security-review-prompt.test.ts @@ -60,6 +60,27 @@ describe("generateSecurityReviewPrompt", () => { expect(prompt).toContain("LOW"); }); + it("uses outputFilePath when provided", () => { + const context = createBaseContext({ + outputFilePath: "/tmp/results/security-results.json", + }); + + const prompt = generateSecurityReviewPrompt(context); + + expect(prompt).toContain("/tmp/results/security-results.json"); + expect(prompt).not.toContain( + "Write findings to `security-review-results.json`", + ); + }); + + it("falls back to default filename when outputFilePath is not set", () => { + const context = createBaseContext(); + + const prompt = generateSecurityReviewPrompt(context); + + expect(prompt).toContain("security-review-results.json"); + }); + it("includes security configuration from context", () => { const contextWithConfig = createBaseContext({ githubContext: { diff --git a/test/entrypoints/prepare-validator.test.ts b/test/entrypoints/prepare-validator.test.ts index 6b3bc80..28f9d96 100644 --- a/test/entrypoints/prepare-validator.test.ts +++ b/test/entrypoints/prepare-validator.test.ts @@ -45,7 +45,7 @@ describe("prepare-validator entrypoint", () => { } as any); const mod = await import( - `../../src/entrypoints/prepare-validator.ts?test=${Math.random()}`, + `../../src/entrypoints/prepare-validator.ts?test=${Math.random()}` ); await expect(mod.default()).rejects.toBeTruthy(); diff --git a/test/fetch-droid-comment.test.ts b/test/fetch-droid-comment.test.ts index eeb67e5..308b527 100644 --- a/test/fetch-droid-comment.test.ts +++ b/test/fetch-droid-comment.test.ts @@ -26,8 +26,7 @@ describe("fetchDroidComment", () => { data: { id: 123456, body: "Test issue comment", - html_url: - "https://github.com/owner/repo/issues/1#issuecomment-123456", + html_url: "https://github.com/owner/repo/issues/1#issuecomment-123456", }, }; @@ -59,8 +58,7 @@ describe("fetchDroidComment", () => { data: { id: 789012, body: "Test PR review comment", - html_url: - "https://github.com/owner/repo/pull/1#discussion_r789012", + html_url: "https://github.com/owner/repo/pull/1#discussion_r789012", }, }; @@ -146,8 +144,7 @@ describe("fetchDroidComment", () => { data: { id: 456789, body: "Test issue comment fetched via fallback", - html_url: - "https://github.com/owner/repo/issues/1#issuecomment-456789", + html_url: "https://github.com/owner/repo/issues/1#issuecomment-456789", }, }; @@ -183,9 +180,7 @@ describe("fetchDroidComment", () => { comment_id: 456789, }); - expect(result.comment.body).toBe( - "Test issue comment fetched via fallback", - ); + expect(result.comment.body).toBe("Test issue comment fetched via fallback"); expect(result.isPRReviewComment).toBe(false); }); @@ -266,8 +261,7 @@ describe("fetchDroidComment", () => { data: { id: 111222, body: null, - html_url: - "https://github.com/owner/repo/issues/1#issuecomment-111222", + html_url: "https://github.com/owner/repo/issues/1#issuecomment-111222", }, }; diff --git a/test/github/data/review-artifacts.test.ts b/test/github/data/review-artifacts.test.ts new file mode 100644 index 0000000..3a3b2ae --- /dev/null +++ b/test/github/data/review-artifacts.test.ts @@ -0,0 +1,204 @@ +import { afterEach, beforeEach, describe, expect, it, spyOn } from "bun:test"; +import { + computeAndStoreDiff, + fetchAndStoreComments, + storeDescription, + computeReviewArtifacts, +} from "../../../src/github/data/review-artifacts"; +import * as childProcess from "child_process"; +import * as fsPromises from "fs/promises"; + +describe("review-artifacts", () => { + let execSyncSpy: ReturnType; + let writeFileSpy: ReturnType; + let mkdirSpy: ReturnType; + + beforeEach(() => { + writeFileSpy = spyOn(fsPromises, "writeFile").mockResolvedValue(); + mkdirSpy = spyOn(fsPromises, "mkdir").mockResolvedValue(undefined); + }); + + afterEach(() => { + execSyncSpy?.mockRestore(); + writeFileSpy.mockRestore(); + mkdirSpy.mockRestore(); + }); + + describe("computeAndStoreDiff", () => { + it("computes diff via git merge-base and writes to disk", async () => { + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + ) => { + if (cmd.includes("is-shallow-repository")) return "false\n"; + if (cmd.includes("fetch origin main")) return ""; + if (cmd.includes("merge-base")) return "abc123\n"; + if (cmd.includes("diff")) return "diff --git a/f.ts b/f.ts\n+line\n"; + return ""; + }) as typeof childProcess.execSync); + + const result = await computeAndStoreDiff("main", "/tmp/test"); + + expect(result).toBe("/tmp/test/droid-prompts/pr.diff"); + expect(mkdirSpy).toHaveBeenCalledWith("/tmp/test/droid-prompts", { + recursive: true, + }); + const writeCall = writeFileSpy.mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).includes("pr.diff"), + ); + expect(writeCall).toBeDefined(); + expect(writeCall![1]).toContain("diff --git"); + }); + + it("falls back to gh pr diff when merge-base fails", async () => { + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + opts?: any, + ) => { + if (cmd.includes("is-shallow-repository")) return "false\n"; + if (cmd.includes("merge-base")) throw new Error("no merge base"); + if (cmd.includes("gh pr diff")) { + expect(opts?.env?.GH_TOKEN).toBe("test-token"); + return "diff from gh cli\n"; + } + return ""; + }) as typeof childProcess.execSync); + + const result = await computeAndStoreDiff("main", "/tmp/test", { + githubToken: "test-token", + prNumber: 42, + }); + + expect(result).toBe("/tmp/test/droid-prompts/pr.diff"); + const writeCall = writeFileSpy.mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).includes("pr.diff"), + ); + expect(writeCall![1]).toContain("diff from gh cli"); + }); + + it("throws when merge-base fails and no fallback credentials", async () => { + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + ) => { + if (cmd.includes("is-shallow-repository")) return "false\n"; + if (cmd.includes("merge-base")) throw new Error("no merge base"); + return ""; + }) as typeof childProcess.execSync); + + await expect(computeAndStoreDiff("main", "/tmp/test")).rejects.toThrow( + "no fallback credentials", + ); + }); + }); + + describe("fetchAndStoreComments", () => { + it("fetches issue and review comments and writes JSON", async () => { + const mockOctokit = { + rest: { + issues: { + listComments: () => + Promise.resolve({ + data: [{ id: 1, body: "issue comment" }], + }), + }, + pulls: { + listReviewComments: () => + Promise.resolve({ + data: [{ id: 2, body: "review comment" }], + }), + }, + }, + } as any; + + const result = await fetchAndStoreComments( + mockOctokit, + "owner", + "repo", + 42, + "/tmp/test", + ); + + expect(result).toBe("/tmp/test/droid-prompts/existing_comments.json"); + const writeCall = writeFileSpy.mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && + (c[0] as string).includes("existing_comments.json"), + ); + expect(writeCall).toBeDefined(); + const written = JSON.parse(writeCall![1] as string); + expect(written.issueComments).toHaveLength(1); + expect(written.reviewComments).toHaveLength(1); + }); + }); + + describe("storeDescription", () => { + it("writes PR title and body as markdown", async () => { + const result = await storeDescription( + "My Feature", + "Adds cool stuff", + "/tmp/test", + ); + + expect(result).toBe("/tmp/test/droid-prompts/pr_description.txt"); + const writeCall = writeFileSpy.mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && + (c[0] as string).includes("pr_description.txt"), + ); + expect(writeCall).toBeDefined(); + expect(writeCall![1]).toBe("# My Feature\n\nAdds cool stuff"); + }); + + it("handles empty body", async () => { + await storeDescription("Title Only", "", "/tmp/test"); + + const writeCall = writeFileSpy.mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && + (c[0] as string).includes("pr_description.txt"), + ); + expect(writeCall![1]).toBe("# Title Only\n\n"); + }); + }); + + describe("computeReviewArtifacts", () => { + it("runs all three artifact computations in parallel", async () => { + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + ) => { + if (cmd.includes("is-shallow-repository")) return "false\n"; + if (cmd.includes("merge-base")) return "abc123\n"; + if (cmd.includes("diff")) return "some diff\n"; + return ""; + }) as typeof childProcess.execSync); + + const mockOctokit = { + rest: { + issues: { + listComments: () => Promise.resolve({ data: [] }), + }, + pulls: { + listReviewComments: () => Promise.resolve({ data: [] }), + }, + }, + } as any; + + const result = await computeReviewArtifacts({ + baseRef: "main", + tempDir: "/tmp/test", + octokit: mockOctokit, + owner: "owner", + repo: "repo", + prNumber: 99, + title: "Test PR", + body: "Test body", + githubToken: "token", + }); + + expect(result.diffPath).toContain("pr.diff"); + expect(result.commentsPath).toContain("existing_comments.json"); + expect(result.descriptionPath).toContain("pr_description.txt"); + }); + }); +}); diff --git a/test/integration/review-flow.test.ts b/test/integration/review-flow.test.ts index 7d8f64b..e8c51d5 100644 --- a/test/integration/review-flow.test.ts +++ b/test/integration/review-flow.test.ts @@ -40,15 +40,15 @@ describe("review command integration", () => { setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {}); exportVarSpy = spyOn(core, "exportVariable").mockImplementation(() => {}); - execSyncSpy = spyOn(childProcess, "execSync").mockImplementation( - ((cmd: string) => { - if (cmd.includes("merge-base")) return "abc123def456\n"; - if (cmd.includes("git --no-pager diff")) { - return "diff --git a/file.ts b/file.ts\n+added line\n"; - } - return ""; - }) as typeof childProcess.execSync, - ); + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + ) => { + if (cmd.includes("merge-base")) return "abc123def456\n"; + if (cmd.includes("git --no-pager diff")) { + return "diff --git a/file.ts b/file.ts\n+added line\n"; + } + return ""; + }) as typeof childProcess.execSync); }); afterEach(async () => { @@ -103,7 +103,7 @@ describe("review command integration", () => { } as any, }); - const octokit = { + const octokit = { rest: { issues: { listComments: () => Promise.resolve({ data: [] }), @@ -111,16 +111,17 @@ describe("review command integration", () => { pulls: { listReviewComments: () => Promise.resolve({ data: [] }), }, - }, - graphql: () => Promise.resolve({ - repository: { - pullRequest: { - baseRefName: "main", - headRefName: "feature/review", - headRefOid: "def456", - } - } - }) + }, + graphql: () => + Promise.resolve({ + repository: { + pullRequest: { + baseRefName: "main", + headRefName: "feature/review", + headRefOid: "def456", + }, + }, + }), } as any; graphqlSpy = spyOn(octokit, "graphql").mockResolvedValue({ diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts index 6ddedc0..5b8b8af 100644 --- a/test/modes/tag/review-command.test.ts +++ b/test/modes/tag/review-command.test.ts @@ -58,17 +58,17 @@ describe("prepareReviewMode", () => { () => {}, ); // Mock execSync for git commands - execSyncSpy = spyOn(childProcess, "execSync").mockImplementation( - ((cmd: string) => { - if (cmd.includes("merge-base")) { - return "abc123def456\n"; - } - if (cmd.includes("diff")) { - return "diff --git a/file.ts b/file.ts\n+added line\n"; - } - return ""; - }) as typeof childProcess.execSync, - ); + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation((( + cmd: string, + ) => { + if (cmd.includes("merge-base")) { + return "abc123def456\n"; + } + if (cmd.includes("diff")) { + return "diff --git a/file.ts b/file.ts\n+added line\n"; + } + return ""; + }) as typeof childProcess.execSync); // Mock file system operations writeFileSpy = spyOn(fsPromises, "writeFile").mockResolvedValue(); mkdirSpy = spyOn(fsPromises, "mkdir").mockResolvedValue(undefined); @@ -389,9 +389,10 @@ describe("prepareReviewMode", () => { const droidArgsCall = setOutputSpy.mock.calls.find( (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; - // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, we default to gpt-5.2 at high reasoning. - expect(droidArgsCall?.[1]).toContain('--model "gpt-5.2"'); - expect(droidArgsCall?.[1]).toContain('--reasoning-effort "high"'); + // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, no --model or --reasoning-effort + // flags are added. Defaults are handled by the action.yml inputs (gpt-5.2 / high). + expect(droidArgsCall?.[1]).not.toContain("--model"); + expect(droidArgsCall?.[1]).not.toContain("--reasoning-effort"); }); it("stores PR description as an artifact file", async () => { From 07a08ac9ba352c395d921fbc4c310e6d1ee850a1 Mon Sep 17 00:00:00 2001 From: Varin Nair Date: Mon, 2 Mar 2026 12:59:02 -0800 Subject: [PATCH 5/6] Release/2026-03-02 (#46) * fix: forward review_model and reasoning_effort to validator step (#42) The v3 refactor removed the hardcoded gpt-5.2/high fallback from review-validator.ts but didn't add REVIEW_MODEL and REASONING_EFFORT to the Prepare validator step's env block in action.yml. This causes the validator pass to fall back to the Droid CLI's internal default model instead of using the user's configured review_model. * fix: pin upload-artifact to v4.6.2 SHA (#43) --------- Co-authored-by: Zahid Khawaja --- action.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/action.yml b/action.yml index c716538..e419f98 100644 --- a/action.yml +++ b/action.yml @@ -346,6 +346,8 @@ runs: REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} DROID_COMMENT_ID: ${{ steps.prepare.outputs.droid_comment_id }} + REVIEW_MODEL: ${{ inputs.review_model }} + REASONING_EFFORT: ${{ inputs.reasoning_effort }} - name: Run Droid Exec (validator) id: droid_validator @@ -405,7 +407,7 @@ runs: - name: Upload debug artifacts if: always() && steps.prepare.outputs.contains_trigger == 'true' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: droid-review-debug-${{ github.run_id }} path: | From 4752b920ee34f14c11328cd0b08a804943369772 Mon Sep 17 00:00:00 2001 From: factory-nizar Date: Thu, 16 Apr 2026 11:50:38 -0700 Subject: [PATCH 6/6] Release v5 (#78) * fix: forward review_model and reasoning_effort to validator step (#42) The v3 refactor removed the hardcoded gpt-5.2/high fallback from review-validator.ts but didn't add REVIEW_MODEL and REASONING_EFFORT to the Prepare validator step's env block in action.yml. This causes the validator pass to fall back to the Droid CLI's internal default model instead of using the user's configured review_model. * fix: pin upload-artifact to v4.6.2 SHA (#43) * feat: support custom review guidelines via review-guidelines skill Read .factory/skills/review-guidelines.md from the workspace and inject its content into all review prompt templates (code review, candidates, validator, and security review). This allows repository maintainers to define repo-specific review guidelines without polluting AGENTS.md. Closes FAC-16667 Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * refactor: address review feedback - Use .factory/skills/review-guidelines/SKILL.md path (follow skill conventions) - Extract formatGuidelinesSection() helper to centralize prompt formatting - Use consistent tags across all templates Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: add size cap and truncation for review guidelines Truncate review guidelines exceeding 80k characters (matching AGENTS.md handling) and append a note directing the model to read the full file. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: match factory-mono truncation pattern more closely - Reserve space for truncation marker within the size limit (total stays <= MAX) - Add minimum meaningful space check (>200 chars) - Nudge model to use tools to read full file when truncated Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * docs: add custom review guidelines section to README Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * refactor: extract MIN_TRUNCATED_CONTENT_CHARS constant Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Consolidate review summary into tracking comment instead of separate PR review body The review previously posted two comments: the initial tracking comment (updated with completion status) and a separate PR review summary via submit_review. This consolidates them so the review summary is written to the tracking comment using update_droid_comment, and submit_review is called without a body parameter. Applies to both single-pass review and two-pass validator flows. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Remove extra formatting instructions from review summary prompts Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Remove single-pass review flow, always use two-pass validator The single-pass review flow was initially created because we were unsure about the performance of the two-pass validator flow. The two-pass flow has since become the default and proven itself, and the single-pass flow is no longer used. This removes the review_use_validator toggle and all single-pass review code. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Batch all review comments into a single submit_review call Instead of posting inline comments individually via create_inline_comment (which creates separate review events for each), batch all comments into the submit_review comments array. This produces a single cohesive review. - Updated submit_review tool schema to support line/side/start_line format - Updated review and validator prompts to batch comments - Removed create_inline_comment from review allowed tools Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Update github-pr-server.ts * refactor: move review guidelines from prompt injection to skill invocation Remove direct injection of review-guidelines file content into orchestrator prompts. Instead, the file-group-reviewer subagent now invokes the review-guidelines skill via the Skill tool, ensuring the guidelines are consumed where they're actually needed (by the reviewer, not the orchestrator). Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: add Skill to enabled tools so subagents can invoke review-guidelines The --enabled-tools allowlist passed to droid exec did not include Skill, which meant file-group-reviewer subagents could not invoke the review-guidelines skill even though their droid config listed it. Add Skill to the candidateGenerationTools array in both generate-review-prompt and review command. Also strengthen file-group-reviewer instructions to treat custom guideline violations as mandatory (not stylistic) and make skill invocation the required first step. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat(review): add suggestion block guidance Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> fix(review): escape suggestion blocks in prompt Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> fix(review): enforce suggestion-only replacements Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> update * fix(review): allow insert-only suggestions Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix(review): clarify insert-only suggestions Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix(review): keep anchors consistent across phases Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * refactor(review): slim suggestion rules Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * update 250 to 100 * Add validator_model input to allow separate model for validator pass Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Add strict deduplication rules to validator prompt Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat(review): make suggestion blocks configurable via include_suggestions parameter Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix(review): remove redundant anchor rejection rule from validator Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix(review): remove validator_model override Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: load review methodology from factory-mono builtin skill Load the review skill's shared methodology from factory-mono's builtin-skills/review/SKILL.md instead of keeping it inline in the CI prompt templates. The skill is loaded at runtime via local plugin cache or GitHub fallback. The shared methodology (bug patterns, reporting gate, confidence calibration, deduplication, analysis discipline) is extracted via BEGIN_SHARED_METHODOLOGY / END_SHARED_METHODOLOGY markers and injected into both candidate and validator prompts. Suggestion block rules remain controlled by the include_suggestions toggle at the CI template level. Depends on: https://github.com/Factory-AI/factory-mono/pull/11498 Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: use CLI builtin review skill instead of fetching from GitHub The Droid CLI bundles the review skill as a builtin, so the agent can invoke it via the Skill tool at runtime. Remove the load-skill.ts GitHub fetch and instead instruct the agent to invoke the 'review' skill directly. This eliminates the network dependency during the prepare step and uses the CLI as the single source of truth for the review methodology. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * refactor: make templates thin wrappers, delegate structure to review skill Both candidate and validator prompts now only provide: - Session framing (senior engineer / validating candidates) - PR context and file paths - Skill invocation instruction (Pass 1 or Pass 2) - CI-specific output schema and posting constraints All review methodology, triage, parallel review, validation rules, deduplication, and confidence filtering come from the skill. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: add path_to_droid_executable input to review action Allows using a custom CLI build for testing builtin skills from unreleased branches. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * revert: remove path_to_droid_executable from review action Not needed for this PR. Can be added separately if custom CLI testing is needed in the future. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * Merge pull request #66 from Factory-AI/feat/review-depth-shallow-deep feat: add shallow/deep review depth presets (default: shallow with kimi-k2-0711) * security review plugins * security review plugins * STRIDE enforcement * fix: create prompt file when both automatic review flags are set When both automatic_review and automatic_security_review were true, prepareTagExecution returned early without calling prepareReviewMode, so no prompt file was written. The Droid Exec step then failed with 'Prompt file does not exist'. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: move security review to skill + run concurrently with code review (#71) * feat: move security review to skill + run concurrently with code review - Create security-reviewer custom droid that invokes the security-review skill and returns findings as a JSON array (same format as file-group-reviewer) - Simplify security-review-prompt.ts to a candidates prompt that references the security-review skill instead of inlining all STRIDE methodology - When both automatic_review and automatic_security_review are enabled, spawn security-reviewer as a Task subagent alongside file-group-reviewers during pass 1, merging all candidates into one JSON for the validator - Standalone @droid security now uses the two-pass pipeline (candidates + validator) instead of posting inline comments directly - Update security/action.yml with validator steps - Enable Task/Skill/FetchUrl tools for security review candidate generation Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * test: point droid-review workflow at branch for CI testing Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * feat: use [security] tag instead of STRIDE letters, add security badge to tracking comment Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: show correct tracking comment when running both code review and security review Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --------- Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * fix: bump internal CI workflow from @v3 to @dev (#74) The internal droid-review.yml workflow passes automatic_security_review: true, but @v3 does not support that input. The @v3 prepare step enters the combined review code path which returns early without generating a prompt file, causing the base-action to fail with 'Prompt file does not exist'. Pointing at @dev picks up the concurrent security review support. This only affects CI for this repo -- customers continue using @v3 in their own workflows. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> * chore: remove file-group-reviewer droid, update version refs to v5 (#76) * eval: remove file-group-reviewer droid in favor of SKILL.md inlined methodology The review skill now uses worker subagents with the full shared methodology inlined into each prompt. The file-group-reviewer.md droid and its tests are no longer needed. * chore: update version refs to v5, clean up file-group-reviewer references - Update all @v3 references to @v5 in README and droid.yml workflow - Remove file-group-reviewer mentions from prompts, comments, and droid descriptions Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --------- Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --------- Co-authored-by: Zahid Khawaja Co-authored-by: Varin Nair Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> Co-authored-by: Shashank Sharma --- .factory/droids/file-group-reviewer.md | 76 ---- .factory/droids/security-reviewer.md | 65 +++ .github/workflows/droid-review.yml | 3 +- .github/workflows/droid.yml | 2 +- README.md | 84 +++- action.yml | 94 ++--- review/action.yml | 30 +- security/action.yml | 112 +++-- src/create-prompt/index.ts | 6 + .../templates/review-candidates-prompt.ts | 152 ++----- src/create-prompt/templates/review-prompt.ts | 389 ------------------ .../templates/review-validator-prompt.ts | 104 ++--- .../templates/security-report-prompt.ts | 2 +- .../templates/security-review-prompt.ts | 236 ++++------- src/create-prompt/types.ts | 1 + src/entrypoints/generate-review-prompt.ts | 65 ++- src/entrypoints/prepare-validator.ts | 7 - src/github/operations/comments/common.ts | 14 +- src/mcp/github-pr-server.ts | 52 ++- src/tag/commands/review-validator.ts | 16 +- src/tag/commands/review.ts | 55 +-- src/tag/commands/security-review.ts | 73 +++- src/tag/index.ts | 56 ++- src/utils/review-depth.ts | 41 ++ .../review-candidates-prompt.test.ts | 63 ++- .../templates/review-prompt.test.ts | 179 -------- .../templates/review-validator-prompt.test.ts | 69 +++- .../templates/security-review-prompt.test.ts | 96 ++--- test/entrypoints/prepare-validator.test.ts | 7 +- test/integration/review-flow.test.ts | 14 +- test/modes/tag/review-command.test.ts | 92 +---- .../modes/tag/security-review-command.test.ts | 98 +++-- 32 files changed, 900 insertions(+), 1453 deletions(-) delete mode 100644 .factory/droids/file-group-reviewer.md create mode 100644 .factory/droids/security-reviewer.md delete mode 100644 src/create-prompt/templates/review-prompt.ts create mode 100644 src/utils/review-depth.ts delete mode 100644 test/create-prompt/templates/review-prompt.test.ts diff --git a/.factory/droids/file-group-reviewer.md b/.factory/droids/file-group-reviewer.md deleted file mode 100644 index 7da7902..0000000 --- a/.factory/droids/file-group-reviewer.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -name: file-group-reviewer -description: Reviews an assigned subset of PR files for bugs, security issues, and correctness problems. Spawned in parallel by the main review agent to ensure thorough coverage. -model: inherit -tools: ["Read", "Grep", "Glob", "LS"] ---- - -You are a senior staff software engineer and expert code reviewer. - -Your task: Review the assigned files from the PR and generate a JSON array of **high-confidence, actionable** review comments that pinpoint genuine issues. - - - -- You are currently checked out to the PR branch. -- Review ALL files assigned to you thoroughly. -- Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems. -- High-signal bug patterns to actively check for (only comment when evidenced in the diff): - - Null/undefined/Optional dereferences; missing-key errors on untrusted/external dict/JSON payloads - - Resource leaks (unclosed files/streams/connections; missing cleanup on error paths) - - Injection vulnerabilities (SQL injection, XSS, command/template injection) and auth/security invariant violations - - OAuth/CSRF invariants: state must be per-flow unpredictable and validated; avoid deterministic/predictable state or missing state checks - - Concurrency/race/atomicity hazards (TOCTOU, lost updates, unsafe shared state, process/thread lifecycle bugs) - - Missing error handling for critical operations (network, persistence, auth, migrations, external APIs) - - Wrong-variable/shadowing mistakes; contract mismatches (serializer/validated_data, interfaces/abstract methods) - - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches) - - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics) -- Only flag issues you are confident about—avoid speculative or stylistic nitpicks. - - - -1. Read each assigned file in full to understand the context -2. Read the relevant diff sections provided in the prompt -3. Read related files as needed to fully understand the changes: - - Imported modules and dependencies - - Interfaces, base classes, and type definitions - - Related tests to understand expected behavior - - Callers/callees of modified functions - - Configuration files if behavior depends on them -4. Analyze the changes for issues matching the bug patterns above -5. For each issue found, verify it against the actual code and related context before including it - - - -Return your findings as a JSON array (no wrapper object, just the array): - -```json -[ - { - "path": "src/index.ts", - "body": "[P1] Title\n\n1 paragraph explanation.", - "line": 42, - "startLine": null, - "side": "RIGHT" - } -] -``` - -If no issues found, return an empty array: `[]` - -Field definitions: - -- `path`: Relative file path (must match exactly as provided in your assignment) -- `body`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation - - P0: Critical bugs (crashes, security vulnerabilities, data loss) - - P1: Important bugs (incorrect behavior, logic errors) - - P2: Minor bugs (edge cases, non-critical issues) -- `line`: Target line number (single-line) or end line number (multi-line). Must be ≥ 0. -- `startLine`: `null` for single-line comments, or start line number for multi-line comments -- `side`: "RIGHT" for new/modified code (default), "LEFT" only for commenting on removed code - - - -- Output ONLY the JSON array—no additional commentary or markdown formatting around it. -- Do not include `commit_id` in your output—the parent agent will add this. -- Do not attempt to post comments to GitHub—just return the JSON array. - diff --git a/.factory/droids/security-reviewer.md b/.factory/droids/security-reviewer.md new file mode 100644 index 0000000..beacafa --- /dev/null +++ b/.factory/droids/security-reviewer.md @@ -0,0 +1,65 @@ +--- +name: security-reviewer +description: Performs security-focused review of PR changes. Spawned as a parallel subagent alongside code review to identify security vulnerabilities. +model: inherit +tools: ["Read", "Grep", "Glob", "LS", "Skill"] +--- + +You are a senior security engineer performing a security-focused code review. + +Your task: Review the PR diff provided below and identify **high-confidence security vulnerabilities**. + + +1. **Load security review methodology (REQUIRED)**: Before starting, invoke the `security-review` skill using the Skill tool. This is your FIRST action — do not read any files before doing this. The skill provides the STRIDE threat model categories, severity definitions, and analysis methodology you must follow. +2. Read each changed file in full to understand the security context +3. Read the relevant diff sections provided in the prompt +4. Trace data flows across file boundaries, especially where user input is involved: + - Follow input from request handlers through processing to database/output + - Check authentication and authorization at each trust boundary + - Identify injection points where untrusted data enters trusted contexts +5. Read related files as needed: + - Authentication/authorization middleware and decorators + - Input validation schemas and sanitization utilities + - Database query builders and ORM configurations + - Security configuration files +6. Analyze the changes for security vulnerabilities using the STRIDE methodology from the skill +7. For each vulnerability found, verify the data flow and confirm exploitability before including it + + + +Return your findings as a JSON array (no wrapper object, just the array): + +```json +[ + { + "path": "src/api/users.ts", + "body": "[P1] [security] SQL injection via unsanitized user input\n\nThe search parameter is concatenated directly into the SQL query without parameterization, allowing an attacker to inject arbitrary SQL.", + "line": 42, + "startLine": null, + "side": "RIGHT" + } +] +``` + +If no security issues found, return an empty array: `[]` + +Field definitions: + +- `path`: Relative file path (must match exactly as provided in your assignment) +- `body`: Comment text starting with priority tag [P0|P1|P2|P3], then `[security]` tag, then title, then 1 paragraph explanation + - P0: Critical — immediately exploitable (RCE, auth bypass, hardcoded secrets) + - P1: High — exploitable with conditions (SQL injection behind auth, stored XSS) + - P2: Medium — requires specific conditions (CSRF, info disclosure) + - P3: Low — minor security concern +- `line`: Target line number (single-line) or end line number (multi-line). Must be >= 0. +- `startLine`: `null` for single-line comments, or start line number for multi-line comments +- `side`: "RIGHT" for new/modified code (default), "LEFT" only for commenting on removed code + + + +- Output ONLY the JSON array — no additional commentary or markdown formatting around it. +- Do not include `commit_id` in your output — the parent agent will add this. +- Do not attempt to post comments to GitHub — just return the JSON array. +- Focus exclusively on security vulnerabilities, not code quality or style. +- Only report findings with high confidence and a realistic exploit path. + diff --git a/.github/workflows/droid-review.yml b/.github/workflows/droid-review.yml index 31a1f6d..b9a7bfd 100644 --- a/.github/workflows/droid-review.yml +++ b/.github/workflows/droid-review.yml @@ -25,7 +25,8 @@ jobs: fetch-depth: 1 - name: Run Droid Auto Review - uses: Factory-AI/droid-action@v3 + uses: Factory-AI/droid-action@dev with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} automatic_review: true + automatic_security_review: true diff --git a/.github/workflows/droid.yml b/.github/workflows/droid.yml index c034f4c..e011d89 100644 --- a/.github/workflows/droid.yml +++ b/.github/workflows/droid.yml @@ -34,6 +34,6 @@ jobs: fetch-depth: 1 - name: Run Droid Exec - uses: Factory-AI/droid-action@v3 + uses: Factory-AI/droid-action@v5 with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} diff --git a/README.md b/README.md index f0ec5f9..dd6141d 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ jobs: fetch-depth: 1 - name: Run Droid Exec - uses: Factory-AI/droid-action@v3 + uses: Factory-AI/droid-action@v5 with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} ``` @@ -104,7 +104,7 @@ jobs: fetch-depth: 1 - name: Run Droid Auto Review - uses: Factory-AI/droid-action@v3 + uses: Factory-AI/droid-action@v5 with: factory_api_key: ${{ secrets.FACTORY_API_KEY }} automatic_review: true @@ -150,11 +150,67 @@ Set `automatic_review: true` to run code reviews automatically on non-draft PRs. ### Review Configuration -| Input | Default | Purpose | -| ------------------ | --------- | ----------------------------------------------------------------------- | -| `automatic_review` | `false` | Automatically run code review on PRs without requiring `@droid review`. | -| `review_model` | `gpt-5.2` | Override the model used for code review. | -| `fill_model` | `""` | Override the model used for PR description fill. | +| Input | Default | Purpose | +| ------------------ | ------- | ---------------------------------------------------------------------------------------------------- | +| `automatic_review` | `false` | Automatically run code review on PRs without requiring `@droid review`. | +| `review_depth` | `deep` | Review depth preset: `shallow` (fast) or `deep` (thorough). See [Review Depth](#review-depth) below. | +| `review_model` | `""` | Override the model for code review. When empty, determined by `review_depth`. | +| `reasoning_effort` | `""` | Override reasoning effort for review. When empty, determined by `review_depth`. | +| `fill_model` | `""` | Override the model used for PR description fill. | + +### Review Depth + +The `review_depth` input controls which model and reasoning effort are used for code reviews. Two presets are available: + +| Depth | Model | Reasoning Effort | Best For | +| ----------- | -------------- | ---------------- | ------------------------------------------------------- | +| **deep** | `gpt-5.2` | `high` | Thorough reviews catching subtle bugs and design issues | +| **shallow** | `kimi-k2-0711` | default | Fast, cost-effective reviews for straightforward PRs | + +**Examples:** + +```yaml +# Deep review (default - no extra config needed) +- uses: Factory-AI/droid-action@v5 + with: + factory_api_key: ${{ secrets.FACTORY_API_KEY }} + automatic_review: true + +# Shallow review for faster feedback +- uses: Factory-AI/droid-action@v5 + with: + factory_api_key: ${{ secrets.FACTORY_API_KEY }} + automatic_review: true + review_depth: shallow + +# Fully custom model (overrides depth preset entirely) +- uses: Factory-AI/droid-action@v5 + with: + factory_api_key: ${{ secrets.FACTORY_API_KEY }} + automatic_review: true + review_model: claude-sonnet-4-5-20250929 + reasoning_effort: high +``` + +> **Tip:** Setting `review_model` or `reasoning_effort` explicitly always takes priority over the depth preset. You can mix and match -- for example, use `review_depth: shallow` but override just `reasoning_effort: high` to get the shallow model with higher reasoning. + +### Updating Review Models + +The depth presets are defined in [`src/utils/review-depth.ts`](src/utils/review-depth.ts). To change which models are used for shallow or deep reviews, edit the `REVIEW_DEPTH_PRESETS` object: + +```typescript +const SHALLOW_DEFAULTS = { + model: "kimi-k2-0711", // Change to any supported model + reasoningEffort: undefined, // undefined = use model default +}; + +const DEEP_DEFAULTS = { + model: "gpt-5.2", // Change to any supported model + reasoningEffort: "high", // "high" | "medium" | "low" | undefined +}; +``` + +Individual users can also override these defaults per-workflow without modifying the source by setting `review_model` and/or `reasoning_effort` inputs directly in their workflow YAML. ### Security Configuration @@ -169,6 +225,20 @@ Set `automatic_review: true` to run code reviews automatically on non-draft PRs. | `security_scan_schedule` | `false` | Configuration for scheduled security scans (when invoked from scheduled workflows). | | `security_scan_days` | `7` | Number of days of commits to scan for scheduled security scans. | +## Custom Review Guidelines + +You can add repository-specific review guidelines by creating a `.factory/skills/review-guidelines/SKILL.md` file: + +```markdown +Additional checks for this codebase: + +- React hooks rules violations +- Missing TypeScript types on public APIs +- Prisma query performance issues +``` + +These guidelines are automatically loaded and injected into all review prompts (code review, security review, and validation passes). No workflow changes needed. + ## Security Skills The security review uses specialized Factory skills installed from the public `Factory-AI/skills` repository: diff --git a/action.yml b/action.yml index e419f98..7918846 100644 --- a/action.yml +++ b/action.yml @@ -91,24 +91,28 @@ inputs: description: "Number of days of commits to scan for scheduled security scans. Only applies when security_scan_schedule is enabled." required: false default: "7" + review_depth: + description: "Review depth preset: 'shallow' (fast, uses kimi-k2-0711) or 'deep' (thorough, uses gpt-5.2 with high reasoning). Defaults to deep. Setting review_model or reasoning_effort explicitly overrides the preset values." + required: false + default: "deep" review_model: - description: "Override the model used for code review (e.g., 'claude-sonnet-4-5-20250929', 'gpt-5.1-codex'). Only applies to review flows." + description: "Override the model used for code review (e.g., 'claude-sonnet-4-5-20250929', 'gpt-5.1-codex'). If empty, the model is determined by review_depth." required: false - default: "gpt-5.2" + default: "" reasoning_effort: - description: "Override reasoning effort for review flows (passed to Droid Exec as --reasoning-effort). If empty and review_model is also empty, the action defaults internally to gpt-5.2 at high reasoning." + description: "Override reasoning effort for review flows (passed to Droid Exec as --reasoning-effort). If empty, determined by review_depth (shallow=default, deep=high)." required: false - default: "high" - review_use_validator: - description: "Enable two-pass review: generate candidate comments to JSON, then validate and post only approved ones." + default: "" + include_suggestions: + description: "Include code suggestion blocks in review comments when the fix is high-confidence. Set to 'false' to disable suggestions." required: false default: "true" review_candidates_path: - description: "Path to write review candidates JSON (run #1 when review_use_validator=true)." + description: "Path to write review candidates JSON (pass 1 of the two-pass review)." required: false default: "${{ runner.temp }}/droid-prompts/review_candidates.json" review_validated_path: - description: "Path to write review validated JSON (run #2 when review_use_validator=true)." + description: "Path to write review validated JSON (pass 2 of the two-pass review)." required: false default: "${{ runner.temp }}/droid-prompts/review_validated.json" fill_model: @@ -187,14 +191,15 @@ runs: SECURITY_NOTIFY_TEAM: ${{ inputs.security_notify_team }} SECURITY_SCAN_SCHEDULE: ${{ inputs.security_scan_schedule }} SECURITY_SCAN_DAYS: ${{ inputs.security_scan_days }} + REVIEW_DEPTH: ${{ inputs.review_depth }} REVIEW_MODEL: ${{ inputs.review_model }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} FILL_MODEL: ${{ inputs.fill_model }} ADDITIONAL_PERMISSIONS: ${{ inputs.additional_permissions }} DROID_ARGS: ${{ inputs.droid_args }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} ALL_INPUTS: ${{ toJson(inputs) }} - name: Install Base Action Dependencies @@ -247,66 +252,16 @@ runs: env: EXPERIMENTAL_ALLOWED_DOMAINS: ${{ inputs.experimental_allowed_domains }} - - name: Install Security Skills + - name: Install Security Plugin if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.install_security_skills == 'true' shell: bash run: | - echo "Installing security skills from Factory-AI/skills..." - SKILLS_DIR="$HOME/.factory/skills" - mkdir -p "$SKILLS_DIR" - - # Clone public skills repo (sparse checkout for efficiency) - TEMP_DIR=$(mktemp -d) - git clone --filter=blob:none --sparse \ - "https://github.com/Factory-AI/skills.git" \ - "$TEMP_DIR" 2>/dev/null || { - echo "Warning: Could not clone skills repo. Security skills will not be available." - exit 0 + echo "Installing security-engineer plugin from factory-plugins marketplace..." + droid plugin marketplace add https://github.com/Factory-AI/factory-plugins 2>/dev/null || true + droid plugin install security-engineer@factory-plugins --scope user 2>/dev/null || { + echo "Warning: Could not install security-engineer plugin. Security review may have limited functionality." } - - cd "$TEMP_DIR" - git sparse-checkout set \ - skills/threat-model-generation \ - skills/commit-security-scan \ - skills/vulnerability-validation \ - skills/security-review 2>/dev/null || true - - # Copy skills to ~/.factory/skills/ and track installed count - INSTALLED_COUNT=0 - for skill in threat-model-generation commit-security-scan vulnerability-validation security-review; do - if [ -d "skills/$skill" ]; then - cp -r "skills/$skill" "$SKILLS_DIR/" - echo " Installed skill: $skill" - INSTALLED_COUNT=$((INSTALLED_COUNT + 1)) - else - echo " Warning: Skill not found in repo: $skill" - fi - done - - # Cleanup - rm -rf "$TEMP_DIR" - - # Verify at least one skill was installed - if [ "$INSTALLED_COUNT" -eq 0 ]; then - echo "Warning: No security skills were installed. The skills may not exist in the Factory-AI/skills repository." - echo "Security review will proceed but may have limited functionality." - else - echo "Security skills installation complete ($INSTALLED_COUNT skills installed)" - fi - - # Verify skills exist in the target directory - echo "Verifying installed skills in $SKILLS_DIR..." - VERIFIED_COUNT=0 - for skill in threat-model-generation commit-security-scan vulnerability-validation security-review; do - if [ -d "$SKILLS_DIR/$skill" ]; then - echo " Verified: $skill" - VERIFIED_COUNT=$((VERIFIED_COUNT + 1)) - fi - done - - if [ "$VERIFIED_COUNT" -ne "$INSTALLED_COUNT" ]; then - echo "Warning: Skill verification mismatch. Expected $INSTALLED_COUNT, found $VERIFIED_COUNT in $SKILLS_DIR" - fi + echo "Security plugin installation complete" - name: Run Droid Exec id: droid @@ -336,22 +291,23 @@ runs: - name: Prepare validator id: prepare_validator - if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' + if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.run_code_review == 'true' shell: bash run: | bun run ${GITHUB_ACTION_PATH}/src/entrypoints/prepare-validator.ts env: GITHUB_TOKEN: ${{ steps.prepare.outputs.github_token }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} DROID_COMMENT_ID: ${{ steps.prepare.outputs.droid_comment_id }} + REVIEW_DEPTH: ${{ inputs.review_depth }} REVIEW_MODEL: ${{ inputs.review_model }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} - name: Run Droid Exec (validator) id: droid_validator - if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' + if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.run_code_review == 'true' shell: bash run: | @@ -389,7 +345,7 @@ runs: GITHUB_EVENT_NAME: ${{ github.event_name }} TRIGGER_COMMENT_ID: ${{ github.event.comment.id }} IS_PR: ${{ github.event.issue.pull_request != null || github.event_name == 'pull_request_target' || github.event_name == 'pull_request_review_comment' }} - DROID_SUCCESS: ${{ (inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' && steps.droid_validator.outputs.conclusion == 'success') || (!(inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true') && steps.droid.outputs.conclusion == 'success') }} + DROID_SUCCESS: ${{ (steps.prepare.outputs.run_code_review == 'true' && steps.droid_validator.outputs.conclusion == 'success') || (steps.prepare.outputs.run_code_review != 'true' && steps.droid.outputs.conclusion == 'success') }} TRIGGER_USERNAME: ${{ github.event.comment.user.login || github.event.issue.user.login || github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} PREPARE_SUCCESS: ${{ steps.prepare.outcome == 'success' }} PREPARE_ERROR: ${{ steps.prepare.outputs.prepare_error || '' }} diff --git a/review/action.yml b/review/action.yml index 6d8e620..89dd30a 100644 --- a/review/action.yml +++ b/review/action.yml @@ -12,22 +12,22 @@ inputs: tracking_comment_id: description: "ID of the tracking comment to update" required: true + review_depth: + description: "Review depth preset: 'shallow' (fast, uses kimi-k2-0711) or 'deep' (thorough, uses gpt-5.2 with high reasoning). Defaults to deep. Setting review_model or reasoning_effort explicitly overrides the preset values." + required: false + default: "deep" review_model: - description: "Model to use for review" + description: "Override the model used for review. If empty, determined by review_depth." required: false - default: "gpt-5.2" + default: "" reasoning_effort: - description: "Reasoning effort for review (passed to Droid Exec as --reasoning-effort)" + description: "Override reasoning effort for review. If empty, determined by review_depth (shallow=default, deep=high)." required: false - default: "high" + default: "" output_file: description: "Path to write review results JSON" required: false default: "" - review_use_validator: - description: "Enable two-pass review: generate candidate comments, then validate and post only approved ones." - required: false - default: "true" review_candidates_path: description: "Path to write review candidates JSON (pass 1)." required: false @@ -36,6 +36,10 @@ inputs: description: "Path to write review validated JSON (pass 2)." required: false default: "${{ runner.temp }}/droid-prompts/review_validated.json" + include_suggestions: + description: "Include code suggestion blocks in review comments when the fix is high-confidence. Set to 'false' to disable suggestions." + required: false + default: "true" outputs: conclusion: @@ -82,11 +86,12 @@ runs: env: GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} + REVIEW_DEPTH: ${{ inputs.review_depth }} REVIEW_MODEL: ${{ inputs.review_model }} REVIEW_TYPE: "code" - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} DROID_OUTPUT_FILE: ${{ inputs.output_file }} - name: Setup Custom Droids @@ -113,22 +118,21 @@ runs: - name: Prepare Validator id: prepare_validator - if: steps.prompt.outputs.review_use_validator == 'true' shell: bash run: | bun run ${{ github.action_path }}/../src/entrypoints/prepare-validator.ts env: GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} + REVIEW_DEPTH: ${{ inputs.review_depth }} REVIEW_MODEL: ${{ inputs.review_model }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} - name: Run Validator id: validator - if: steps.prompt.outputs.review_use_validator == 'true' shell: bash run: | bun run ${{ github.action_path }}/../base-action/src/index.ts @@ -153,4 +157,4 @@ runs: OUTPUT_FILE: ${{ inputs.output_file }} TRIGGER_USERNAME: ${{ github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} PREPARE_SUCCESS: "true" - DROID_SUCCESS: ${{ (steps.prompt.outputs.review_use_validator == 'true' && steps.validator.outcome == 'success') || (steps.prompt.outputs.review_use_validator != 'true' && steps.review.outcome == 'success') }} + DROID_SUCCESS: ${{ steps.validator.outcome == 'success' }} diff --git a/security/action.yml b/security/action.yml index 45e1c4e..dfcde4f 100644 --- a/security/action.yml +++ b/security/action.yml @@ -32,11 +32,35 @@ inputs: description: "Path to write security results JSON" required: false default: "" + review_candidates_path: + description: "Path to write review candidates JSON (pass 1)." + required: false + default: "${{ runner.temp }}/droid-prompts/review_candidates.json" + review_validated_path: + description: "Path to write review validated JSON (pass 2)." + required: false + default: "${{ runner.temp }}/droid-prompts/review_validated.json" + review_depth: + description: "Review depth preset for the validator." + required: false + default: "deep" + review_model: + description: "Override the model used for the validator." + required: false + default: "" + reasoning_effort: + description: "Override reasoning effort for the validator." + required: false + default: "" + include_suggestions: + description: "Include code suggestion blocks in review comments." + required: false + default: "false" outputs: conclusion: description: "Review conclusion (success/failure)" - value: ${{ steps.review.outputs.conclusion }} + value: ${{ steps.validator.outputs.conclusion }} runs: using: "composite" @@ -70,37 +94,24 @@ runs: FACTORY_API_KEY: ${{ inputs.factory_api_key }} OVERRIDE_GITHUB_TOKEN: ${{ inputs.github_token }} - - name: Install Security Skills + - name: Install Security Plugin shell: bash run: | - echo "Installing security skills from Factory-AI/skills..." - SKILLS_DIR="$HOME/.factory/skills" - mkdir -p "$SKILLS_DIR" - - TEMP_DIR=$(mktemp -d) - git clone --filter=blob:none --sparse \ - "https://github.com/Factory-AI/skills.git" \ - "$TEMP_DIR" 2>/dev/null || { - echo "Warning: Could not clone skills repo." - exit 0 + echo "Installing security-engineer plugin from factory-plugins marketplace..." + droid plugin marketplace add https://github.com/Factory-AI/factory-plugins 2>/dev/null || true + droid plugin install security-engineer@factory-plugins --scope user 2>/dev/null || { + echo "Warning: Could not install security-engineer plugin. Security review may have limited functionality." } + echo "Security plugin installation complete" - cd "$TEMP_DIR" - git sparse-checkout set \ - skills/threat-model-generation \ - skills/commit-security-scan \ - skills/vulnerability-validation \ - skills/security-review 2>/dev/null || true - - for skill in threat-model-generation commit-security-scan vulnerability-validation security-review; do - if [ -d "skills/$skill" ]; then - cp -r "skills/$skill" "$SKILLS_DIR/" - echo " Installed skill: $skill" - fi - done - - rm -rf "$TEMP_DIR" - echo "Security skills installation complete" + - name: Setup Custom Droids + shell: bash + run: | + mkdir -p ~/.factory/droids + if [ -d "${{ github.action_path }}/../.factory/droids" ]; then + cp -r ${{ github.action_path }}/../.factory/droids/* ~/.factory/droids/ + echo "Copied custom droids to ~/.factory/droids/" + fi - name: Generate Security Prompt id: prompt @@ -115,6 +126,8 @@ runs: SECURITY_BLOCK_ON_CRITICAL: ${{ inputs.security_block_on_critical }} SECURITY_BLOCK_ON_HIGH: ${{ inputs.security_block_on_high }} REVIEW_TYPE: "security" + REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} DROID_OUTPUT_FILE: ${{ inputs.output_file }} - name: Run Security Review @@ -129,3 +142,46 @@ runs: FACTORY_API_KEY: ${{ inputs.factory_api_key }} GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} DROID_OUTPUT_FILE: ${{ inputs.output_file }} + + - name: Prepare Validator + id: prepare_validator + shell: bash + run: | + bun run ${{ github.action_path }}/../src/entrypoints/prepare-validator.ts + env: + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} + REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} + REVIEW_DEPTH: ${{ inputs.review_depth }} + REVIEW_MODEL: ${{ inputs.review_model }} + REASONING_EFFORT: ${{ inputs.reasoning_effort }} + INCLUDE_SUGGESTIONS: ${{ inputs.include_suggestions }} + DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} + + - name: Run Validator + id: validator + shell: bash + run: | + bun run ${{ github.action_path }}/../base-action/src/index.ts + env: + INPUT_PROMPT_FILE: ${{ runner.temp }}/droid-prompts/droid-prompt.txt + INPUT_DROID_ARGS: ${{ steps.prepare_validator.outputs.droid_args }} + INPUT_MCP_TOOLS: ${{ steps.prepare_validator.outputs.mcp_tools }} + FACTORY_API_KEY: ${{ inputs.factory_api_key }} + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + + - name: Update tracking comment + if: always() && inputs.tracking_comment_id + continue-on-error: true + shell: bash + run: | + bun run ${{ github.action_path }}/../src/entrypoints/update-comment-link.ts + env: + DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} + GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + OUTPUT_FILE: ${{ inputs.output_file }} + TRIGGER_USERNAME: ${{ github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} + PREPARE_SUCCESS: "true" + DROID_SUCCESS: ${{ steps.validator.outcome == 'success' }} diff --git a/src/create-prompt/index.ts b/src/create-prompt/index.ts index 415013f..ce966b4 100644 --- a/src/create-prompt/index.ts +++ b/src/create-prompt/index.ts @@ -304,6 +304,7 @@ export type PromptCreationOptions = { includeActionsTools?: boolean; reviewArtifacts?: ReviewArtifacts; outputFilePath?: string; + includeSuggestions?: boolean; }; export async function createPrompt({ @@ -318,6 +319,7 @@ export async function createPrompt({ includeActionsTools = false, reviewArtifacts, outputFilePath, + includeSuggestions, }: PromptCreationOptions) { try { const droidCommentId = commentId?.toString(); @@ -334,6 +336,10 @@ export async function createPrompt({ preparedContext.outputFilePath = outputFilePath; } + if (includeSuggestions !== undefined) { + preparedContext.includeSuggestions = includeSuggestions; + } + await mkdir(`${process.env.RUNNER_TEMP || "/tmp"}/droid-prompts`, { recursive: true, }); diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts index 23d27d2..a596041 100644 --- a/src/create-prompt/templates/review-candidates-prompt.ts +++ b/src/create-prompt/templates/review-candidates-prompt.ts @@ -27,10 +27,49 @@ export function generateReviewCandidatesPrompt( process.env.REVIEW_CANDIDATES_PATH ?? "$RUNNER_TEMP/droid-prompts/review_candidates.json"; + const includeSuggestions = context.includeSuggestions !== false; + + const bodyFieldDescription = includeSuggestions + ? " - `body`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation.\n" + + " Follow the suggestion block rules from the review skill when including suggestions." + : " - `body`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation"; + + const sideFieldDescription = includeSuggestions + ? ' - `side`: "RIGHT" for new/modified code (default). Use "LEFT" only for removed code **without** suggestions.\n' + + " If you include a suggestion block, choose a RIGHT-side anchor and keep it unchanged so the validator can reuse it." + : ' - `side`: "RIGHT" for new/modified code (default), "LEFT" only for removed code'; + + const skillInstruction = includeSuggestions + ? "Invoke the 'review' skill to load the review methodology, then execute its **Pass 1: Candidate Generation** procedure — including suggestion block rules." + : "Invoke the 'review' skill to load the review methodology, then execute its **Pass 1: Candidate Generation** procedure. Do NOT include code suggestion blocks."; + + const securityReviewEnabled = process.env.SECURITY_REVIEW_ENABLED === "true"; + + const securitySubagentInstruction = securityReviewEnabled + ? ` + +## Security Review (run concurrently) + +In addition to the code review, you MUST also spawn a \`security-reviewer\` subagent via the Task tool. +This subagent runs **concurrently** with the code review subagents during Step 2. + +Spawn it with: +- \`subagent_type\`: "security-reviewer" +- \`description\`: "Security review" +- \`prompt\`: Include the full PR context (repo, PR number, head SHA, base ref) and the paths to precomputed data files (diff, description, existing comments). The security-reviewer will invoke the security-review skill and return a JSON array of security findings. + +**IMPORTANT**: Spawn the security-reviewer in the SAME response as the code review subagents so they all run in parallel. + +After all subagents complete (both code review and security-reviewer), merge the security findings into the \`comments\` array alongside code review findings. Security findings use the same schema but are prefixed with \`[security]\` in their body (e.g., \`[P1] [security] Title\`). +` + : ""; + return `You are a senior staff software engineer and expert code reviewer. Your task: Review PR #${prNumber} in ${repoFullName} and generate a JSON file with **high-confidence, actionable** review comments that pinpoint genuine issues. +${skillInstruction}${securitySubagentInstruction} + Repo: ${repoFullName} PR Number: ${prNumber} @@ -44,113 +83,6 @@ Precomputed data files: - Existing Comments: \`${commentsPath}\` - -**Step 0: Understand the PR intent** - -1. Read the PR description from \`${descriptionPath}\` to understand the purpose and scope of the changes. -2. If the PR description contains a ticket URL (e.g., Jira, Linear, GitHub issue link) or a ticket ID, **always fetch it** using FetchUrl or the appropriate tool to understand the full requirements and acceptance criteria. This context is critical for evaluating whether the implementation is correct and complete. - - - -- You are currently checked out to the PR branch. -- Review ALL modified files in the PR branch. -- Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems. -- High-signal bug patterns to actively check for (only comment when evidenced in the diff): - - Null/undefined/Optional dereferences; missing-key errors on untrusted/external dict/JSON payloads - - Resource leaks (unclosed files/streams/connections; missing cleanup on error paths) - - Injection vulnerabilities (SQL injection, XSS, command/template injection) and auth/security invariant violations - - OAuth/CSRF invariants: state must be per-flow unpredictable and validated; avoid deterministic/predictable state or missing state checks - - Concurrency/race/atomicity hazards (TOCTOU, lost updates, unsafe shared state, process/thread lifecycle bugs) - - Missing error handling for critical operations (network, persistence, auth, migrations, external APIs) - - Wrong-variable/shadowing mistakes; contract mismatches (serializer/validated_data, interfaces/abstract methods) - - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches) - - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics) -- Do NOT duplicate comments already in \`${commentsPath}\`. -- Only flag issues you are confident about—avoid speculative or stylistic nitpicks. - - - -**Step 1: Analyze and group the modified files** - -Before reviewing, you must triage the PR to enable parallel review: - -1. Read the diff file (\`${diffPath}\`) to identify ALL modified files -2. Group the files into logical clusters based on: - - **Related functionality**: Files in the same module or feature area - - **File relationships**: A component and its tests, a class and its interface - - **Risk profile**: Security-sensitive files together, database/migration files together - - **Dependencies**: Files that import each other or share types - -3. Document your grouping briefly, for example: - - Group 1 (Auth): src/auth/login.ts, src/auth/session.ts, tests/auth.test.ts - - Group 2 (API handlers): src/api/users.ts, src/api/orders.ts - - Group 3 (Database): src/db/migrations/001.ts, src/db/schema.ts - -Guidelines for grouping: -- Aim for 3-6 groups to balance parallelism with context coherence -- Keep related files together so reviewers have full context -- Each group should be reviewable independently - - - -**Step 2: Spawn parallel subagents to review each group** - -After grouping, use the Task tool to spawn parallel \`file-group-reviewer\` subagents. Each subagent will review one group of files independently. - -**IMPORTANT**: Spawn ALL subagents in a single response to enable parallel execution. - -For each group, invoke the Task tool with: -- \`subagent_type\`: "file-group-reviewer" -- \`description\`: Brief label (e.g., "Review auth module") -- \`prompt\`: Must include: - 1. The PR context (repo, PR number, base/head refs) - 2. The list of assigned files for this group - 3. The relevant diff sections for those files (extract from \`${diffPath}\`) - 4. Instructions to return a JSON array of findings - -Example Task invocation for one group: -\`\`\` -Task( - subagent_type: "file-group-reviewer", - description: "Review auth module", - prompt: """ - Review the following files from PR #${prNumber} in ${repoFullName}. - - PR Context: - - Head SHA: ${prHeadSha} - - Base Ref: ${prBaseRef} - - Assigned files: - - src/auth/login.ts - - src/auth/session.ts - - tests/auth.test.ts - - Diff for these files: - - - Return a JSON array of issues found. If no issues, return []. - """ -) -\`\`\` - -Spawn all group reviewers in parallel by including multiple Task calls in one response. - - - -**Step 3: Aggregate subagent results** - -After all subagents complete, collect and merge their findings: - -1. **Collect results**: Each subagent returns a JSON array of comment objects -2. **Merge arrays**: Combine all arrays into a single comments array -3. **Add commit_id**: Add \`"commit_id": "${prHeadSha}"\` to each comment object -4. **Deduplicate**: If multiple subagents flagged the same location (same path + line), keep only one comment (prefer higher priority: P0 > P1 > P2) -5. **Filter existing**: Remove any comments that duplicate issues already in \`${commentsPath}\` -6. **Write reviewSummary**: Synthesize a 1-3 sentence overall assessment based on all findings - -Write the final aggregated result to \`${reviewCandidatesPath}\` using the schema in \`\`. - - Write output to \`${reviewCandidatesPath}\` using this exact schema: @@ -167,7 +99,7 @@ Write output to \`${reviewCandidatesPath}\` using this exact schema: "comments": [ { "path": "src/index.ts", - "body": "[P1] Title\n\n1 paragraph.", + "body": "[P1] Title\\n\\n1 paragraph.", "line": 42, "startLine": null, "side": "RIGHT", @@ -192,10 +124,10 @@ Write output to \`${reviewCandidatesPath}\` using this exact schema: - **comments**: Array of comment objects - \`path\`: Relative file path (e.g., "src/index.ts") - - \`body\`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation +${bodyFieldDescription} - \`line\`: Target line number (single-line) or end line number (multi-line). Must be ≥ 0. - \`startLine\`: \`null\` for single-line comments, or start line number for multi-line comments - - \`side\`: "RIGHT" for new/modified code (default), "LEFT" only for removed code +${sideFieldDescription} - \`commit_id\`: "${prHeadSha}" - **reviewSummary**: diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts deleted file mode 100644 index 5863ab2..0000000 --- a/src/create-prompt/templates/review-prompt.ts +++ /dev/null @@ -1,389 +0,0 @@ -import type { PreparedContext } from "../types"; - -export function generateReviewPrompt(context: PreparedContext): string { - const prNumber = context.eventData.isPR - ? context.eventData.prNumber - : context.githubContext && "entityNumber" in context.githubContext - ? String(context.githubContext.entityNumber) - : "unknown"; - - const repoFullName = context.repository; - const headRefName = context.prBranchData?.headRefName ?? "unknown"; - const headSha = context.prBranchData?.headRefOid ?? "unknown"; - const baseRefName = context.eventData.baseBranch ?? "unknown"; - - const diffPath = - context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff"; - const commentsPath = - context.reviewArtifacts?.commentsPath ?? - "$RUNNER_TEMP/droid-prompts/existing_comments.json"; - const descriptionPath = - context.reviewArtifacts?.descriptionPath ?? - "$RUNNER_TEMP/droid-prompts/pr_description.txt"; - - return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}. -The gh CLI is installed and authenticated via GH_TOKEN. - -### Context - -* Repo: ${repoFullName} -* PR Number: ${prNumber} -* PR Head Ref: ${headRefName} -* PR Head SHA: ${headSha} -* PR Base Ref: ${baseRefName} -* The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output. - -### Pre-computed Review Artifacts - -The following files have been pre-computed and contain the COMPLETE data for this PR: - -* **PR Description**: \`${descriptionPath}\` - Contains the PR title and description (body) explaining the intent and scope of the changes -* **Full PR Diff**: \`${diffPath}\` - Contains the COMPLETE diff of ALL changed files (already computed via \`git merge-base\` and \`git diff\`) -* **Existing Comments**: \`${commentsPath}\` - Contains all existing PR comments and reviews in JSON format - -**IMPORTANT**: Use these pre-computed files instead of running git or gh commands to fetch diff/comments. This ensures you have access to the COMPLETE data without truncation. - ---- - -## CRITICAL INSTRUCTION - -**DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE IN THE DIFF.** - -You MUST: -1. Read the ENTIRE \`${diffPath}\` file first (use offset/limit if needed for large files) -2. Create a mental checklist of ALL files that were changed -3. Review EACH file systematically - do not skip any file -4. Only submit your review AFTER you have analyzed every single changed file - -If the diff is large, work through it methodically. Do not rush. Do not skip files. The quality of your review depends on thoroughness. - ---- - -## Objectives - -1. Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (**do NOT programmatically resolve threads**). -2. Review the PR diff and identify **high-confidence, actionable bugs** introduced by this PR. -3. Leave concise **inline comments (1-2 sentences)** for qualifying bugs. You may comment on unchanged lines *only* if the PR clearly triggers the issue—explain the trigger path. - ---- - -## Procedure - -Follow these phases **in order**. Do not submit findings until Phase 1 and Phase 2 are complete. - ---- - -## Phase 1: Context Gathering (REQUIRED — do not report bugs yet) - -1. **Read the PR description** to understand the intent and scope: - \`Read ${descriptionPath}\` - -2. **Read existing comments** from the pre-computed file: - \`Read ${commentsPath}\` - -3. **Read the COMPLETE diff** from the pre-computed file: - \`Read ${diffPath}\` - - If the file is large (>2400 lines), read it in chunks using offset/limit parameters. - **DO NOT proceed until you have read the ENTIRE diff.** - -4. **List all changed files** - After reading the diff, explicitly list every file that was changed. This is your checklist. - -5. For **each file in the diff**, gather context: - - * New imports → Grep to confirm the symbol exists - * New/modified functions → Grep for callers to understand usage - * Data-processing code → Read surrounding code to infer expected types - -6. Do **not** identify or report bugs yet. This phase is for understanding only. - ---- - -## Phase 2: Issue Identification (ONLY after Phase 1) - -Review **every changed line**. You must complete the review even if you find issues early. - -### Analysis discipline - -* Verify with Grep/Read before flagging (no speculation) -* Trace data flow to confirm a **real trigger path** -* Check whether the pattern exists elsewhere (may be intentional) - -### Cross-reference checks - -* When reviewing tests, search for related constants, configs, or environment variables -* Verify test assumptions match production behavior - *Example:* if a test sets an env var, Grep where it is consumed to confirm behavior matches prod - -### Import verification - -* Any import referencing a non-existent symbol is a bug (runtime ImportError) - ---- - -## Systematic Analysis Patterns - -Apply these patterns during your review. These target common bug classes: - -### Logic & Variable Usage - -* When reviewing conditionals, verify the correct variable is referenced in each clause -* Check for AND vs OR confusion in permission/validation logic (especially security-critical paths) -* Verify return statements return the intended value: - - Not wrapper objects when data is expected (e.g., \`safeParse()\` result vs \`.data\`) - - Not intermediate variables when final result is expected - - Not wrong object properties (e.g., \`obj.original\` vs \`obj.modified\`) -* In loops or transformations, confirm variable names match semantic purpose -* Flag operations where variable names suggest type mismatches (e.g., \`*Time\` used where \`*Date\` expected) - -### Null/Undefined Safety - -* For each property access chain (\`a.b.c\`), verify none of the intermediate values can be null/undefined/None: - - Check API responses that might return null - - Check database queries that might return no results - - Check array operations like \`array[0]\` or \`array.find()\` -* When Optional types are unwrapped (\`.get()\`, \`!\`, non-null assertions), verify presence is checked first -* In conditional branches, verify null handling exists for all code paths -* Pay special attention to: - - Authentication/authorization contexts (user might not be authenticated) - - Optional relationships (foreign keys, joined data) - - Map/dictionary lookups - - Configuration values that might not be set - -### Type Compatibility & Data Flow - -* Trace what types flow into math operations: - - \`floor\`, \`ceil\`, \`round\`, modulo on datetime/string inputs cause errors - - Arithmetic operations on mixed types -* Verify comparison operators match types: - - Object reference equality (\`===\`, \`==\`) on different instances always false - - Comparing object types when value comparison intended (e.g., dayjs objects) -* Check function parameters receive expected types after transformations: - - Serialization/deserialization boundary crossings - - Database field type conversions - - API request/response parsing -* Flag string operations on numbers or vice versa without explicit conversion -* When data flows through multiple functions, verify type consistency at each step - -### Async/Await Patterns (JavaScript/TypeScript) - -* **CRITICAL**: Flag \`forEach\`/\`map\`/\`filter\` with async callbacks - these don't await: - \`\`\`javascript - // BUG: async callbacks are fire-and-forget - items.forEach(async (item) => await process(item)); - - // CORRECT: use for...of or Promise.all - for (const item of items) await process(item); - \`\`\` -* Verify all async function calls are awaited when their result or side-effect is needed: - - Database writes that must complete before continuing - - API calls whose responses are used - - File operations that affect subsequent code -* Check promise chains have proper error handling (\`.catch()\` or try/catch) -* Verify parallel operations use \`Promise.all\`/\`Promise.allSettled\` appropriately - -### Security Vulnerabilities - -* **SSRF (Server-Side Request Forgery)**: - - Flag unvalidated URL fetching: \`open(url)\`, \`fetch(user_input)\`, \`curl\` with user input - - Verify URL allowlists exist for external requests -* **XSS (Cross-Site Scripting)**: - - Check for unescaped user input in HTML/template contexts - - Verify template engines auto-escape by default -* **Authentication & Session State**: - - OAuth state/nonce must be per-request random, not static or reused - - CSRF tokens must be unpredictable and verified - - Session data must not leak between requests -* **Input Validation Bypasses**: - - \`indexOf()\`/\`startsWith()\` for origin validation can be bypassed (use exact allowlist matching) - - Case sensitivity in security checks (email blacklists, domain checks) -* **Timing Attacks**: - - Secret/token comparison should use constant-time comparison functions -* **Cache Poisoning**: - - Verify security decisions (permissions, auth) aren't cached asymmetrically - - If grants are cached, denials must also be cached (or neither) - -### Concurrency Issues (when applicable) - -* Check if shared state is modified without synchronization: - - Global variables or class fields accessed by multiple threads/goroutines - - Database records that can be modified concurrently -* Verify double-checked locking re-checks condition after acquiring lock: - \`\`\`go - // BUG: doesn't re-check after lock - if !initialized { - lock.Lock() - initialize() - lock.Unlock() - } - - // CORRECT: re-check after acquiring lock - if !initialized { - lock.Lock() - if !initialized { - initialize() - } - lock.Unlock() - } - \`\`\` -* Flag non-atomic operations on shared counters: - - \`count = count + 1\` in concurrent code (use atomic operations) - - Read-modify-write without locks -* Check that resources accessed by multiple threads have proper synchronization - -### API Contract & Breaking Changes - -* When serializers/validators/response formatters change: - - Use Grep to find API consumers and test files - - Verify response structure remains compatible (field names, types, nesting) - - Check for removed fields, changed types, or new required fields -* When database models/schemas change: - - Verify migrations include data backfill for existing records - - Check that existing queries still work with new schema -* When function signatures change: - - Grep for all callers to verify compatibility - - Check if return type changes break caller assumptions -* Review test files for expected API shapes and verify changes match tests - ---- - -## **Reporting Gate (CRITICAL)** - -Only report findings that meet **at least one** of the following: - -### Reportable bugs - -* **Definite runtime failures** (TypeError, KeyError, AttributeError, ImportError) -* **Incorrect logic** with a clear trigger path and observable wrong result -* **Security vulnerabilities** with a realistic exploit path -* **Data corruption or loss** -* **Breaking contract changes** (API / response / schema / validator behavior) where the contract is discoverable in code, tests, or docs - -### Do NOT report - -* Test code hygiene (unused vars, setup patterns) unless it causes test failure -* Defensive "what-if" scenarios without a realistic trigger -* Cosmetic issues (message text, naming, formatting) -* Suggestions to "add guards," "add try/catch," or "be safer" without a concrete failure - -### Confidence rule - -* Prefer **DEFINITE** bugs over **POSSIBLE** bugs -* Report POSSIBLE bugs **only** if you can identify a realistic execution path - ---- - -## Targeted semantic passes (apply when relevant) - -* **API / validator / serializer changes** - Explicitly check for response-format or contract breakage - *(e.g., changed error response structure, removed or renamed fields, different status codes, altered required keys)* - -* **Auth / OAuth / session / state changes** - Check null-state handling, per-request randomness (state/nonce), and failure paths - ---- - -## Deduplication - -* Never open a new finding for an issue previously reported by this bot on this PR -* If an issue appears fixed, reply "resolved" in the existing thread - ---- - -## Priority Levels - -* [P0] Blocking / crash / exploit -* [P1] Urgent correctness or security issue -* [P2] Real bug with limited impact -* [P3] Minor but real bug - ---- - -## Comment format - -Each inline comment must be: - -**[P0-P3] Clear imperative title (≤80 chars)** - - -(blank line) - -One short paragraph explaining *why* this is a bug and *how* it manifests. - -* Max 1 paragraph -* Code snippets ≤3 lines, Markdown fenced -* Matter-of-fact, non-accusatory tone - ---- - -## Phase 3: Submit Review - -### When NOT to submit - -* PR is formatting-only -* You cannot anchor a high-confidence issue to a specific changed line -* All findings are low-severity (P2/P3) -* All findings fail the Reporting Gate above - -### Tools & mechanics - -* Use \`github_inline_comment___create_inline_comment\` - * Anchor using **path + side + line** - * RIGHT = new/modified code, LEFT = removed code - * Line numbers must correspond to the chosen side -* Use \`github_pr___submit_review\` for the summary -* Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments -* Use \`github_pr___reply_to_comment\` to acknowledge resolved issues -* **Do NOT call** \`github_pr___resolve_review_thread\` -* Do **not** approve or request changes - -### "No issues" handling - -* If no issues and a prior "no issues" comment exists → skip -* If no issues and no prior comment exists → post a brief summary -* If issues exist and a prior "no issues" comment exists → delete/minimize it -* **Do NOT delete** comment ID ${context.droidCommentId} - ---- - -## Review summary - -In the submitted review body: - -* State whether the changes are correct or incorrect -* Provide a 1-3 sentence overall assessment -${ - context.outputFilePath - ? ` ---- - -## Output File (REQUIRED) - -After completing your review, you MUST write your findings to \`${context.outputFilePath}\` as a JSON file with this structure: - -\`\`\`json -{ - "type": "code-review", - "findings": [ - { - "id": "CR-001", - "severity": "P0|P1|P2|P3", - "file": "path/to/file.ts", - "line": 55, - "side": "RIGHT", - "description": "Brief description of the issue", - "suggestion": "Optional suggested fix" - } - ], - "summary": "Brief overall summary of the review" -} -\`\`\` - -If no issues were found, write: \`{"type": "code-review", "findings": [], "summary": "No issues found"}\` - -This file is required for downstream processing of review results. -` - : "" -}`; -} diff --git a/src/create-prompt/templates/review-validator-prompt.ts b/src/create-prompt/templates/review-validator-prompt.ts index 6058e91..1ad44c1 100644 --- a/src/create-prompt/templates/review-validator-prompt.ts +++ b/src/create-prompt/templates/review-validator-prompt.ts @@ -30,10 +30,18 @@ export function generateReviewValidatorPrompt( process.env.REVIEW_VALIDATED_PATH ?? "$RUNNER_TEMP/droid-prompts/review_validated.json"; + const includeSuggestions = context.includeSuggestions !== false; + + const skillInstruction = includeSuggestions + ? "Invoke the 'review' skill to load the review methodology, then execute its **Pass 2: Validation** procedure — including suggestion block rules." + : "Invoke the 'review' skill to load the review methodology, then execute its **Pass 2: Validation** procedure. Do NOT include code suggestion blocks."; + return `You are validating candidate review comments for PR #${prNumber} in ${repoFullName}. IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline. +${skillInstruction} + ### Context * Repo: ${repoFullName} @@ -44,84 +52,30 @@ IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline. ### Inputs -Read: +Read these files before validating: * PR Description: \`${descriptionPath}\` * Candidates: \`${reviewCandidatesPath}\` * Full PR Diff: \`${diffPath}\` * Existing Comments: \`${commentsPath}\` -### Outputs +If the diff is large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.** -1) Write validated results to: \`${reviewValidatedPath}\` -2) Post ONLY the approved inline comments to the PR -3) Submit a PR review summary (if applicable) - -======================= - -## CRITICAL REQUIREMENTS +### Critical Requirements 1. You MUST read and validate **every** candidate before posting anything. -2. For each candidate, confirm: - * It is a real, actionable bug (not speculative) - * There is a realistic trigger path and observable wrong behavior - * The anchor is valid (path + side + line/startLine correspond to the diff) -3. **Posting rule (STRICT):** - * Only post comments where \`status === "approved"\`. - * Never post rejected items. -4. Preserve ordering: keep results in the same order as candidates. - -======================= - -## Phase 1: Load context (REQUIRED) - -1. Read the PR description: - Read \`${descriptionPath}\` - -2. Read existing comments: - Read \`${commentsPath}\` - -3. Read the COMPLETE diff: - Read \`${diffPath}\` - If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.** - -4. Read candidates: - Read \`${reviewCandidatesPath}\` +2. Preserve ordering: keep results in the same order as candidates. +3. **Posting rule (STRICT):** Only post comments where \`status === "approved"\`. Never post rejected items. -======================= - -## Phase 2: Validate candidates - -Apply the same Reporting Gate as review: - -### Approve ONLY if at least one is true -* Definite runtime failure -* Incorrect logic with a concrete trigger path and wrong outcome -* Security vulnerability with realistic exploit -* Data corruption/loss -* Breaking contract change (discoverable in code/tests) - -Reject if: -* It's speculative / "might" without a concrete trigger -* It's stylistic / naming / formatting -* It's not anchored to a valid changed line -* It's already reported (dedupe against existing comments) - -When rejecting, write a concise reason. - -======================= - -## Phase 3: Write review_validated.json (REQUIRED) - -Write \`${reviewValidatedPath}\` with this schema: +### Output: Write \`${reviewValidatedPath}\` \`\`\`json { "version": 1, "meta": { - "repo": "owner/repo", - "prNumber": 123, - "headSha": "", - "baseRef": "main", + "repo": "${repoFullName}", + "prNumber": ${prNumber}, + "headSha": "${prHeadSha}", + "baseRef": "${prBaseRef}", "validatedAt": "" }, "results": [ @@ -129,11 +83,11 @@ Write \`${reviewValidatedPath}\` with this schema: "status": "approved", "comment": { "path": "src/index.ts", - "body": "[P1] Title\n\n1 paragraph.", + "body": "[P1] Title\\n\\n1 paragraph.", "line": 42, "startLine": null, "side": "RIGHT", - "commit_id": "" + "commit_id": "${prHeadSha}" } }, { @@ -144,14 +98,14 @@ Write \`${reviewValidatedPath}\` with this schema: "line": 10, "startLine": null, "side": "RIGHT", - "commit_id": "" + "commit_id": "${prHeadSha}" }, "reason": "Not a real bug because ..." } ], "reviewSummary": { "status": "approved", - "body": "1–3 sentence overall assessment" + "body": "1-3 sentence overall assessment" } } \`\`\` @@ -160,20 +114,20 @@ Notes: * Use \`commit_id\` = \`${prHeadSha}\`. * \`results\` MUST have exactly one entry per candidate, in the same order. -Then write the file using the local file tool. - Tooling note: * If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path. * Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file. -======================= - -## Phase 4: Post approved items +### Post approved items After writing \`${reviewValidatedPath}\`, post comments ONLY for \`status === "approved"\`: -* For each approved entry, call \`github_inline_comment___create_inline_comment\` with the \`comment\` object. -* Submit a review via \`github_pr___submit_review\` using the summary body (if there are any approved items OR a meaningful overall assessment). +* Collect all approved comments and submit them as a **single batched review** via \`github_pr___submit_review\`, passing them in the \`comments\` array parameter. +* Do **NOT** post comments individually — batch them all into one \`submit_review\` call. +* Do **NOT** include a \`body\` parameter in \`submit_review\`. +* Use \`github_comment___update_droid_comment\` to update the tracking comment with the review summary. +* If any approved comments contain \`[security]\` in their body, prepend a security badge to the tracking comment: \`![Security Review](https://img.shields.io/badge/security%20review-ran-blue)\`. This indicates that security analysis was performed as part of the review. +* Do **NOT** post the summary as a separate comment or as the body of \`submit_review\`. * Do not approve or request changes. `; } diff --git a/src/create-prompt/templates/security-report-prompt.ts b/src/create-prompt/templates/security-report-prompt.ts index 7a1a0be..636760f 100644 --- a/src/create-prompt/templates/security-report-prompt.ts +++ b/src/create-prompt/templates/security-report-prompt.ts @@ -43,7 +43,7 @@ The gh CLI is installed and authenticated via GH_TOKEN. ## Security Skills Available -You have access to these Factory security skills (installed in ~/.factory/skills/): +You have access to security skills from the security-engineer plugin (security-engineer@factory-plugins): 1. **threat-model-generation** - Generate STRIDE-based threat model for the repository 2. **commit-security-scan** - Scan code for security vulnerabilities diff --git a/src/create-prompt/templates/security-review-prompt.ts b/src/create-prompt/templates/security-review-prompt.ts index 7e4425f..b8969d8 100644 --- a/src/create-prompt/templates/security-review-prompt.ts +++ b/src/create-prompt/templates/security-review-prompt.ts @@ -1,6 +1,8 @@ import type { PreparedContext } from "../types"; -export function generateSecurityReviewPrompt(context: PreparedContext): string { +export function generateSecurityCandidatesPrompt( + context: PreparedContext, +): string { const prNumber = context.eventData.isPR ? context.eventData.prNumber : context.githubContext && "entityNumber" in context.githubContext @@ -8,161 +10,99 @@ export function generateSecurityReviewPrompt(context: PreparedContext): string { : "unknown"; const repoFullName = context.repository; - const headRefName = context.prBranchData?.headRefName ?? "unknown"; - const headSha = context.prBranchData?.headRefOid ?? "unknown"; - const baseRefName = context.eventData.baseBranch ?? "unknown"; - - // Extract security configuration from context - const securityConfig = context.githubContext?.inputs; - const severityThreshold = - securityConfig?.securitySeverityThreshold ?? "medium"; - const blockOnCritical = securityConfig?.securityBlockOnCritical ?? true; - const blockOnHigh = securityConfig?.securityBlockOnHigh ?? false; - const notifyTeam = securityConfig?.securityNotifyTeam ?? ""; - - return `You are performing a security-focused code review for PR #${prNumber} in ${repoFullName}. -The gh CLI is installed and authenticated via GH_TOKEN. - -## Context -- Repo: ${repoFullName} -- PR Number: ${prNumber} -- PR Head Ref: ${headRefName} -- PR Head SHA: ${headSha} -- PR Base Ref: ${baseRefName} - -## Security Configuration -- Severity Threshold: ${severityThreshold} (only report findings at or above this level) -- Block on Critical: ${blockOnCritical} -- Block on High: ${blockOnHigh} -${notifyTeam ? `- Notify Team: ${notifyTeam} (mention on critical findings)` : ""} - -## Security Skills Available - -You have access to these Factory security skills (installed in ~/.factory/skills/): - -1. **threat-model-generation** - Generate STRIDE-based threat model for the repository -2. **commit-security-scan** - Scan code changes for security vulnerabilities -3. **vulnerability-validation** - Validate findings, assess exploitability, filter false positives -4. **security-review** - Comprehensive security review and patch generation - -## Review Workflow - -### Step 1: Threat Model Check -- Check if \`.factory/threat-model.md\` exists in the repository -- If missing: Note this in the summary (threat model generation is done separately, not during PR review) -- If exists: Use it as context for the security scan - -### Step 2: Security Scan -- Invoke the **commit-security-scan** skill on the PR diff -- Gather the PR diff using: \`gh pr diff ${prNumber} --repo ${repoFullName}\` -- Get file changes: \`gh api repos/${repoFullName}/pulls/${prNumber}/files --paginate\` -- Focus analysis on: - - New code introduced by this PR - - Modified code that may introduce vulnerabilities - - Changes that expose existing vulnerabilities - -### Step 3: Validate Findings -- For each finding from Step 2, invoke the **vulnerability-validation** skill -- Assess: - - Reachability: Is the vulnerable code reachable from user input? - - Exploitability: How easy is it to exploit? - - Impact: What's the potential damage? -- Filter out false positives and findings below the severity threshold (${severityThreshold}) - -### Step 4: Report Findings -- For each confirmed finding at or above ${severityThreshold} severity: - - Record in the JSON output file - - Include: severity, STRIDE category, CWE ID, clear explanation, suggested fix -- For auto-fixable issues: Include the suggested patch in the finding's suggestion field - -## Security Scope (STRIDE Categories) - -**Spoofing** (S): -- Weak authentication, session hijacking, token exposure - -**Tampering** (T): -- SQL/NoSQL/command injection, XSS, mass assignment, unsafe deserialization - -**Repudiation** (R): -- Missing audit logs, unsigned transactions - -**Information Disclosure** (I): -- IDOR, verbose errors, hardcoded secrets, sensitive data in logs - -**Denial of Service** (D): -- Missing rate limits, resource exhaustion, ReDoS - -**Elevation of Privilege** (E): -- Missing authorization checks, role manipulation, privilege escalation - -## Severity Definitions - -| Severity | Criteria | Examples | -|----------|----------|----------| -| **CRITICAL** | Immediately exploitable, high impact, no auth required | RCE, hardcoded secrets, auth bypass | -| **HIGH** | Exploitable with conditions, significant impact | SQL injection, stored XSS, IDOR | -| **MEDIUM** | Requires specific conditions, moderate impact | CSRF, info disclosure, missing rate limits | -| **LOW** | Difficult to exploit, low impact | Verbose errors, missing security headers | - -## Output Requirements - -IMPORTANT: Do NOT post inline comments directly. Instead, write findings to a JSON file. -The finalize step will post all inline comments to avoid overlapping with code review comments. - -1. Write findings to \`${context.outputFilePath || "security-review-results.json"}\` with this structure: -\`\`\`json -{ - "type": "security", - "findings": [ - { - "id": "SEC-001", - "severity": "CRITICAL|HIGH|MEDIUM|LOW", - "type": "SQL Injection", - "stride": "T", - "cwe": "CWE-89", - "file": "path/to/file.ts", - "line": 55, - "side": "RIGHT", - "description": "Brief description of the vulnerability", - "suggestion": "Optional code fix" - } - ], - "summary": "Brief overall summary", - "block_pr": ${blockOnCritical || blockOnHigh ? "true if CRITICAL/HIGH found" : "false"} -} -\`\`\` + const prHeadRef = context.prBranchData?.headRefName ?? "unknown"; + const prHeadSha = context.prBranchData?.headRefOid ?? "unknown"; + const prBaseRef = context.eventData.baseBranch ?? "unknown"; -2. Update the tracking comment using \`github_comment___update_droid_comment\` + const diffPath = + context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff"; + const commentsPath = + context.reviewArtifacts?.commentsPath ?? + "$RUNNER_TEMP/droid-prompts/existing_comments.json"; + const descriptionPath = + context.reviewArtifacts?.descriptionPath ?? + "$RUNNER_TEMP/droid-prompts/pr_description.txt"; -## Summary Format (for tracking comment update) + const reviewCandidatesPath = + process.env.REVIEW_CANDIDATES_PATH ?? + "$RUNNER_TEMP/droid-prompts/review_candidates.json"; -Use \`github_comment___update_droid_comment\` to update the tracking comment with this format: + return `You are a senior security engineer performing a security-focused code review. -\`\`\`markdown -## 🔐 Security Review Summary +Your task: Review PR #${prNumber} in ${repoFullName} and generate a JSON file with **high-confidence security vulnerability** findings. -| Severity | Count | -|----------|-------| -| 🚨 CRITICAL | X | -| 🔴 HIGH | X | -| 🟡 MEDIUM | X | -| 🟢 LOW | X | +Invoke the 'security-review' skill to load the security review methodology, then execute its **Pass 1: Candidate Generation** procedure. -### Findings -| ID | Severity | Type | File | Line | Reference | -|----|----------|------|------|------|-----------| -| SEC-001 | CRITICAL | SQL Injection | auth.ts | 55 | [CWE-89](https://cwe.mitre.org/data/definitions/89.html) | -| SEC-002 | HIGH | XSS | client.ts | 98 | [CWE-79](https://cwe.mitre.org/data/definitions/79.html) | + +Repo: ${repoFullName} +PR Number: ${prNumber} +PR Head Ref: ${prHeadRef} +PR Head SHA: ${prHeadSha} +PR Base Ref: ${prBaseRef} -${notifyTeam ? `cc ${notifyTeam}` : ""} -\`\`\` +Precomputed data files: +- PR Description: \`${descriptionPath}\` +- Full PR Diff: \`${diffPath}\` +- Existing Comments: \`${commentsPath}\` + -## Accuracy Requirements + +Write output to \`${reviewCandidatesPath}\` using this exact schema: + +\`\`\`json +{ + "version": 1, + "meta": { + "repo": "owner/repo", + "prNumber": 123, + "headSha": "", + "baseRef": "main", + "generatedAt": "" + }, + "comments": [ + { + "path": "src/index.ts", + "body": "[P1] [security] Title\\n\\n1 paragraph.", + "line": 42, + "startLine": null, + "side": "RIGHT", + "commit_id": "" + } + ], + "reviewSummary": { + "body": "1-3 sentence security assessment" + } +} +\`\`\` -- Base findings strictly on the current diff and repository context -- False positives are very costly—only report high-confidence findings -- If confidence is low, ask a clarifying question instead of asserting a vulnerability -- Never raise purely stylistic concerns -- Cap at 10 inline comments total + +- **version**: Always \`1\` + +- **meta**: Metadata object + - \`repo\`: "${repoFullName}" + - \`prNumber\`: ${prNumber} + - \`headSha\`: "${prHeadSha}" + - \`baseRef\`: "${prBaseRef}" + - \`generatedAt\`: ISO 8601 timestamp + +- **comments**: Array of comment objects + - \`path\`: Relative file path + - \`body\`: Comment text starting with priority tag [P0|P1|P2|P3] and \`[security]\` tag, then title, then 1 paragraph explanation + - \`line\`: Target line number (single-line) or end line number (multi-line). Must be >= 0. + - \`startLine\`: \`null\` for single-line comments, or start line number for multi-line comments + - \`side\`: "RIGHT" for new/modified code (default), "LEFT" only for removed code + - \`commit_id\`: "${prHeadSha}" + +- **reviewSummary**: + - \`body\`: 1-3 sentence security assessment + + + + +**DO NOT** post to GitHub. +**DO NOT** invoke any PR mutation tools (inline comments, submit review, delete/minimize/reply/resolve, etc.). +**DO NOT** modify any files other than writing to \`${reviewCandidatesPath}\`. +Output ONLY the JSON file — no additional commentary. + `; } diff --git a/src/create-prompt/types.ts b/src/create-prompt/types.ts index cf110c3..f9945e0 100644 --- a/src/create-prompt/types.ts +++ b/src/create-prompt/types.ts @@ -118,4 +118,5 @@ export type PreparedContext = CommonFields & { }; reviewArtifacts?: ReviewArtifacts; outputFilePath?: string; + includeSuggestions?: boolean; }; diff --git a/src/entrypoints/generate-review-prompt.ts b/src/entrypoints/generate-review-prompt.ts index 0db78d0..46886d0 100644 --- a/src/entrypoints/generate-review-prompt.ts +++ b/src/entrypoints/generate-review-prompt.ts @@ -12,18 +12,15 @@ import { fetchPRBranchData } from "../github/data/pr-fetcher"; import { computeReviewArtifacts } from "../github/data/review-artifacts"; import { createPrompt } from "../create-prompt"; import { prepareMcpTools } from "../mcp/install-mcp-server"; -import { generateReviewPrompt } from "../create-prompt/templates/review-prompt"; import { generateReviewCandidatesPrompt } from "../create-prompt/templates/review-candidates-prompt"; -import { generateSecurityReviewPrompt } from "../create-prompt/templates/security-review-prompt"; +import { generateSecurityCandidatesPrompt } from "../create-prompt/templates/security-review-prompt"; import { normalizeDroidArgs, parseAllowedTools } from "../utils/parse-tools"; +import { resolveReviewConfig } from "../utils/review-depth"; async function run() { try { const githubToken = process.env.GITHUB_TOKEN!; const reviewType = process.env.REVIEW_TYPE || "code"; - const reviewUseValidator = - reviewType === "code" && - (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; const commentId = parseInt(process.env.DROID_COMMENT_ID || "0"); if (!commentId) { @@ -91,17 +88,16 @@ async function run() { githubToken, }); - // Select prompt generator based on review type and validator mode + // Select prompt generator based on review type const generatePrompt = reviewType === "security" - ? generateSecurityReviewPrompt - : reviewUseValidator - ? generateReviewCandidatesPrompt - : generateReviewPrompt; + ? generateSecurityCandidatesPrompt + : generateReviewCandidatesPrompt; // Pass the output file path so the prompt can instruct the Droid // to write structured findings for the combine step const outputFilePath = process.env.DROID_OUTPUT_FILE || undefined; + const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false"; await createPrompt({ githubContext: context, @@ -114,6 +110,7 @@ async function run() { generatePrompt, reviewArtifacts, outputFilePath, + includeSuggestions, }); // Set run type @@ -142,30 +139,20 @@ async function run() { // Task tool is needed for parallel subagent reviews in candidate generation phase. // FetchUrl is needed to fetch linked tickets from the PR description. - const candidateGenerationTools = reviewUseValidator - ? ["Task", "FetchUrl"] - : []; - - // When validator is enabled, the candidate generation phase should NOT - // have access to PR mutation tools. When disabled, allow them. - const reviewTools = reviewUseValidator - ? [] - : ["github_pr___list_review_comments"]; - - const safeUserAllowedMCPTools = reviewUseValidator - ? userAllowedMCPTools.filter( - (tool) => - tool === "github_comment___update_droid_comment" || - (!tool.startsWith("github_pr___") && - tool !== "github_inline_comment___create_inline_comment"), - ) - : userAllowedMCPTools; + // Skill is needed so subagents can invoke review/security-review skills. + const candidateGenerationTools = ["Task", "FetchUrl", "Skill"]; + + const safeUserAllowedMCPTools = userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ); const allowedTools = Array.from( new Set([ ...baseTools, ...candidateGenerationTools, - ...reviewTools, ...safeUserAllowedMCPTools, ]), ); @@ -184,14 +171,19 @@ async function run() { droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); droidArgParts.push('--tag "code-review"'); - const reviewModel = + const rawModel = reviewType === "security" ? process.env.SECURITY_MODEL?.trim() || process.env.REVIEW_MODEL?.trim() : process.env.REVIEW_MODEL?.trim(); - const reasoningEffort = process.env.REASONING_EFFORT?.trim(); - if (reviewModel) { - droidArgParts.push(`--model "${reviewModel}"`); + const { model, reasoningEffort } = resolveReviewConfig({ + reviewModel: rawModel, + reasoningEffort: process.env.REASONING_EFFORT?.trim(), + reviewDepth: process.env.REVIEW_DEPTH?.trim(), + }); + + if (model) { + droidArgParts.push(`--model "${model}"`); } if (reasoningEffort) { droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); @@ -204,11 +196,10 @@ async function run() { // Output for next step - use core.setOutput which handles GITHUB_OUTPUT internally core.setOutput("droid_args", droidArgParts.join(" ").trim()); core.setOutput("mcp_tools", mcpTools); - core.setOutput("review_use_validator", reviewUseValidator.toString()); + // Both code and security reviews use the two-pass pipeline (candidates + validator) + core.setOutput("review_use_validator", "true"); - console.log( - `Generated ${reviewType} review prompt (validator=${reviewUseValidator})`, - ); + console.log(`Generated ${reviewType} review prompt`); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); core.setFailed(`Generate prompt failed: ${errorMessage}`); diff --git a/src/entrypoints/prepare-validator.ts b/src/entrypoints/prepare-validator.ts index b27d3a7..1382f76 100644 --- a/src/entrypoints/prepare-validator.ts +++ b/src/entrypoints/prepare-validator.ts @@ -14,13 +14,6 @@ async function run() { throw new Error("prepare-validator requires a pull request context"); } - // This entrypoint only makes sense when the workflow input is enabled. - if ((process.env.REVIEW_USE_VALIDATOR ?? "true").trim() === "false") { - throw new Error( - "reviewUseValidator must be true to run prepare-validator", - ); - } - const githubToken = await setupGitHubToken(); const octokit = createOctokit(githubToken); diff --git a/src/github/operations/comments/common.ts b/src/github/operations/comments/common.ts index d3ab2be..c23b5e8 100644 --- a/src/github/operations/comments/common.ts +++ b/src/github/operations/comments/common.ts @@ -18,17 +18,21 @@ export function createBranchLink( return `\n[View branch](${branchUrl})`; } -export type CommentType = "default" | "security"; +export type CommentType = "default" | "security" | "review_and_security"; export function createCommentBody( jobRunLink: string, branchLink: string = "", type: CommentType = "default", ): string { - const message = - type === "security" - ? "Droid is running a security check…" - : "Droid is working…"; + let message: string; + if (type === "review_and_security") { + message = "Droid is reviewing code and running a security check…"; + } else if (type === "security") { + message = "Droid is running a security check…"; + } else { + message = "Droid is working…"; + } return `${message} diff --git a/src/mcp/github-pr-server.ts b/src/mcp/github-pr-server.ts index 1323b5c..d36ab1d 100644 --- a/src/mcp/github-pr-server.ts +++ b/src/mcp/github-pr-server.ts @@ -181,6 +181,16 @@ export async function listReviewAndIssueComments({ }; } +type ReviewComment = { + path: string; + body: string; + line?: number; + side?: string; + start_line?: number; + start_side?: string; + position?: number; +}; + export async function submitReviewWithComments({ owner, repo, @@ -193,7 +203,7 @@ export async function submitReviewWithComments({ repo: string; prNumber: number; body?: string; - comments?: Array<{ path: string; position: number; body: string }>; + comments?: ReviewComment[]; octokit: OctokitLike; }): Promise { const response = await octokit.rest.pulls.createReview({ @@ -603,20 +613,50 @@ export function createGitHubPRServer({ server.tool( "submit_review", - "Submit a PR review containing inline comments", + "Submit a PR review with all inline comments batched into a single review. " + + "Use line/side to anchor comments to specific lines in the diff.", { pr_number: z.number().int().describe("PR number to review"), body: z.string().describe("Optional summary body").optional(), comments: z .array( z.object({ - path: z.string(), - position: z.number().int(), - body: z.string().min(1), + path: z + .string() + .describe("The file path to comment on (e.g., 'src/index.js')"), + body: z + .string() + .min(1) + .describe( + "The comment text (supports markdown and GitHub code suggestion blocks)", + ), + line: z + .number() + .int() + .optional() + .describe( + "Line number for single-line comments, or end line for multi-line comments", + ), + side: z + .enum(["LEFT", "RIGHT"]) + .optional() + .default("RIGHT") + .describe( + "Side of the diff: RIGHT for new/modified code, LEFT for removed code", + ), + start_line: z + .number() + .int() + .optional() + .describe("Start line for multi-line comments"), + start_side: z + .enum(["LEFT", "RIGHT"]) + .optional() + .describe("Side for the start line of multi-line comments"), }), ) .max(30) - .describe("List of inline comments to include") + .describe("List of inline comments to include in the review") .optional(), }, async ({ pr_number, body, comments }) => { diff --git a/src/tag/commands/review-validator.ts b/src/tag/commands/review-validator.ts index 4e9148d..66f1688 100644 --- a/src/tag/commands/review-validator.ts +++ b/src/tag/commands/review-validator.ts @@ -9,6 +9,7 @@ import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; import type { PrepareResult } from "../../prepare/types"; import { generateReviewValidatorPrompt } from "../../create-prompt/templates/review-validator-prompt"; +import { resolveReviewConfig } from "../../utils/review-depth"; export async function prepareReviewValidatorMode({ context, @@ -45,6 +46,8 @@ export async function prepareReviewValidatorMode({ descriptionPath: `${promptsDir}/pr_description.txt`, }; + const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false"; + await createPrompt({ githubContext: context, commentId: trackingCommentId, @@ -56,6 +59,7 @@ export async function prepareReviewValidatorMode({ }, generatePrompt: generateReviewValidatorPrompt, reviewArtifacts, + includeSuggestions, }); core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review"); @@ -76,7 +80,6 @@ export async function prepareReviewValidatorMode({ "Create", "Edit", "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", ]; const validatorTools = ["github_pr___submit_review"]; @@ -99,11 +102,14 @@ export async function prepareReviewValidatorMode({ droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); droidArgParts.push('--tag "code-review"'); - const reviewModel = process.env.REVIEW_MODEL?.trim(); - const reasoningEffort = process.env.REASONING_EFFORT?.trim(); + const { model, reasoningEffort } = resolveReviewConfig({ + reviewModel: process.env.REVIEW_MODEL?.trim(), + reasoningEffort: process.env.REASONING_EFFORT?.trim(), + reviewDepth: process.env.REVIEW_DEPTH?.trim(), + }); - if (reviewModel) { - droidArgParts.push(`--model "${reviewModel}"`); + if (model) { + droidArgParts.push(`--model "${model}"`); } if (reasoningEffort) { droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts index 4fabb33..22215f7 100644 --- a/src/tag/commands/review.ts +++ b/src/tag/commands/review.ts @@ -8,10 +8,10 @@ import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { createInitialComment } from "../../github/operations/comments/create-initial"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; import { isEntityContext } from "../../github/context"; -import { generateReviewPrompt } from "../../create-prompt/templates/review-prompt"; import { generateReviewCandidatesPrompt } from "../../create-prompt/templates/review-candidates-prompt"; import type { Octokits } from "../../github/api/client"; import type { PrepareResult } from "../../prepare/types"; +import { resolveReviewConfig } from "../../utils/review-depth"; type ReviewCommandOptions = { context: GitHubContext; @@ -85,8 +85,7 @@ export async function prepareReviewMode({ githubToken, }); - const reviewUseValidator = - (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; + const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false"; await createPrompt({ githubContext: context, @@ -97,10 +96,9 @@ export async function prepareReviewMode({ headRefName: prData.headRefName, headRefOid: prData.headRefOid, }, - generatePrompt: reviewUseValidator - ? generateReviewCandidatesPrompt - : generateReviewPrompt, + generatePrompt: generateReviewCandidatesPrompt, reviewArtifacts, + includeSuggestions, }); core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review"); @@ -124,36 +122,20 @@ export async function prepareReviewMode({ // Task tool is needed for parallel subagent reviews in candidate generation phase. // FetchUrl is needed to fetch linked tickets from the PR description. - const candidateGenerationTools = reviewUseValidator - ? ["Task", "FetchUrl"] - : []; - - const reviewTools = reviewUseValidator - ? [] - : [ - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ]; - - const safeUserAllowedMCPTools = reviewUseValidator - ? userAllowedMCPTools.filter( - (tool) => - tool === "github_comment___update_droid_comment" || - (!tool.startsWith("github_pr___") && - tool !== "github_inline_comment___create_inline_comment"), - ) - : userAllowedMCPTools; + // Skill is needed so review subagents can invoke the review-guidelines skill. + const candidateGenerationTools = ["Task", "FetchUrl", "Skill"]; + + const safeUserAllowedMCPTools = userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ); const allowedTools = Array.from( new Set([ ...baseTools, ...candidateGenerationTools, - ...reviewTools, ...safeUserAllowedMCPTools, ]), ); @@ -172,11 +154,14 @@ export async function prepareReviewMode({ droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); droidArgParts.push('--tag "code-review"'); - const reviewModel = process.env.REVIEW_MODEL?.trim(); - const reasoningEffort = process.env.REASONING_EFFORT?.trim(); + const { model, reasoningEffort } = resolveReviewConfig({ + reviewModel: process.env.REVIEW_MODEL?.trim(), + reasoningEffort: process.env.REASONING_EFFORT?.trim(), + reviewDepth: process.env.REVIEW_DEPTH?.trim(), + }); - if (reviewModel) { - droidArgParts.push(`--model "${reviewModel}"`); + if (model) { + droidArgParts.push(`--model "${model}"`); } if (reasoningEffort) { droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`); diff --git a/src/tag/commands/security-review.ts b/src/tag/commands/security-review.ts index 4033217..b30c317 100644 --- a/src/tag/commands/security-review.ts +++ b/src/tag/commands/security-review.ts @@ -1,12 +1,14 @@ import * as core from "@actions/core"; +import { execSync } from "child_process"; import type { GitHubContext } from "../../github/context"; import { fetchPRBranchData } from "../../github/data/pr-fetcher"; +import { computeReviewArtifacts } from "../../github/data/review-artifacts"; import { createPrompt } from "../../create-prompt"; import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { createInitialComment } from "../../github/operations/comments/create-initial"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; import { isEntityContext } from "../../github/context"; -import { generateSecurityReviewPrompt } from "../../create-prompt/templates/security-review-prompt"; +import { generateSecurityCandidatesPrompt } from "../../create-prompt/templates/security-review-prompt"; import type { Octokits } from "../../github/api/client"; import type { PrepareResult } from "../../prepare/types"; @@ -48,6 +50,41 @@ export async function prepareSecurityReviewMode({ currentBranch: prData.headRefName, }; + // Checkout the PR branch before computing diff + console.log( + `Checking out PR #${context.entityNumber} branch for diff computation...`, + ); + try { + execSync("git reset --hard HEAD", { encoding: "utf8", stdio: "pipe" }); + execSync(`gh pr checkout ${context.entityNumber}`, { + encoding: "utf8", + stdio: "pipe", + env: { ...process.env, GH_TOKEN: githubToken }, + }); + console.log( + `Successfully checked out PR branch: ${execSync("git rev-parse --abbrev-ref HEAD", { encoding: "utf8" }).trim()}`, + ); + } catch (e) { + console.error(`Failed to checkout PR branch: ${e}`); + throw new Error( + `Failed to checkout PR #${context.entityNumber} branch for security review`, + ); + } + + // Pre-compute review artifacts (diff, existing comments, and PR description) + const tempDir = process.env.RUNNER_TEMP || "/tmp"; + const reviewArtifacts = await computeReviewArtifacts({ + baseRef: prData.baseRefName, + tempDir, + octokit, + owner: context.repository.owner, + repo: context.repository.repo, + prNumber: context.entityNumber, + title: prData.title, + body: prData.body, + githubToken, + }); + await createPrompt({ githubContext: context, commentId, @@ -57,11 +94,11 @@ export async function prepareSecurityReviewMode({ headRefName: prData.headRefName, headRefOid: prData.headRefOid, }, - generatePrompt: generateSecurityReviewPrompt, + generatePrompt: generateSecurityCandidatesPrompt, + reviewArtifacts, }); core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-security-review"); - // Signal that security skills should be installed core.setOutput("install_security_skills", "true"); const rawUserArgs = process.env.DROID_ARGS || ""; @@ -76,21 +113,27 @@ export async function prepareSecurityReviewMode({ "Glob", "LS", "Execute", + "Edit", + "Create", + "ApplyPatch", "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", ]; - const reviewTools = [ - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ]; + const candidateGenerationTools = ["Task", "FetchUrl", "Skill"]; + + const safeUserAllowedMCPTools = userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ); const allowedTools = Array.from( - new Set([...baseTools, ...reviewTools, ...userAllowedMCPTools]), + new Set([ + ...baseTools, + ...candidateGenerationTools, + ...safeUserAllowedMCPTools, + ]), ); const mcpTools = await prepareMcpTools({ @@ -105,8 +148,8 @@ export async function prepareSecurityReviewMode({ const droidArgParts: string[] = []; droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`); + droidArgParts.push('--tag "code-review"'); - // Add model override if specified (prefer SECURITY_MODEL, fallback to REVIEW_MODEL) const securityModel = process.env.SECURITY_MODEL?.trim() || process.env.REVIEW_MODEL?.trim(); if (securityModel) { @@ -119,6 +162,8 @@ export async function prepareSecurityReviewMode({ core.setOutput("droid_args", droidArgParts.join(" ").trim()); core.setOutput("mcp_tools", mcpTools); + core.setOutput("review_pr_number", context.entityNumber.toString()); + core.setOutput("droid_comment_id", commentId.toString()); return { commentId, diff --git a/src/tag/index.ts b/src/tag/index.ts index f17e07d..dd6ae0c 100644 --- a/src/tag/index.ts +++ b/src/tag/index.ts @@ -90,26 +90,33 @@ export async function prepareTagExecution({ const commandContext = extractCommandFromContext(context); - // Determine if this is a security-related command for the initial comment - const isSecurityCommand = - context.inputs.automaticSecurityReview || - commandContext?.command === "security" || - commandContext?.command === "security-full"; + // Determine comment type based on what's being run + const isDualReview = + context.inputs.automaticReview && context.inputs.automaticSecurityReview; + const isSecurityOnly = + !isDualReview && + (context.inputs.automaticSecurityReview || + commandContext?.command === "security" || + commandContext?.command === "security-full"); + + const commentType = isDualReview + ? "review_and_security" + : isSecurityOnly + ? "security" + : "default"; const commentData = await createInitialComment( octokit.rest, context, - isSecurityCommand ? "security" : "default", + commentType, ); const commentId = commentData.id; - // Handle parallel review mode when both flags are set + // Handle when both automatic review flags are set if ( context.inputs.automaticReview && context.inputs.automaticSecurityReview ) { - // Output flags for parallel workflow jobs - const runCodeReview = true; let runSecurityReview = true; // Check if security review already exists on this PR (run once behavior) @@ -121,19 +128,22 @@ export async function prepareTagExecution({ runSecurityReview = false; } - // Set outputs for downstream jobs - core.setOutput("run_code_review", runCodeReview.toString()); + if (runSecurityReview) { + // Signal to the code review prompt to spawn a security-reviewer subagent + core.exportVariable("SECURITY_REVIEW_ENABLED", "true"); + core.setOutput("install_security_skills", "true"); + } + + core.setOutput("run_code_review", "true"); core.setOutput("run_security_review", runSecurityReview.toString()); - // For parallel mode, return early - individual jobs will run their own reviews - return { - skipped: false, - branchInfo: { - baseBranch: "", - currentBranch: "", - }, - mcpTools: "", - }; + // Prepare the code review (security review runs as a subagent within pass 1) + return prepareReviewMode({ + context, + octokit, + githubToken, + trackingCommentId: commentId, + }); } if (context.inputs.automaticReview) { @@ -165,7 +175,8 @@ export async function prepareTagExecution({ }; } - core.setOutput("run_code_review", "false"); + // Standalone security review uses the two-pass pipeline (candidates + validator) + core.setOutput("run_code_review", "true"); core.setOutput("run_security_review", "true"); return prepareSecurityReviewMode({ context, @@ -185,7 +196,8 @@ export async function prepareTagExecution({ } if (commandContext?.command === "security") { - core.setOutput("run_code_review", "false"); + // Standalone security review uses the two-pass pipeline (candidates + validator) + core.setOutput("run_code_review", "true"); core.setOutput("run_security_review", "true"); return prepareSecurityReviewMode({ context, diff --git a/src/utils/review-depth.ts b/src/utils/review-depth.ts new file mode 100644 index 0000000..3c7135e --- /dev/null +++ b/src/utils/review-depth.ts @@ -0,0 +1,41 @@ +export enum ReviewDepth { + Shallow = "shallow", + Deep = "deep", +} + +const SHALLOW_DEFAULTS = { + model: "kimi-k2-0711", + reasoningEffort: undefined as string | undefined, +}; + +const DEEP_DEFAULTS = { + model: "gpt-5.2", + reasoningEffort: "high" as string | undefined, +}; + +export const REVIEW_DEPTH_PRESETS: Record< + ReviewDepth, + { model: string; reasoningEffort: string | undefined } +> = { + [ReviewDepth.Shallow]: SHALLOW_DEFAULTS, + [ReviewDepth.Deep]: DEEP_DEFAULTS, +}; + +/** + * Resolve the effective review model and reasoning effort based on depth. + * Explicit overrides (review_model, reasoning_effort) take priority over depth presets. + */ +export function resolveReviewConfig(options?: { + reviewModel?: string; + reasoningEffort?: string; + reviewDepth?: string; +}): { model: string; reasoningEffort: string | undefined } { + const depth = (options?.reviewDepth || ReviewDepth.Deep) as ReviewDepth; + const defaults = + REVIEW_DEPTH_PRESETS[depth] ?? REVIEW_DEPTH_PRESETS[ReviewDepth.Shallow]; + + return { + model: options?.reviewModel || defaults.model, + reasoningEffort: options?.reasoningEffort || defaults.reasoningEffort, + }; +} diff --git a/test/create-prompt/templates/review-candidates-prompt.test.ts b/test/create-prompt/templates/review-candidates-prompt.test.ts index 53e97ef..28e6d71 100644 --- a/test/create-prompt/templates/review-candidates-prompt.test.ts +++ b/test/create-prompt/templates/review-candidates-prompt.test.ts @@ -52,55 +52,23 @@ describe("generateReviewCandidatesPrompt", () => { expect(prompt).toContain("pr_description.txt"); }); - it("includes understanding phase with ticket fetching instructions", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); - - const prompt = generateReviewCandidatesPrompt(context); - - expect(prompt).toContain(""); - expect(prompt).toContain("Understand the PR intent"); - expect(prompt).toContain("Read the PR description from"); - expect(prompt).toContain("/tmp/test/pr_description.txt"); - }); - - it("instructs to fetch ticket URLs from PR description", () => { - const context = createBaseContext(); - - const prompt = generateReviewCandidatesPrompt(context); - - expect(prompt).toContain("ticket URL"); - expect(prompt).toContain("ticket ID"); - expect(prompt).toContain("always fetch it"); - expect(prompt).toContain("FetchUrl"); - }); - - it("places understanding phase before review guidelines", () => { + it("instructs to invoke the review skill for Pass 1", () => { const context = createBaseContext(); const prompt = generateReviewCandidatesPrompt(context); - const understandingIdx = prompt.indexOf(""); - const guidelinesIdx = prompt.indexOf(""); - expect(understandingIdx).toBeGreaterThan(-1); - expect(guidelinesIdx).toBeGreaterThan(-1); - expect(understandingIdx).toBeLessThan(guidelinesIdx); + expect(prompt).toContain("Invoke the 'review' skill"); + expect(prompt).toContain("Pass 1: Candidate Generation"); }); - it("includes triage and parallel review phases", () => { + it("includes senior engineer framing", () => { const context = createBaseContext(); const prompt = generateReviewCandidatesPrompt(context); - expect(prompt).toContain(""); - expect(prompt).toContain(""); - expect(prompt).toContain(""); - expect(prompt).toContain("file-group-reviewer"); + expect(prompt).toContain( + "You are a senior staff software engineer and expert code reviewer", + ); }); it("includes output spec with correct schema", () => { @@ -146,4 +114,21 @@ describe("generateReviewCandidatesPrompt", () => { expect(prompt).toContain("PR Head SHA: sha123abc"); expect(prompt).toContain("PR Base Ref: develop"); }); + + it("includes suggestion block rules reference when suggestions enabled", () => { + const context = createBaseContext({ includeSuggestions: true }); + + const prompt = generateReviewCandidatesPrompt(context); + + expect(prompt).toContain("suggestion block rules"); + }); + + it("excludes suggestion blocks when suggestions disabled", () => { + const context = createBaseContext({ includeSuggestions: false }); + + const prompt = generateReviewCandidatesPrompt(context); + + expect(prompt).toContain("Do NOT include code suggestion blocks"); + expect(prompt).not.toContain("suggestion block rules"); + }); }); diff --git a/test/create-prompt/templates/review-prompt.test.ts b/test/create-prompt/templates/review-prompt.test.ts deleted file mode 100644 index 3898c6b..0000000 --- a/test/create-prompt/templates/review-prompt.test.ts +++ /dev/null @@ -1,179 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import { generateReviewPrompt } from "../../../src/create-prompt/templates/review-prompt"; -import type { PreparedContext } from "../../../src/create-prompt/types"; - -function createBaseContext( - overrides: Partial = {}, -): PreparedContext { - return { - repository: "test-owner/test-repo", - droidCommentId: "123", - triggerPhrase: "@droid", - eventData: { - eventName: "issue_comment", - commentId: "456", - prNumber: "42", - isPR: true, - commentBody: "@droid review", - }, - githubContext: undefined, - ...overrides, - } as PreparedContext; -} - -describe("generateReviewPrompt", () => { - it("includes objectives and procedure steps", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## Objectives"); - expect(prompt).toContain("Re-check existing review comments"); - expect(prompt).toContain("github_inline_comment___create_inline_comment"); - expect(prompt).toContain( - "**Do NOT call** `github_pr___resolve_review_thread`", - ); - }); - - it("includes pre-computed artifact references when provided", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("### Pre-computed Review Artifacts"); - expect(prompt).toContain("/tmp/test/pr.diff"); - expect(prompt).toContain("/tmp/test/existing_comments.json"); - expect(prompt).toContain("COMPLETE diff"); - }); - - it("includes critical instruction to review all files", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## CRITICAL INSTRUCTION"); - expect(prompt).toContain( - "DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE", - ); - expect(prompt).toContain("Review EACH file systematically"); - }); - - it("instructs to read from pre-computed files in Phase 1", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/droid/pr.diff", - commentsPath: "/tmp/droid/comments.json", - descriptionPath: "/tmp/droid/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain( - "Read existing comments** from the pre-computed file", - ); - expect(prompt).toContain("Read /tmp/droid/comments.json"); - expect(prompt).toContain( - "Read the COMPLETE diff** from the pre-computed file", - ); - expect(prompt).toContain("Read /tmp/droid/pr.diff"); - }); - - it("emphasizes accuracy gates and bug detection guidelines", () => { - const prompt = generateReviewPrompt(createBaseContext()); - - expect(prompt).toContain("## Priority Levels"); - expect(prompt).toContain("[P0]"); - expect(prompt).toContain( - "Never open a new finding for an issue previously reported by this bot", - ); - }); - - it("describes submission guidance", () => { - const prompt = generateReviewPrompt(createBaseContext()); - - expect(prompt).toContain( - "Use `github_inline_comment___create_inline_comment`", - ); - expect(prompt).toContain("Do **not** approve or request changes"); - expect(prompt).toContain("github_pr___submit_review"); - expect(prompt).toContain("### When NOT to submit"); - expect(prompt).toContain("All findings are low-severity (P2/P3)"); - }); - - it("references PR description artifact in pre-computed files", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("/tmp/test/pr_description.txt"); - expect(prompt).toContain("PR Description"); - expect(prompt).toContain( - "Contains the PR title and description (body) explaining the intent and scope", - ); - }); - - it("instructs reading PR description first in Phase 1", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/droid/pr.diff", - commentsPath: "/tmp/droid/comments.json", - descriptionPath: "/tmp/droid/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain( - "Read the PR description** to understand the intent and scope", - ); - expect(prompt).toContain("Read /tmp/droid/pr_description.txt"); - // Verify description is read before comments and diff - const descIdx = prompt.indexOf("Read the PR description"); - const commentsIdx = prompt.indexOf("Read existing comments"); - const diffIdx = prompt.indexOf("Read the COMPLETE diff"); - expect(descIdx).toBeLessThan(commentsIdx); - expect(commentsIdx).toBeLessThan(diffIdx); - }); - - it("uses fallback description path when artifacts are not provided", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("pr_description.txt"); - }); - - it("includes output file instructions when outputFilePath is set", () => { - const context = createBaseContext({ - outputFilePath: "/tmp/results/code-review-results.json", - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## Output File (REQUIRED)"); - expect(prompt).toContain("/tmp/results/code-review-results.json"); - expect(prompt).toContain('"type": "code-review"'); - expect(prompt).toContain("downstream processing of review results"); - }); - - it("does not include output file section when outputFilePath is not set", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).not.toContain("## Output File (REQUIRED)"); - }); -}); diff --git a/test/create-prompt/templates/review-validator-prompt.test.ts b/test/create-prompt/templates/review-validator-prompt.test.ts index 195f2e3..606e67e 100644 --- a/test/create-prompt/templates/review-validator-prompt.test.ts +++ b/test/create-prompt/templates/review-validator-prompt.test.ts @@ -48,34 +48,37 @@ describe("generateReviewValidatorPrompt", () => { expect(prompt).toContain("pr_description.txt"); }); - it("instructs reading PR description in Phase 1", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); + it("instructs to invoke the review skill for Pass 2", () => { + const context = createBaseContext(); const prompt = generateReviewValidatorPrompt(context); - expect(prompt).toContain("Read the PR description"); - expect(prompt).toContain("/tmp/test/pr_description.txt"); - // Verify description is read before comments - const descIdx = prompt.indexOf("Read the PR description"); - const commentsIdx = prompt.indexOf("Read existing comments"); - expect(descIdx).toBeLessThan(commentsIdx); + expect(prompt).toContain("Invoke the 'review' skill"); + expect(prompt).toContain("Pass 2: Validation"); }); - it("includes validation phases and output schema", () => { + it("preserves validating candidate review comments framing", () => { const context = createBaseContext(); const prompt = generateReviewValidatorPrompt(context); - expect(prompt).toContain("Phase 1: Load context"); - expect(prompt).toContain("Phase 2: Validate candidates"); - expect(prompt).toContain("Phase 3: Write review_validated.json"); - expect(prompt).toContain("Phase 4: Post approved items"); + expect(prompt).toContain( + "You are validating candidate review comments for PR", + ); + expect(prompt).toContain( + "Phase 2 (validator) of a two-pass review pipeline", + ); + }); + + it("instructs to post summary in tracking comment, not in submit_review body", () => { + const context = createBaseContext(); + const prompt = generateReviewValidatorPrompt(context); + + expect(prompt).toContain("github_comment___update_droid_comment"); + expect(prompt).toContain("Do **NOT** include a `body` parameter"); + expect(prompt).toContain( + "Do **NOT** post the summary as a separate comment or as the body of `submit_review`", + ); }); it("includes correct PR context", () => { @@ -100,4 +103,32 @@ describe("generateReviewValidatorPrompt", () => { expect(prompt).toContain("PR Head SHA: sha999"); expect(prompt).toContain("PR Base Ref: main"); }); + + it("includes output schema with validated results", () => { + const context = createBaseContext(); + + const prompt = generateReviewValidatorPrompt(context); + + expect(prompt).toContain("review_validated"); + expect(prompt).toContain('"version": 1'); + expect(prompt).toContain('"status": "approved"'); + expect(prompt).toContain('"status": "rejected"'); + }); + + it("includes suggestion block rules reference when suggestions enabled", () => { + const context = createBaseContext({ includeSuggestions: true }); + + const prompt = generateReviewValidatorPrompt(context); + + expect(prompt).toContain("suggestion block rules"); + }); + + it("excludes suggestion blocks when suggestions disabled", () => { + const context = createBaseContext({ includeSuggestions: false }); + + const prompt = generateReviewValidatorPrompt(context); + + expect(prompt).toContain("Do NOT include code suggestion blocks"); + expect(prompt).not.toContain("suggestion block rules"); + }); }); diff --git a/test/create-prompt/templates/security-review-prompt.test.ts b/test/create-prompt/templates/security-review-prompt.test.ts index a888d60..cd78ac2 100644 --- a/test/create-prompt/templates/security-review-prompt.test.ts +++ b/test/create-prompt/templates/security-review-prompt.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "bun:test"; -import { generateSecurityReviewPrompt } from "../../../src/create-prompt/templates/security-review-prompt"; +import { generateSecurityCandidatesPrompt } from "../../../src/create-prompt/templates/security-review-prompt"; import type { PreparedContext } from "../../../src/create-prompt/types"; function createBaseContext( @@ -21,83 +21,55 @@ function createBaseContext( } as PreparedContext; } -describe("generateSecurityReviewPrompt", () => { - it("includes security context and skill workflow", () => { - const prompt = generateSecurityReviewPrompt(createBaseContext()); +describe("generateSecurityCandidatesPrompt", () => { + it("includes security context and skill invocation", () => { + const prompt = generateSecurityCandidatesPrompt(createBaseContext()); expect(prompt).toContain("security-focused code review"); - expect(prompt).toContain("## Security Skills Available"); - expect(prompt).toContain("threat-model-generation"); - expect(prompt).toContain("commit-security-scan"); - expect(prompt).toContain("vulnerability-validation"); expect(prompt).toContain("security-review"); - expect(prompt).toContain("## Review Workflow"); - expect(prompt).toContain("gh pr diff 42 --repo test-owner/test-repo"); - expect(prompt).toContain( - "gh api repos/test-owner/test-repo/pulls/42/files", - ); - expect(prompt).toContain("security-review-results.json"); - expect(prompt).toContain("Do NOT post inline comments"); + expect(prompt).toContain("Invoke the 'security-review' skill"); + expect(prompt).toContain("Pass 1: Candidate Generation"); + expect(prompt).toContain("DO NOT"); }); - it("lists STRIDE security categories", () => { - const prompt = generateSecurityReviewPrompt(createBaseContext()); + it("includes PR context with correct values", () => { + const prompt = generateSecurityCandidatesPrompt(createBaseContext()); - expect(prompt).toContain("Spoofing"); - expect(prompt).toContain("Tampering"); - expect(prompt).toContain("Repudiation"); - expect(prompt).toContain("Information Disclosure"); - expect(prompt).toContain("Denial of Service"); - expect(prompt).toContain("Elevation of Privilege"); + expect(prompt).toContain("PR Number: 42"); + expect(prompt).toContain("test-owner/test-repo"); }); - it("includes severity definitions", () => { - const prompt = generateSecurityReviewPrompt(createBaseContext()); + it("includes output spec with correct schema", () => { + const prompt = generateSecurityCandidatesPrompt(createBaseContext()); - expect(prompt).toContain("CRITICAL"); - expect(prompt).toContain("HIGH"); - expect(prompt).toContain("MEDIUM"); - expect(prompt).toContain("LOW"); + expect(prompt).toContain("version"); + expect(prompt).toContain("meta"); + expect(prompt).toContain("comments"); + expect(prompt).toContain("reviewSummary"); + expect(prompt).toContain("commit_id"); + expect(prompt).toContain("[security]"); }); - it("uses outputFilePath when provided", () => { - const context = createBaseContext({ - outputFilePath: "/tmp/results/security-results.json", - }); - - const prompt = generateSecurityReviewPrompt(context); - - expect(prompt).toContain("/tmp/results/security-results.json"); - expect(prompt).not.toContain( - "Write findings to `security-review-results.json`", - ); - }); - - it("falls back to default filename when outputFilePath is not set", () => { - const context = createBaseContext(); + it("includes critical constraints to not post to GitHub", () => { + const prompt = generateSecurityCandidatesPrompt(createBaseContext()); - const prompt = generateSecurityReviewPrompt(context); - - expect(prompt).toContain("security-review-results.json"); + expect(prompt).toContain("DO NOT** post to GitHub"); + expect(prompt).toContain("DO NOT** invoke any PR mutation tools"); }); - it("includes security configuration from context", () => { - const contextWithConfig = createBaseContext({ - githubContext: { - inputs: { - securitySeverityThreshold: "high", - securityBlockOnCritical: true, - securityBlockOnHigh: true, - securityNotifyTeam: "@org/security-team", - }, - } as any, + it("uses precomputed data files when review artifacts provided", () => { + const context = createBaseContext({ + reviewArtifacts: { + diffPath: "/tmp/droid-prompts/pr.diff", + commentsPath: "/tmp/droid-prompts/existing_comments.json", + descriptionPath: "/tmp/droid-prompts/pr_description.txt", + }, }); - const prompt = generateSecurityReviewPrompt(contextWithConfig); + const prompt = generateSecurityCandidatesPrompt(context); - expect(prompt).toContain("Severity Threshold: high"); - expect(prompt).toContain("Block on Critical: true"); - expect(prompt).toContain("Block on High: true"); - expect(prompt).toContain("@org/security-team"); + expect(prompt).toContain("/tmp/droid-prompts/pr.diff"); + expect(prompt).toContain("/tmp/droid-prompts/existing_comments.json"); + expect(prompt).toContain("/tmp/droid-prompts/pr_description.txt"); }); }); diff --git a/test/entrypoints/prepare-validator.test.ts b/test/entrypoints/prepare-validator.test.ts index 28f9d96..4f56d89 100644 --- a/test/entrypoints/prepare-validator.test.ts +++ b/test/entrypoints/prepare-validator.test.ts @@ -6,7 +6,7 @@ import * as contextMod from "../../src/github/context"; import * as validator from "../../src/tag/commands/review-validator"; describe("prepare-validator entrypoint", () => { - it("fails when reviewUseValidator is false", async () => { + it("fails when DROID_COMMENT_ID is missing", async () => { const setFailedSpy = spyOn(core, "setFailed").mockImplementation(() => {}); const setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {}); const exitSpy = spyOn(process, "exit").mockImplementation((() => { @@ -33,8 +33,7 @@ describe("prepare-validator entrypoint", () => { isPR: true, } as any); - process.env.REVIEW_USE_VALIDATOR = "false"; - process.env.DROID_COMMENT_ID = "123"; + delete process.env.DROID_COMMENT_ID; spyOn(token, "setupGitHubToken").mockResolvedValue("token"); spyOn(client, "createOctokit").mockReturnValue({} as any); @@ -53,7 +52,7 @@ describe("prepare-validator entrypoint", () => { expect(setFailedSpy).toHaveBeenCalled(); expect(setOutputSpy).toHaveBeenCalledWith( "prepare_error", - expect.stringContaining("reviewUseValidator"), + expect.stringContaining("DROID_COMMENT_ID"), ); expect(exitSpy).toHaveBeenCalledWith(1); diff --git a/test/integration/review-flow.test.ts b/test/integration/review-flow.test.ts index e8c51d5..c430546 100644 --- a/test/integration/review-flow.test.ts +++ b/test/integration/review-flow.test.ts @@ -8,6 +8,7 @@ import * as createInitial from "../../src/github/operations/comments/create-init import * as mcpInstaller from "../../src/mcp/install-mcp-server"; import * as actorValidation from "../../src/github/validation/actor"; import * as promptModule from "../../src/create-prompt"; +import * as reviewArtifactsModule from "../../src/github/data/review-artifacts"; import * as core from "@actions/core"; import * as childProcess from "node:child_process"; @@ -22,6 +23,7 @@ describe("review command integration", () => { let setOutputSpy: ReturnType; let exportVarSpy: ReturnType; let promptSpy: ReturnType; + let computeArtifactsSpy: ReturnType; let execSyncSpy: ReturnType; beforeEach(async () => { @@ -37,6 +39,14 @@ describe("review command integration", () => { mcpSpy = spyOn(mcpInstaller, "prepareMcpTools").mockResolvedValue("{}"); actorSpy = spyOn(actorValidation, "checkHumanActor").mockResolvedValue(); promptSpy = spyOn(promptModule, "createPrompt").mockResolvedValue(); + computeArtifactsSpy = spyOn( + reviewArtifactsModule, + "computeReviewArtifacts", + ).mockResolvedValue({ + diffPath: `${tmpDir}/droid-prompts/pr.diff`, + commentsPath: `${tmpDir}/droid-prompts/existing_comments.json`, + descriptionPath: `${tmpDir}/droid-prompts/pr_description.txt`, + }); setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {}); exportVarSpy = spyOn(core, "exportVariable").mockImplementation(() => {}); @@ -57,6 +67,7 @@ describe("review command integration", () => { mcpSpy.mockRestore(); actorSpy.mockRestore(); promptSpy.mockRestore(); + computeArtifactsSpy.mockRestore(); setOutputSpy.mockRestore(); exportVarSpy.mockRestore(); execSyncSpy.mockRestore(); @@ -224,7 +235,8 @@ describe("review command integration", () => { (call: unknown[]) => call[0] === "run_security_review", ) as [string, string] | undefined; - expect(runCodeReviewCall?.[1]).toBe("false"); + // Standalone security now uses two-pass pipeline (candidates + validator) + expect(runCodeReviewCall?.[1]).toBe("true"); expect(runSecurityReviewCall?.[1]).toBe("true"); }); }); diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts index 5b8b8af..899b712 100644 --- a/test/modes/tag/review-command.test.ts +++ b/test/modes/tag/review-command.test.ts @@ -159,10 +159,8 @@ describe("prepareReviewMode", () => { allowedTools: expect.arrayContaining([ "Execute", "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___resolve_review_thread", + "Task", + "FetchUrl", ]), }), ); @@ -175,18 +173,16 @@ describe("prepareReviewMode", () => { (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; expect(droidArgsCall?.[1]).toContain("Execute"); - [ + expect(droidArgsCall?.[1]).toContain("Task"); + expect(droidArgsCall?.[1]).toContain("FetchUrl"); + expect(droidArgsCall?.[1]).toContain( "github_comment___update_droid_comment", + ); + // Candidate generation phase should NOT have PR mutation tools + expect(droidArgsCall?.[1]).not.toContain( "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ].forEach((tool) => { - expect(droidArgsCall?.[1]).toContain(tool); - }); + ); + expect(droidArgsCall?.[1]).not.toContain("github_pr___submit_review"); expect(exportVariableSpy).toHaveBeenCalledWith( "DROID_EXEC_RUN_TYPE", "droid-review", @@ -389,10 +385,9 @@ describe("prepareReviewMode", () => { const droidArgsCall = setOutputSpy.mock.calls.find( (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; - // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, no --model or --reasoning-effort - // flags are added. Defaults are handled by the action.yml inputs (gpt-5.2 / high). - expect(droidArgsCall?.[1]).not.toContain("--model"); - expect(droidArgsCall?.[1]).not.toContain("--reasoning-effort"); + // When REVIEW_MODEL is empty the deep depth preset kicks in (gpt-5.2, high reasoning). + expect(droidArgsCall?.[1]).toContain('--model "gpt-5.2"'); + expect(droidArgsCall?.[1]).toContain('--reasoning-effort "high"'); }); it("stores PR description as an artifact file", async () => { @@ -558,9 +553,7 @@ describe("prepareReviewMode", () => { ); }); - it("includes FetchUrl in allowed tools when validator mode is enabled", async () => { - process.env.REVIEW_USE_VALIDATOR = "true"; - + it("always includes Task and FetchUrl in allowed tools for candidate generation", async () => { const context = createMockContext({ eventName: "issue_comment", isPR: true, @@ -608,61 +601,6 @@ describe("prepareReviewMode", () => { (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; expect(droidArgsCall?.[1]).toContain("FetchUrl"); - - delete process.env.REVIEW_USE_VALIDATOR; - }); - - it("excludes FetchUrl from allowed tools when validator mode is disabled", async () => { - process.env.REVIEW_USE_VALIDATOR = "false"; - - const context = createMockContext({ - eventName: "issue_comment", - isPR: true, - payload: { - comment: { - id: 109, - body: "@droid review", - }, - } as any, - entityNumber: 32, - }); - - const octokit = { - rest: { - issues: { - listComments: () => Promise.resolve({ data: [] }), - }, - pulls: { - listReviewComments: () => Promise.resolve({ data: [] }), - }, - }, - graphql: () => Promise.resolve({}), - } as any; - - graphqlSpy = spyOn(octokit, "graphql").mockResolvedValue({ - repository: { - pullRequest: { - baseRefName: MOCK_PR_DATA.baseRefName, - headRefName: MOCK_PR_DATA.headRefName, - headRefOid: MOCK_PR_DATA.headRefOid, - title: MOCK_PR_DATA.title, - body: MOCK_PR_DATA.body, - }, - }, - }); - - await prepareReviewMode({ - context, - octokit, - githubToken: "token", - trackingCommentId: 562, - }); - - const droidArgsCall = setOutputSpy.mock.calls.find( - (call: unknown[]) => call[0] === "droid_args", - ) as [string, string] | undefined; - expect(droidArgsCall?.[1]).not.toContain("FetchUrl"); - - delete process.env.REVIEW_USE_VALIDATOR; + expect(droidArgsCall?.[1]).toContain("Task"); }); }); diff --git a/test/modes/tag/security-review-command.test.ts b/test/modes/tag/security-review-command.test.ts index 6cae593..01b0f28 100644 --- a/test/modes/tag/security-review-command.test.ts +++ b/test/modes/tag/security-review-command.test.ts @@ -1,9 +1,18 @@ -import { afterEach, beforeEach, describe, expect, it, spyOn } from "bun:test"; +import { + afterEach, + beforeEach, + describe, + expect, + it, + spyOn, + mock, +} from "bun:test"; import * as core from "@actions/core"; import { prepareSecurityReviewMode } from "../../../src/tag/commands/security-review"; import { createMockContext } from "../../mockContext"; import * as prFetcher from "../../../src/github/data/pr-fetcher"; +import * as reviewArtifacts from "../../../src/github/data/review-artifacts"; import * as promptModule from "../../../src/create-prompt"; import * as mcpInstaller from "../../../src/mcp/install-mcp-server"; import * as comments from "../../../src/github/operations/comments/create-initial"; @@ -14,11 +23,18 @@ const MOCK_PR_DATA = { headRefOid: "123abc", } as const; +const MOCK_REVIEW_ARTIFACTS = { + diffPath: "/tmp/droid-prompts/pr.diff", + commentsPath: "/tmp/droid-prompts/existing_comments.json", + descriptionPath: "/tmp/droid-prompts/pr_description.txt", +}; + describe("prepareSecurityReviewMode", () => { const originalArgs = process.env.DROID_ARGS; const originalReviewModel = process.env.REVIEW_MODEL; const originalSecurityModel = process.env.SECURITY_MODEL; let fetchPRSpy: ReturnType; + let computeArtifactsSpy: ReturnType; let promptSpy: ReturnType; let mcpSpy: ReturnType; let setOutputSpy: ReturnType; @@ -38,6 +54,11 @@ describe("prepareSecurityReviewMode", () => { body: "Test description", }); + computeArtifactsSpy = spyOn( + reviewArtifacts, + "computeReviewArtifacts", + ).mockResolvedValue(MOCK_REVIEW_ARTIFACTS); + promptSpy = spyOn(promptModule, "createPrompt").mockResolvedValue(); mcpSpy = spyOn(mcpInstaller, "prepareMcpTools").mockResolvedValue( "mock-config", @@ -50,10 +71,19 @@ describe("prepareSecurityReviewMode", () => { exportVariableSpy = spyOn(core, "exportVariable").mockImplementation( () => {}, ); + + // Mock execSync for git checkout + mock.module("child_process", () => ({ + execSync: (cmd: string) => { + if (cmd.includes("git rev-parse")) return "feature/security-review\n"; + return ""; + }, + })); }); afterEach(() => { fetchPRSpy.mockRestore(); + computeArtifactsSpy.mockRestore(); promptSpy.mockRestore(); mcpSpy.mockRestore(); setOutputSpy.mockRestore(); @@ -73,7 +103,7 @@ describe("prepareSecurityReviewMode", () => { } }); - it("prepares security review flow with limited toolset when tracking comment exists", async () => { + it("prepares security review flow with candidate generation toolset", async () => { const context = createMockContext({ eventName: "issue_comment", isPR: true, @@ -105,37 +135,26 @@ describe("prepareSecurityReviewMode", () => { expect.objectContaining({ allowedTools: expect.arrayContaining([ "Execute", + "Task", + "Skill", "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___resolve_review_thread", ]), }), ); + // Should NOT include inline comment or direct review tools (validator handles posting) + const mcpCall = mcpSpy.mock.calls[0] as any[]; + const allowedTools = mcpCall[0].allowedTools as string[]; + expect(allowedTools).not.toContain( + "github_inline_comment___create_inline_comment", + ); + expect(allowedTools).not.toContain("github_pr___submit_review"); + expect(createInitialSpy).not.toHaveBeenCalled(); expect(result.commentId).toBe(555); expect(result.branchInfo.baseBranch).toBe("main"); expect(result.branchInfo.currentBranch).toBe("feature/security-review"); expect(result.branchInfo.droidBranch).toBeUndefined(); - const droidArgsCall = setOutputSpy.mock.calls.find( - (call: unknown[]) => call[0] === "droid_args", - ) as [string, string] | undefined; - expect(droidArgsCall?.[1]).toContain("Execute"); - [ - "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ].forEach((tool) => { - expect(droidArgsCall?.[1]).toContain(tool); - }); - expect(exportVariableSpy).toHaveBeenCalledWith( "DROID_EXEC_RUN_TYPE", "droid-security-review", @@ -334,4 +353,37 @@ describe("prepareSecurityReviewMode", () => { '--model "claude-sonnet-4-5-20250929"', ); }); + + it("outputs review_pr_number and droid_comment_id", async () => { + const context = createMockContext({ + eventName: "issue_comment", + isPR: true, + payload: { + comment: { + id: 108, + body: "@droid security-review", + }, + } as any, + entityNumber: 31, + }); + + const octokit = { rest: {}, graphql: () => {} } as any; + + await prepareSecurityReviewMode({ + context, + octokit, + githubToken: "token", + trackingCommentId: 561, + }); + + const prNumberCall = setOutputSpy.mock.calls.find( + (call: unknown[]) => call[0] === "review_pr_number", + ) as [string, string] | undefined; + expect(prNumberCall?.[1]).toBe("31"); + + const commentIdCall = setOutputSpy.mock.calls.find( + (call: unknown[]) => call[0] === "droid_comment_id", + ) as [string, string] | undefined; + expect(commentIdCall?.[1]).toBe("561"); + }); });