Skip to content

Commit a651662

Browse files
authored
feat: add retry limit, improve ID detection (#319)
2 parents 0b56da1 + 438cc08 commit a651662

1 file changed

Lines changed: 166 additions & 48 deletions

File tree

.github/workflows/NightlyDispatcher.yml

Lines changed: 166 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -30,17 +30,44 @@ jobs:
3030
}
3131
});
3232
33-
// Wait a moment then get the latest run
34-
await new Promise(resolve => setTimeout(resolve, 5000));
35-
const runs = await github.rest.actions.listWorkflowRuns({
36-
owner: context.repo.owner,
37-
repo: context.repo.repo,
38-
workflow_id: 'NativePipeline.yml',
39-
branch: 'main',
40-
per_page: 1
41-
});
33+
// Wait longer and find the correct run
34+
let runId = null;
35+
let attempts = 0;
36+
const maxAttempts = 12; // 2 minutes total
37+
38+
while (!runId && attempts < maxAttempts) {
39+
await new Promise(resolve => setTimeout(resolve, 10000)); // Wait 10 seconds
40+
attempts++;
41+
42+
const runs = await github.rest.actions.listWorkflowRuns({
43+
owner: context.repo.owner,
44+
repo: context.repo.repo,
45+
workflow_id: 'NativePipeline.yml',
46+
branch: 'main',
47+
per_page: 5
48+
});
49+
50+
// Find a workflow_dispatch run created in the last 5 minutes
51+
const fiveMinutesAgo = new Date();
52+
fiveMinutesAgo.setMinutes(fiveMinutesAgo.getMinutes() - 5);
53+
54+
const recentRun = runs.data.workflow_runs.find(run => {
55+
const runDate = new Date(run.created_at);
56+
return runDate > fiveMinutesAgo && run.event === 'workflow_dispatch';
57+
});
58+
59+
if (recentRun) {
60+
runId = recentRun.id;
61+
console.log(`Found triggered run ID: ${runId}`);
62+
break;
63+
}
64+
}
65+
66+
if (!runId) {
67+
core.setFailed('Could not find the triggered workflow run');
68+
return;
69+
}
4270
43-
const runId = runs.data.workflow_runs[0]?.id;
4471
core.setOutput('run-id', runId);
4572
return runId;
4673
@@ -66,92 +93,183 @@ jobs:
6693
}
6794
});
6895
69-
// Wait a moment then get the latest run
70-
await new Promise(resolve => setTimeout(resolve, 5000));
71-
const runs = await github.rest.actions.listWorkflowRuns({
72-
owner: context.repo.owner,
73-
repo: context.repo.repo,
74-
workflow_id: 'NativePipeline.yml',
75-
branch: 'version/mx/10',
76-
per_page: 1
77-
});
96+
// Wait longer and find the correct run
97+
let runId = null;
98+
let attempts = 0;
99+
const maxAttempts = 12; // 2 minutes total
100+
101+
while (!runId && attempts < maxAttempts) {
102+
await new Promise(resolve => setTimeout(resolve, 10000)); // Wait 10 seconds
103+
attempts++;
104+
105+
const runs = await github.rest.actions.listWorkflowRuns({
106+
owner: context.repo.owner,
107+
repo: context.repo.repo,
108+
workflow_id: 'NativePipeline.yml',
109+
branch: 'version/mx/10',
110+
per_page: 5
111+
});
112+
113+
// Find a workflow_dispatch run created in the last 5 minutes
114+
const fiveMinutesAgo = new Date();
115+
fiveMinutesAgo.setMinutes(fiveMinutesAgo.getMinutes() - 5);
116+
117+
const recentRun = runs.data.workflow_runs.find(run => {
118+
const runDate = new Date(run.created_at);
119+
return runDate > fiveMinutesAgo && run.event === 'workflow_dispatch';
120+
});
121+
122+
if (recentRun) {
123+
runId = recentRun.id;
124+
console.log(`Found triggered run ID: ${runId}`);
125+
break;
126+
}
127+
}
128+
129+
if (!runId) {
130+
core.setFailed('Could not find the triggered workflow run');
131+
return;
132+
}
78133
79-
const runId = runs.data.workflow_runs[0]?.id;
80134
core.setOutput('run-id', runId);
81135
return runId;
82136
83137
auto-retry-main:
84138
needs: dispatch-main
85139
if: always() && needs.dispatch-main.result == 'success' && github.event.schedule == '0 22 * * *'
86140
runs-on: ubuntu-latest
141+
env:
142+
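RETRY_COUNT: 0 # NOTE(review): not read by the script in this job, which checks run.data.run_attempt instead; confirm whether this is still needed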
87143
steps:
88144
- name: "Monitor and retry failed jobs"
89145
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7
90146
with:
91147
script: |
92148
const runId = '${{ needs.dispatch-main.outputs.run-id }}';
149+
const MAX_RETRIES = 1; // Only retry once
93150
94151
if (!runId || runId === 'null') {
95152
core.setFailed('No run ID available from dispatch job');
96153
return;
97154
}
98155
99-
// Poll for completion
156+
console.log(`Monitoring run ID: ${runId}`);
157+
158+
// Poll for completion with timeout
100159
let run;
160+
let pollAttempts = 0;
161+
const maxPollAttempts = 120; // 2 hours max
162+
101163
do {
164+
if (pollAttempts >= maxPollAttempts) {
165+
console.log('Monitoring timeout reached (2 hours). Stopping.');
166+
return;
167+
}
168+
102169
await new Promise(resolve => setTimeout(resolve, 60000)); // Wait 1 minute
103-
run = await github.rest.actions.getWorkflowRun({
104-
owner: context.repo.owner,
105-
repo: context.repo.repo,
106-
run_id: runId
107-
});
108-
console.log(`Run status: ${run.data.status}, conclusion: ${run.data.conclusion}`);
170+
pollAttempts++;
171+
172+
try {
173+
run = await github.rest.actions.getWorkflowRun({
174+
owner: context.repo.owner,
175+
repo: context.repo.repo,
176+
run_id: runId
177+
});
178+
console.log(`Poll #${pollAttempts}: Run status: ${run.data.status}, conclusion: ${run.data.conclusion || 'N/A'}`);
179+
} catch (error) {
180+
console.log(`Error getting run status: ${error.message}`);
181+
continue;
182+
}
109183
} while (run.data.status === 'in_progress' || run.data.status === 'queued');
110184
111-
// If there are failures, rerun failed jobs
185+
// Check if we should retry (only once)
112186
if (run.data.conclusion === 'failure') {
113-
console.log('Pipeline failed, triggering rerun of failed jobs...');
114-
await github.rest.actions.reRunWorkflowFailedJobs({
115-
owner: context.repo.owner,
116-
repo: context.repo.repo,
117-
run_id: runId
118-
});
187+
console.log('Pipeline failed. Checking retry count...');
188+
189+
// Use GitHub's run attempt number to track retries
190+
if (run.data.run_attempt <= MAX_RETRIES) {
191+
console.log(`Triggering retry (attempt ${run.data.run_attempt}/${MAX_RETRIES})...`);
192+
await github.rest.actions.reRunWorkflowFailedJobs({
193+
owner: context.repo.owner,
194+
repo: context.repo.repo,
195+
run_id: runId
196+
});
197+
console.log('Retry triggered for failed jobs only.');
198+
} else {
199+
console.log(`Maximum retries (${MAX_RETRIES}) reached. Not retrying again.`);
200+
}
201+
} else if (run.data.conclusion === 'success') {
202+
console.log('Pipeline completed successfully!');
203+
} else {
204+
console.log(`Pipeline completed with conclusion: ${run.data.conclusion}`);
119205
}
120206
121207
auto-retry-version-mx-10:
122208
needs: dispatch-version-mx-10
123209
if: always() && needs.dispatch-version-mx-10.result == 'success' && github.event.schedule == '0 4 * * *'
124210
runs-on: ubuntu-latest
211+
env:
212+
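RETRY_COUNT: 0 # NOTE(review): not read by the visible script in this job, which checks run.data.run_attempt instead; confirm whether this is still needed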
125213
steps:
126214
- name: "Monitor and retry failed jobs"
127215
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7
128216
with:
129217
script: |
130218
const runId = '${{ needs.dispatch-version-mx-10.outputs.run-id }}';
219+
const MAX_RETRIES = 1; // Only retry once
131220
132221
if (!runId || runId === 'null') {
133222
core.setFailed('No run ID available from dispatch job');
134223
return;
135224
}
136225
137-
// Poll for completion
226+
console.log(`Monitoring run ID: ${runId}`);
227+
228+
// Poll for completion with timeout
138229
let run;
230+
let pollAttempts = 0;
231+
const maxPollAttempts = 120; // 2 hours max
232+
139233
do {
234+
if (pollAttempts >= maxPollAttempts) {
235+
console.log('Monitoring timeout reached (2 hours). Stopping.');
236+
return;
237+
}
238+
140239
await new Promise(resolve => setTimeout(resolve, 60000)); // Wait 1 minute
141-
run = await github.rest.actions.getWorkflowRun({
142-
owner: context.repo.owner,
143-
repo: context.repo.repo,
144-
run_id: runId
145-
});
146-
console.log(`Run status: ${run.data.status}, conclusion: ${run.data.conclusion}`);
240+
pollAttempts++;
241+
242+
try {
243+
run = await github.rest.actions.getWorkflowRun({
244+
owner: context.repo.owner,
245+
repo: context.repo.repo,
246+
run_id: runId
247+
});
248+
console.log(`Poll #${pollAttempts}: Run status: ${run.data.status}, conclusion: ${run.data.conclusion || 'N/A'}`);
249+
} catch (error) {
250+
console.log(`Error getting run status: ${error.message}`);
251+
continue;
252+
}
147253
} while (run.data.status === 'in_progress' || run.data.status === 'queued');
148254
149-
// If there are failures, rerun failed jobs
255+
// Check if we should retry (only once)
150256
if (run.data.conclusion === 'failure') {
151-
console.log('Pipeline failed, triggering rerun of failed jobs...');
152-
await github.rest.actions.reRunWorkflowFailedJobs({
153-
owner: context.repo.owner,
154-
repo: context.repo.repo,
155-
run_id: runId
156-
});
257+
console.log('Pipeline failed. Checking retry count...');
258+
259+
// Use GitHub's run attempt number to track retries
260+
if (run.data.run_attempt <= MAX_RETRIES) {
261+
console.log(`Triggering retry (attempt ${run.data.run_attempt}/${MAX_RETRIES})...`);
262+
await github.rest.actions.reRunWorkflowFailedJobs({
263+
owner: context.repo.owner,
264+
repo: context.repo.repo,
265+
run_id: runId
266+
});
267+
console.log('Retry triggered for failed jobs only.');
268+
} else {
269+
console.log(`Maximum retries (${MAX_RETRIES}) reached. Not retrying again.`);
270+
}
271+
} else if (run.data.conclusion === 'success') {
272+
console.log('Pipeline completed successfully!');
273+
} else {
274+
console.log(`Pipeline completed with conclusion: ${run.data.conclusion}`);
157275
}

0 commit comments

Comments
 (0)