Skip to content

Commit ad85897

Browse files
committed
Implement timeout handling in parallel shell functions and add regression tests
1 parent 0e77392 commit ad85897

5 files changed

Lines changed: 174 additions & 12 deletions

File tree

src/bkg.sh

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,23 @@
1717

1818
source lib/owner.sh
1919

20+
# Run update_owner for every entry in the BKG_OWNERS_QUEUE set.
#
# On the "ipitio" owner's master branch the queue is piped to
# parallel_shell_func with `--halt soon,fail=1`; everywhere else (typically
# fewer owners) it is handed to run_parallel.
#
# Globals:   GITHUB_OWNER, BKG_ROOT (read)
# Arguments: none
# Returns:   0 when the queue is empty, otherwise the status of whichever
#            dispatcher ran. Callers in this file treat 3 as "stop and persist
#            state".
run_owner_updates() {
    local owners_queue
    local status=0
    owners_queue=$(get_BKG_set BKG_OWNERS_QUEUE)
    [ -n "$owners_queue" ] || return 0

    # `|| status=$?` records the dispatcher's own exit status; nothing may run
    # between the dispatcher and the capture, or $? would be overwritten.
    if [[ "$GITHUB_OWNER" = "ipitio" && "$(git branch --show-current)" = "master" ]]; then
        printf '%s\n' "$owners_queue" | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" update_owner --lb --halt soon,fail=1 || status=$?
    else # typically fewer owners
        run_parallel update_owner "$owners_queue" || status=$?
    fi

    return "$status"
}
36+
2037
main() {
2138
local rotated=false
2239
local owners
@@ -28,6 +45,7 @@ main() {
2845
local db_size_prev
2946
local connections
3047
local return_code=0
48+
local phase_status=0
3149
local opted_out
3250
local opted_out_before
3351
local rest_first
@@ -126,25 +144,48 @@ main() {
126144
else
127145
if [ "$GITHUB_OWNER" = "ipitio" ]; then
128146
explore "$GITHUB_OWNER" >"$connections"
147+
phase_status=$?
148+
((phase_status != 3)) || return_code=3
129149
explore "$GITHUB_OWNER/$GITHUB_REPO" >>"$connections"
150+
phase_status=$?
151+
((phase_status != 3)) || return_code=3
130152

131-
# get orgs of connections
132-
while read -r connection; do curl_orgs "$connection" >>"$temp_connections"; done <"$connections"
133-
cat "$temp_connections" >>"$connections"
153+
if ((return_code != 3)); then
154+
155+
# get orgs of connections
156+
while read -r connection; do
157+
curl_orgs "$connection" >>"$temp_connections"
158+
phase_status=$?
159+
if ((phase_status == 3)); then
160+
return_code=3
161+
break
162+
fi
163+
done <"$connections"
164+
cat "$temp_connections" >>"$connections"
165+
fi
134166

135167
sed -i 's/^[[:space:]]*//;s/[[:space:]]*$//; /^$/d; /^0\/$/d' "$connections"
136168
# shellcheck disable=SC2319
137169
BKG_PAGE_ALL=$(
138170
(($(wc -l <"$BKG_OWNERS") < $(($(sort -u "$connections" | wc -l) + 100))))
139171
echo "$?"
140172
)
141-
seq 1 2 | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" page_owner --lb --halt soon,fail=1
173+
if ((return_code != 3)); then
174+
seq 1 2 | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" page_owner --lb --halt soon,fail=1
175+
phase_status=$?
176+
((phase_status != 3)) || return_code=3
177+
fi
142178
else
143179
get_membership "$GITHUB_OWNER" >"$connections"
180+
phase_status=$?
181+
((phase_status != 3)) || return_code=3
144182
[ "$BKG_IS_FIRST" = "false" ] || : >"$BKG_OWNERS"
145183
[ "$BKG_IS_FIRST" = "false" ] || : >"$BKG_OPTOUT"
146184
fi
147185

186+
if ((return_code == 3)); then
187+
echo "Reached BKG_MAX_LEN, stopping after persisting state..."
188+
else
148189
if (( 9999 < pkg_done )) || (( pkg_left < 4 )) || [[ "${db_size_curr::-4}" == "${db_size_prev::-4}" ]]; then
149190
BKG_BATCH_FIRST_STARTED=$today
150191
set_BKG BKG_BATCH_FIRST_STARTED "$today"
@@ -168,6 +209,7 @@ main() {
168209
rm -f all_owners_in_db all_owners_tu owners_updated owners_partially_updated owners_stale
169210
set_BKG BKG_DIFF "$db_size_curr"
170211
set_BKG BKG_REST_TO_TOP "$((1 - rest_first))"
212+
fi
171213
fi
172214
else
173215
save_owner "$GITHUB_OWNER"
@@ -182,10 +224,13 @@ main() {
182224
BKG_BATCH_FIRST_STARTED=$(get_BKG BKG_BATCH_FIRST_STARTED)
183225
[ -d "$BKG_INDEX_DIR" ] || mkdir "$BKG_INDEX_DIR"
184226

185-
if [[ "$GITHUB_OWNER" = "ipitio" && "$(git branch --show-current)" = "master" ]]; then
186-
get_BKG_set BKG_OWNERS_QUEUE | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" update_owner --lb
187-
else # typically fewer owners
188-
run_parallel update_owner "$(get_BKG_set BKG_OWNERS_QUEUE)"
227+
if ((return_code != 3)); then
228+
run_owner_updates
229+
phase_status=$?
230+
if ((phase_status == 3)); then
231+
return_code=3
232+
echo "Reached BKG_MAX_LEN, stopping after persisting state..."
233+
fi
189234
fi
190235

191236
set_BKG BKG_OUT "$(wc -l <"$BKG_OPTOUT")"

src/lib/util.sh

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,17 @@ parallel_shell_func() {
288288
[ -n "$2" ] || return
289289
local source_file=$1
290290
local function_name=$2
291+
local status
291292
shift 2
292293

293294
parallel "$@" bash "$BKG_ROOT/src/lib/parallel-worker.sh" "$source_file" "$function_name"
295+
status=$?
296+
297+
if ((status == 2)) && [ "$(get_BKG BKG_TIMEOUT)" = "1" ]; then
298+
return 3
299+
fi
300+
301+
return "$status"
294302
}
295303

296304
parallel_async_status() {
@@ -453,12 +461,14 @@ get_owners() {
453461
# Extract the logins linked from a GitHub HTML page.
#
# Arguments: $1 - path appended to https://github.com/
#            $2 - optional; when non-empty the logins are passed to get_owners
#                 instead of being printed
# Outputs:   one login per line, or whatever get_owners prints
# Returns:   3 when the page fetch returned 3 (the status callers in this file
#            treat as the stop signal); otherwise the status of echo/get_owners
curl_users() {
    local page
    local users
    local status=0

    # Fetch on its own so the fetch status can be inspected. Inside the
    # extraction pipeline, $? would be the status of a later stage (grep),
    # which hides a 3 returned by curl.
    page=$(curl "https://github.com/$1") || status=$?
    ((status != 3)) || return 3

    users="$(grep -oP 'href="/.+?".*>' <<<"$page" | tr -d '\0' | grep -Ev '( .*|\?(return_to|tab))=' | tr -d '\0' | grep -oP '/.*?"' | cut -c2- | rev | cut -c2- | rev | grep -v "/")"

    if [ -z "${2:-}" ]; then
        echo "$users"
    else
        get_owners "$users"
    fi
}
458467

459468
# Extract the organization names referenced as /orgs/<name> on a GitHub page.
#
# Arguments: $1 - path appended to https://github.com/
#            $2 - optional; when non-empty the names are passed to get_owners
#                 instead of being printed
# Outputs:   one organization name per line, or whatever get_owners prints
# Returns:   3 when the page fetch returned 3 (the status callers in this file
#            treat as the stop signal); otherwise the status of echo/get_owners
curl_orgs() {
    local page
    local orgs
    local status=0

    # Fetch separately: after a pipeline, $? reflects a later stage (cut/grep),
    # so a 3 returned by curl would go unnoticed.
    page=$(curl "https://github.com/$1") || status=$?
    ((status != 3)) || return 3

    orgs="$(grep -oP '/orgs/[^/]+' <<<"$page" | tr -d '\0' | cut -d'/' -f3)"

    if [ -z "${2:-}" ]; then
        echo "$orgs"
    else
        get_owners "$orgs"
    fi
}
464474

@@ -467,6 +477,7 @@ explore() {
467477
local is_repo=false
468478
local is_user=false
469479
local got_orgs=false
480+
local status=0
470481
[[ ! "$node" =~ .*\/.* ]] || is_repo=true
471482
[ "$is_repo" = true ] && local graph=("stargazers" "watchers" "forks" "collaborators") || local graph=("followers" "following" "people")
472483
[ -z "$2" ] || graph=("$2")
@@ -477,23 +488,37 @@ explore() {
477488
local nodes
478489

479490
if [ "$is_repo" = true ]; then
480-
[ "$edge" = "collaborators" ] && nodes=$(query_api "repos/$node/collaborators?per_page=100&page=$page" | jq -r '.[] | select(.id and .login) | "\(.id)/\(.login)"' 2>/dev/null) || nodes=$(curl_users "$node/$edge?page=$page")
491+
if [ "$edge" = "collaborators" ]; then
492+
nodes=$(query_api "repos/$node/collaborators?per_page=100&page=$page" | jq -r '.[] | select(.id and .login) | "\(.id)/\(.login)"' 2>/dev/null)
493+
status=$?
494+
else
495+
nodes=$(curl_users "$node/$edge?page=$page")
496+
status=$?
497+
fi
481498
else
482499
if [ "$is_user" = false ]; then
483500
nodes=$(curl_users "orgs/$node/$edge?page=$page") # org
501+
status=$?
502+
((status != 3)) || return 3
484503
[ -n "$nodes" ] || is_user=true
485504
fi
486505

487506
if [ "$is_user" = true ]; then
488507
nodes=$(curl_users "$node?tab=$edge&page=$page") # user
508+
status=$?
509+
((status != 3)) || return 3
489510

490511
if [ "$got_orgs" = false ]; then
491512
curl_orgs "$node"
513+
status=$?
514+
((status != 3)) || return 3
492515
got_orgs=true
493516
fi
494517
fi
495518
fi
496519

520+
((status != 3)) || return 3
521+
497522
grep -v "$(cut -d'/' -f1 <<<"$node")" <<<"$nodes"
498523
[[ "$(wc -l <<<"$nodes")" -ge $([ "$edge" = "collaborators" ] && echo 100 || echo 15) ]] || break
499524
((page++))
@@ -503,9 +528,12 @@ explore() {
503528

504529
get_membership() {
505530
local owner
531+
local people_page
506532
owner=$(cut -d'/' -f2 <<<"$1")
533+
people_page=$(curl "https://github.com/orgs/$owner/people")
534+
(($? != 3)) || return 3
507535

508-
if [ -n "$(grep -zoP 'href="/orgs/'"$owner"'/people"' <<<"$(curl "https://github.com/orgs/$owner/people")" | tr -d '\0')" ]; then
536+
if [ -n "$(grep -zoP 'href="/orgs/'"$owner"'/people"' <<<"$people_page" | tr -d '\0')" ]; then
509537
explore "$owner" "people"
510538
else
511539
curl_orgs "$owner"

src/test/lib.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ assert_file_exists() {
2020
}
2121

2222
# Fail the current test unless file $1 contains the literal string $2.
# `--` ends grep's option parsing so a needle that starts with '-' (for
# example "--halt") is treated as the pattern rather than as an option.
assert_contains() {
    grep -Fq -- "$2" "$1" || fail "Expected $1 to contain $2"
}
2525

2626
# Fail the current test if file $1 contains the literal string $2.
# `--` ends grep's option parsing so a needle that starts with '-' is treated
# as the pattern rather than as an option.
assert_not_contains() {
    ! grep -Fq -- "$2" "$1" || fail "Expected $1 to not contain $2"
}

src/test/regression.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ set -euo pipefail
44

55
test_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
66

7+
bash "$test_dir/timeout.sh"
78
bash "$test_dir/discovery.sh"
89
bash "$test_dir/arrays.sh"

src/test/timeout.sh

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/bin/bash

# shellcheck disable=SC1091,SC2034

# Regression tests for propagation of the timeout status (3).

set -euo pipefail

# Load the shared test helpers that live next to this script.
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib.sh"
# Abort immediately if these are unset or empty (presumably provided by
# lib.sh - confirm).
src_dir=${src_dir:?}
workdir=${workdir:?}
10+
11+
# parallel_shell_func must report status 3 when its workers fail while
# BKG_TIMEOUT is set to 1.
#
# Writes a fixture worker whose only function returns 3, feeds it two input
# lines, and checks the status that parallel_shell_func returns.
# Globals:   workdir (read), BKG_ENV (written)
test_parallel_shell_func_timeout_fallback() {
    local fixture_file="$workdir/timeout-worker.sh"
    local input_file="$workdir/timeout-input.txt"
    local status=0

    # Quoted delimiter: the fixture is written literally, with no expansion.
    cat >"$fixture_file" <<'EOF'
#!/bin/bash

timeout_worker() {
    return 3
}
EOF

    printf 'one\ntwo\n' >"$input_file"
    BKG_ENV="$workdir/env-timeout.env"
    : >"$BKG_ENV"
    set_BKG BKG_TIMEOUT "1"

    # In the else branch $? still holds the status of the `if` condition.
    if parallel_shell_func "$fixture_file" timeout_worker --lb <"$input_file"; then
        fail "Expected parallel_shell_func to surface timeout status 3"
    else
        status=$?
    fi

    [ "$status" -eq 3 ] || fail "Expected parallel_shell_func to return 3 after timeout, got $status"
}
37+
38+
# run_owner_updates must hand the queued owners to parallel_shell_func with
# `--halt soon,fail=1` and pass a status of 3 back to its caller.
#
# get_BKG_set, git and parallel_shell_func are replaced with stubs. Shell
# functions are global, so the stubs stay defined after this test returns.
# Globals:   workdir (read), GITHUB_OWNER (written)
test_run_owner_updates_halts_on_timeout() {
    local args_file="$workdir/owner-update.args"
    local stdin_file="$workdir/owner-update.stdin"
    local status=0

    # Stub: a fixed two-owner queue.
    get_BKG_set() {
        printf '1/alpha\n2/beta\n'
    }

    # Stub: report "master" as the current branch; defer anything else to git.
    git() {
        if [ "$1" = "branch" ] && [ "$2" = "--show-current" ]; then
            echo master
            return 0
        fi

        command git "$@"
    }

    # Stub: record the arguments (one per line) and stdin, then report 3.
    parallel_shell_func() {
        printf '%s\n' "$@" >"$args_file"
        cat >"$stdin_file"
        return 3
    }

    GITHUB_OWNER=ipitio

    # In the else branch $? still holds the status of the `if` condition.
    if run_owner_updates; then
        fail "Expected run_owner_updates to return 3 when owner workers time out"
    else
        status=$?
    fi

    [ "$status" -eq 3 ] || fail "Expected run_owner_updates to return 3, got $status"
    assert_contains "$args_file" "update_owner"
    assert_contains "$args_file" "--halt"
    assert_contains "$args_file" "soon,fail=1"
    assert_contains "$stdin_file" "1/alpha"
    assert_contains "$stdin_file" "2/beta"
}
77+
78+
# Run cleanup on every exit path (cleanup is not defined in this script;
# presumably it comes from lib.sh - confirm).
trap cleanup EXIT

# bkg.sh sources its libraries by relative path (e.g. `source lib/owner.sh`),
# so load it from inside the source directory.
pushd "$src_dir" >/dev/null
export BKG_SKIP_DEP_VERIFY=1
source bkg.sh
popd >/dev/null

test_parallel_shell_func_timeout_fallback
test_run_owner_updates_halts_on_timeout

echo "Timeout propagation regression tests passed"

0 commit comments

Comments
 (0)