Skip to content

Commit af5a3c1

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 2f65560 + 0e77392 commit af5a3c1

14 files changed

Lines changed: 913 additions & 132 deletions

File tree

.github/workflows/publish.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ jobs:
2525
- name: Checkout repository
2626
uses: actions/checkout@v4
2727

28+
- name: Regression tests
29+
run: bash src/test/regression.sh
30+
2831
- name: Log in to the Container registry
2932
uses: docker/login-action@v3.1.0
3033
with:

src/bkg.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ main() {
121121
if [ "$BKG_MODE" -ne 2 ]; then
122122
if [ "$BKG_MODE" -eq 0 ] || [ "$BKG_MODE" -eq 3 ]; then
123123
if $fast_out; then
124-
grep -oP '^[^\/]+' "$BKG_OPTOUT" | env_parallel --lb save_owner
124+
grep -oP '^[^\/]+' "$BKG_OPTOUT" | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" save_owner --lb
125125
return_code=1
126126
else
127127
if [ "$GITHUB_OWNER" = "ipitio" ]; then
@@ -138,14 +138,14 @@ main() {
138138
(($(wc -l <"$BKG_OWNERS") < $(($(sort -u "$connections" | wc -l) + 100))))
139139
echo "$?"
140140
)
141-
seq 1 2 | env_parallel --lb --halt soon,fail=1 page_owner
141+
seq 1 2 | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" page_owner --lb --halt soon,fail=1
142142
else
143143
get_membership "$GITHUB_OWNER" >"$connections"
144144
[ "$BKG_IS_FIRST" = "false" ] || : >"$BKG_OWNERS"
145145
[ "$BKG_IS_FIRST" = "false" ] || : >"$BKG_OPTOUT"
146146
fi
147147

148-
if (( pkg_left < pkg_done )) || [[ "${db_size_curr::-4}" == "${db_size_prev::-4}" ]]; then
148+
if (( 9999 < pkg_done )) || (( pkg_left < 4 )) || [[ "${db_size_curr::-4}" == "${db_size_prev::-4}" ]]; then
149149
BKG_BATCH_FIRST_STARTED=$today
150150
set_BKG BKG_BATCH_FIRST_STARTED "$today"
151151
rm -f packages_to_update
@@ -164,7 +164,7 @@ main() {
164164
grep -vFxf all_owners_in_db "$BKG_OWNERS" >owners.tmp
165165
mv owners.tmp "$BKG_OWNERS"
166166
rest_first=$(get_BKG BKG_REST_TO_TOP)
167-
bash lib/get.sh "$rest_first" "$connections" $request_limit "$GITHUB_OWNER" "$BKG_OWNERS" "$BKG_INDEX_DIR" | env_parallel --lb save_owner
167+
bash lib/get.sh "$rest_first" "$connections" $request_limit "$GITHUB_OWNER" "$BKG_OWNERS" "$BKG_INDEX_DIR" | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" save_owner --lb
168168
rm -f all_owners_in_db all_owners_tu owners_updated owners_partially_updated owners_stale
169169
set_BKG BKG_DIFF "$db_size_curr"
170170
set_BKG BKG_REST_TO_TOP "$((1 - rest_first))"
@@ -173,7 +173,7 @@ main() {
173173
save_owner "$GITHUB_OWNER"
174174
get_membership "$GITHUB_OWNER" >"$connections"
175175
if [ -s "$connections" ]; then
176-
env_parallel --lb save_owner <"$connections" || while read -r connection; do save_owner "$connection"; done <"$connections"
176+
parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" save_owner --lb <"$connections" || while read -r connection; do save_owner "$connection"; done <"$connections"
177177
fi
178178
fi
179179

@@ -183,7 +183,7 @@ main() {
183183
[ -d "$BKG_INDEX_DIR" ] || mkdir "$BKG_INDEX_DIR"
184184

185185
if [[ "$GITHUB_OWNER" = "ipitio" && "$(git branch --show-current)" = "master" ]]; then
186-
get_BKG_set BKG_OWNERS_QUEUE | env_parallel --lb update_owner
186+
get_BKG_set BKG_OWNERS_QUEUE | parallel_shell_func "$BKG_ROOT/src/lib/owner.sh" update_owner --lb
187187
else # typically fewer owners
188188
run_parallel update_owner "$(get_BKG_set BKG_OWNERS_QUEUE)"
189189
fi

src/lib/get.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ get_remaining() {
3737
grep -Fxf "$1" "$2"
3838
}
3939

40+
# Print a de-duplicated list of entries on stdout, preserving first-seen order.
# Entries are gathered, in this order, from:
#   1. the output of `get_requests "$3"` (helper defined elsewhere in this file),
#   2. "$2" itself, when it is non-empty,
#   3. the lines of the file named by "$1".
# Blank lines are dropped and only the first occurrence of each line is kept.
# NOTE(review): judging by the call in get_owners, $1 looks like the connections
# file and $2 the GitHub owner — confirm against the caller.
get_discovered() {
    local discovered_file=$1
    local discovered_extra=$2
    local discovered_requests_arg=$3

    {
        get_requests "$discovered_requests_arg"

        if [ -n "$discovered_extra" ]; then
            echo "$discovered_extra"
        fi

        cat "$discovered_file"
    } | awk 'NF && !seen[$0]++'
}
47+
4048
get_owners(){
4149
git -C "$6" log --name-only --pretty=format:%ct -- . | awk '
4250
/^[0-9]+$/ { ts=$0; next } # commit timestamp line
@@ -45,6 +53,7 @@ index($0,"/")==0 { next } # skip root-level files
4553
{ split($0,a,"/"); d=a[1]; if(!(d in seen)) seen[d]=ts }
4654
END { for(d in seen) printf "%s %s\n", seen[d], d }
4755
' | sort -n | cut -d' ' -f2- >complete_owners
56+
get_discovered "$2" "$4" "$5" | grep -vFxf all_owners_in_db -
4857
get_remaining complete_owners "$2" "$4" "$5" | grep -vFxf all_owners_in_db -
4958
rm -f complete_owners
5059
[ "$1" = "0" ] || get_remaining owners_stale "$2" "$4" "$5"

src/lib/owner.sh

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,14 @@ update_owner() {
131131

132132
if [ -n "$owner_repos" ]; then
133133
echo "Creating $owner array..."
134-
find "$BKG_INDEX_DIR/$owner" -type f -name '*.json' ! -name '.*' -print0 | xargs -0 jq -cs '[.] | add' >"$BKG_INDEX_DIR/$owner/.json.tmp"
135-
jq -cs '{ ("package"): . }' "$BKG_INDEX_DIR/$owner/.json.tmp" >"$BKG_INDEX_DIR/$owner/.json"
134+
find "$BKG_INDEX_DIR/$owner" -type f -name '*.json' ! -name '.*' -print0 | xargs -0 jq -cs '.' >"$BKG_INDEX_DIR/$owner/.json.tmp"
135+
mv -f "$BKG_INDEX_DIR/$owner/.json.tmp" "$BKG_INDEX_DIR/$owner/.json"
136136
bash lib/ytoxt.sh "$BKG_INDEX_DIR/$owner/.json"
137-
jq -c '.package[]' "$BKG_INDEX_DIR/$owner/.json" >"$BKG_INDEX_DIR/$owner/.json.tmp" 2>/dev/null
138-
mv -f "$BKG_INDEX_DIR/$owner/.json.tmp" "$BKG_INDEX_DIR/$owner/.json" 2>/dev/null
139137

140138
echo "Creating $owner repo arrays..."
141139
parallel "jq -c --arg repo {} '[.[] | select(.repo == \$repo)]' \"$BKG_INDEX_DIR/$owner/.json\" > \"$BKG_INDEX_DIR/$owner/{}/.json.tmp\"" <<<"$owner_repos"
142-
xargs -I {} bash -c "jq -cs '{ (\"package\"): . }' \"$BKG_INDEX_DIR/$owner/{}/.json.tmp\" > \"$BKG_INDEX_DIR/$owner/{}/.json\"" <<<"$owner_repos"
143-
xargs -I {} bash -c "bash lib/ytoxt.sh \"$BKG_INDEX_DIR/$owner/{}/.json\"" <<<"$owner_repos"
144-
xargs -I {} bash -c "jq -c '.package[]' \"$BKG_INDEX_DIR/$owner/{}/.json\" > \"$BKG_INDEX_DIR/$owner/{}/.json.tmp\"" 2>/dev/null <<<"$owner_repos"
145140
xargs -I {} mv -f "$BKG_INDEX_DIR/$owner/{}/.json.tmp" "$BKG_INDEX_DIR/$owner/{}/.json" 2>/dev/null <<<"$owner_repos"
141+
xargs -I {} bash -c "bash lib/ytoxt.sh \"$BKG_INDEX_DIR/$owner/{}/.json\"" <<<"$owner_repos"
146142
fi
147143

148144
sed -i '/^\(.*\/\)*'"$owner"'$/d' "$BKG_OWNERS"

src/lib/package.sh

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,10 @@ update_package() {
5858
local raw_downloads_week=-1
5959
local raw_downloads_day=-1
6060
local size=-1
61-
local versions_json=""
6261
local version_count=-1
6362
local version_with_tag_count=-1
6463
local version_newest_id=-1
6564
local latest_version=-1
66-
local latest_tags
6765
local owner_rank
6866
local repo_rank
6967
package_type=$(cut -d'/' -f1 <<<"$1")
@@ -92,7 +90,7 @@ update_package() {
9290
return
9391
fi
9492
done < <(grep "$owner" "$BKG_OPTOUT")
95-
elif $fast_out; then
93+
elif [ "${fast_out:-false}" = "true" ]; then
9694
return
9795
fi
9896

@@ -120,35 +118,80 @@ update_package() {
120118
echo "Updating $owner/$package..."
121119
raw_downloads=$(grep -Pzo 'Total downloads[^"]*"\d*' <<<"$html" | grep -Pzo '\d*$' | tr -d '\0') # https://stackoverflow.com/a/74214537
122120
sqlite3 "$BKG_INDEX_DB" "select id from '$table_version_name' where date >= '$BKG_BATCH_FIRST_STARTED';" | sort -u >"${table_version_name}"_already_updated
123-
local break_now=false
121+
local max_version_pages=3
122+
local tag_cache_pages=3
123+
local page=1
124+
local pages_left=0
125+
local pipeline_status=0
126+
local update_versions_status=0
127+
local version_lines
124128

125-
for page in $(seq 0 5); do
126-
((page > 0)) || continue
127-
local pages_left=0
129+
version_reset_pipeline "$tag_cache_pages"
130+
131+
page_version "$page"
132+
pages_left=$?
133+
if ((pages_left == 3)); then
134+
parallel_async_wait || :
135+
rm -f "${table_version_name}"_already_updated
136+
return 3
137+
fi
138+
139+
version_lines=$(jq -r '.[] | @base64' <<<"$VERSION_PAGE_JSON")
140+
if [ -n "$version_lines" ]; then
141+
version_hydrate_candidates "$version_lines" 0
142+
pipeline_status=$?
143+
144+
if ((pipeline_status != 3)); then
145+
version_submit_current_page_candidates 5 false
146+
pipeline_status=$?
147+
fi
148+
149+
if ((pipeline_status != 3)); then
150+
version_collect_current_page_provisional 5
151+
version_resolve_provisional_candidates "$tag_cache_pages"
152+
pipeline_status=$?
153+
fi
154+
fi
155+
156+
while ((pipeline_status != 3)) && ((pages_left != 2)) && ((page < max_version_pages)) && ((${#VERSION_PROVISIONAL_IDS[@]} > 0)); do
157+
((page++))
128158
page_version "$page"
129159
pages_left=$?
130-
versions_json=$(jq -c -s '.' "$BKG_INDEX_DIR/$owner/$repo/$package".*.json 2>/dev/null)
131-
rm -f "$BKG_INDEX_DIR/$owner/$repo/$package".*.json
132-
((pages_left != 3)) || return 3
133-
jq -e . <<<"$versions_json" &>/dev/null || versions_json="[{\"id\":\"-1\",\"name\":\"latest\",\"tags\":\"\"}]"
134-
! jq -e 'length > 1' <<<"$versions_json" &>/dev/null || versions_json=$(jq -c 'map(select(.id >= 0))' <<<"$versions_json")
135-
[ -n "$latest_tags" ] || latest_tags=$(
136-
jq -r '
137-
[ .[]
138-
| select(.tags | split(",") | map(gsub("^\\s+|\\s+$";"")) | any(. == "latest"))
139-
| .tags
140-
][0] // ""
141-
' <<<"$versions_json"
142-
)
143-
latest_tags=$(perl -pe 's/(?<!\\)"/\\"/g' <<<"$latest_tags")
144-
run_parallel update_version "$(jq -r '.[] | @base64' <<<"$versions_json")"
145-
(($? != 3)) || return 3
146-
((pages_left != 2)) || break
147-
! $break_now || break
148-
[ -z "$latest_tags" ] || break_now=true
160+
161+
if ((pages_left == 3)); then
162+
pipeline_status=3
163+
break
164+
fi
165+
166+
version_lines=$(jq -r '.[] | @base64' <<<"$VERSION_PAGE_JSON")
167+
[ -n "$version_lines" ] || continue
168+
169+
version_hydrate_candidates "$version_lines" 0
170+
pipeline_status=$?
171+
((pipeline_status != 3)) || break
172+
version_promote_current_page_candidates "$tag_cache_pages"
173+
pipeline_status=$?
149174
done
150175

176+
if ((pipeline_status != 3)) && ((${#VERSION_SOURCE_LINES[@]} == 0)); then
177+
version_store_fallback_candidate
178+
version_submit_candidate "-1"
179+
pipeline_status=$?
180+
fi
181+
182+
if ((pipeline_status != 3)); then
183+
for version_id in "${VERSION_PROVISIONAL_IDS[@]}"; do
184+
version_submit_candidate "$version_id"
185+
pipeline_status=$?
186+
((pipeline_status != 3)) || break
187+
done
188+
fi
189+
190+
parallel_async_wait
191+
update_versions_status=$?
192+
151193
rm -f "${table_version_name}"_already_updated
194+
((pipeline_status != 3 && update_versions_status != 3)) || return 3
152195
fi
153196

154197
# calculate the overall downloads and size

src/lib/parallel-worker.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
# shellcheck disable=SC1090,SC1091
#
# Worker entry point: source a library file, then call one function from it
# with the remaining command-line arguments.
#
# Usage: parallel-worker.sh <source_file> <function_name> [args...]
#
# Exit status: 1 when an argument is missing, the file is unreadable, or the
# working directory cannot be changed; otherwise the status of the function.

source_file=$1
function_name=$2

[ -n "$source_file" ] || exit 1
[ -n "$function_name" ] || exit 1
shift 2

# Anchor a relative path to the invocation directory before changing
# directory; otherwise the `source` below would look for it relative to the
# new working directory and miss the file.
case $source_file in
    /*) ;;
    *) source_file=$PWD/$source_file ;;
esac

# Fail early with a clear message instead of a later "command not found".
if [ ! -r "$source_file" ]; then
    echo "parallel-worker: cannot read $source_file" >&2
    exit 1
fi

# util.sh's bootstrap checks this flag and skips dependency verification.
export BKG_SKIP_DEP_VERIFY=1

# Run from the parent of the directory holding the sourced file.
# NOTE(review): presumably because the libraries use paths relative to that
# directory (e.g. `bash lib/ytoxt.sh`) — confirm.
cd "$(dirname "$source_file")/.." || exit 1
source "$source_file"
"$function_name" "$@"

src/lib/util.sh

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,16 @@ yq_install() {
3030
sudonot chmod +x /usr/bin/yq
3131
}
3232

33-
echo "Verifying dependencies..."
34-
apt_install git curl jq parallel sqlite3 sqlite3-pcre zstd libxml2-utils
35-
yq -V | grep -q mikefarah 2>/dev/null || yq_install
36-
echo "Dependencies verified!"
37-
# shellcheck disable=SC2046
38-
source $(which env_parallel.bash)
39-
env_parallel --session
33+
if [ -z "${BKG_UTIL_BOOTSTRAPPED:-}" ]; then
34+
if [ "${BKG_SKIP_DEP_VERIFY:-0}" != "1" ]; then
35+
echo "Verifying dependencies..."
36+
apt_install git curl jq parallel sqlite3 sqlite3-pcre zstd libxml2-utils
37+
yq -V | grep -q mikefarah 2>/dev/null || yq_install
38+
echo "Dependencies verified!"
39+
fi
40+
41+
BKG_UTIL_BOOTSTRAPPED=1
42+
fi
4043
GITHUB_OWNER=${GITHUB_OWNER:-ipitio}
4144
GITHUB_REPO=${GITHUB_REPO:-backage}
4245
BKG_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"/../..
@@ -174,10 +177,9 @@ check_limit() {
174177
local min_passed
175178
local rate_limit_start
176179
rate_limit_end=$(date -u +%s)
177-
[ -n "$BKG_SCRIPT_START" ] && rate_limit_start="$BKG_SCRIPT_START" || {
178-
rate_limit_start=$(get_BKG BKG_SCRIPT_START)
179-
[ -n "$rate_limit_start" ] || echo "BKG_SCRIPT_START empty!"
180-
}
180+
rate_limit_start=$(get_BKG BKG_SCRIPT_START)
181+
[ -n "$rate_limit_start" ] || rate_limit_start="$BKG_SCRIPT_START"
182+
[ -n "$rate_limit_start" ] || echo "BKG_SCRIPT_START empty!"
181183
script_limit_diff=$((rate_limit_end - rate_limit_start))
182184
((script_limit_diff < BKG_MAX_LEN)) || save_and_exit
183185
(($? != 3)) || return 3
@@ -281,6 +283,61 @@ run_parallel() {
281283
! grep -q "3" <<<"$code" || return 3
282284
}
283285

286+
# Run a shell function from a library file through GNU parallel.
# Each job is executed as: bash parallel-worker.sh <source_file> <function_name> <input>
# where parallel-worker.sh sources the file and calls the function.
# Arguments:
#   $1  - path of the file that defines the function
#   $2  - name of the function to call
#   $3+ - options forwarded to parallel ahead of the command (e.g. --lb)
# Input:   job arguments are read by parallel from stdin.
# Returns: non-zero without running anything when $1 or $2 is empty,
#          otherwise the exit status of parallel.
parallel_shell_func() {
    [ -n "$1" ] || return
    [ -n "$2" ] || return
    local source_file=$1
    local function_name=$2
    local worker_script=$BKG_ROOT/src/lib/parallel-worker.sh
    shift 2

    # parallel joins the command words into a single string and hands it to a
    # shell, so a path containing spaces or metacharacters would be split
    # again. Escape the fixed words; plain paths and names are left unchanged.
    printf -v worker_script '%q' "$worker_script"
    printf -v source_file '%q' "$source_file"
    printf -v function_name '%q' "$function_name"

    parallel "$@" bash "$worker_script" "$source_file" "$function_name"
}
295+
296+
# Report whether any asynchronous job has recorded exit code 3.
# Reads the file named by PARALLEL_ASYNC_EXIT_CODE, to which failed jobs append
# their exit codes, one per line.
# Returns:
#   1 - PARALLEL_ASYNC_EXIT_CODE is empty or does not name a regular file
#   3 - the file contains a line that is exactly "3"
#   0 - otherwise
parallel_async_status() {
    local exit_code_log=$PARALLEL_ASYNC_EXIT_CODE

    if [ -z "$exit_code_log" ] || [ ! -f "$exit_code_log" ]; then
        return 1
    fi

    if grep -Fxq "3" "$exit_code_log"; then
        return 3
    fi

    return 0
}
301+
302+
# Start "$1" "$2" as a background job, limiting how many jobs run at once.
# Arguments:
#   $1 - command or function to run
#   $2 - its single argument
# Globals (created on first use, cleared by parallel_async_wait):
#   PARALLEL_ASYNC_EXIT_CODE - temp file; failed jobs append their exit code
#   PARALLEL_ASYNC_MAX_JOBS  - concurrency limit, taken from `nproc --all`
#   PARALLEL_ASYNC_RUNNING   - number of jobs started and not yet reaped
# Returns:
#   0        - the job was started
#   non-zero - an argument was empty, the temp file could not be created, or
#              parallel_async_status reported a failure (3 when a job has
#              recorded exit code 3); in these cases no job is started
parallel_async_submit() {
    [ -n "$1" ] || return
    [ -n "$2" ] || return

    if [ -z "$PARALLEL_ASYNC_EXIT_CODE" ]; then
        PARALLEL_ASYNC_EXIT_CODE=$(mktemp) || return
        PARALLEL_ASYNC_MAX_JOBS=$(nproc --all 2>/dev/null)
        # Fall back to one job at a time when nproc is missing or prints
        # something unusable; an empty limit would break the -ge test below
        # and silently disable throttling.
        [[ "$PARALLEL_ASYNC_MAX_JOBS" =~ ^[1-9][0-9]*$ ]] || PARALLEL_ASYNC_MAX_JOBS=1
        PARALLEL_ASYNC_RUNNING=0
    fi

    parallel_async_status || return $?

    # At the limit: reap one finished job before starting another.
    while [ "$PARALLEL_ASYNC_RUNNING" -ge "$PARALLEL_ASYNC_MAX_JOBS" ]; do
        wait -n || :
        PARALLEL_ASYNC_RUNNING=$((PARALLEL_ASYNC_RUNNING - 1))
        parallel_async_status || return $?
    done

    ("$1" "$2" || printf '%s\n' "$?" >>"$PARALLEL_ASYNC_EXIT_CODE") &

    # Plain assignment instead of ((x++)): the arithmetic command exits with
    # status 1 when the old value is 0, which made the first successful
    # submission look like a failure to the caller.
    PARALLEL_ASYNC_RUNNING=$((PARALLEL_ASYNC_RUNNING + 1))
    return 0
}
323+
324+
# Wait for the jobs started by parallel_async_submit and report their outcome.
# Does nothing (returns 0) when PARALLEL_ASYNC_EXIT_CODE is empty, i.e. no
# submission state exists. Otherwise reaps one job per counted submission,
# checks parallel_async_status after each and once more at the end, removes
# the exit-code file and unsets the PARALLEL_ASYNC_* variables.
# Returns: 0 when every status check succeeded, otherwise the status of the
#          last failing check.
parallel_async_wait() {
    local rc=0

    if [ -z "$PARALLEL_ASYNC_EXIT_CODE" ]; then
        return 0
    fi

    until ((PARALLEL_ASYNC_RUNNING <= 0)); do
        # The status of an individual job is irrelevant here; failures are
        # picked up from the exit-code file instead.
        wait -n || :
        PARALLEL_ASYNC_RUNNING=$((PARALLEL_ASYNC_RUNNING - 1))
        parallel_async_status || rc=$?
    done

    parallel_async_status || rc=$?

    rm -f "$PARALLEL_ASYNC_EXIT_CODE"
    unset PARALLEL_ASYNC_EXIT_CODE PARALLEL_ASYNC_MAX_JOBS PARALLEL_ASYNC_RUNNING

    return "$rc"
}
340+
284341
# Decode a base64-encoded JSON value and run jq on it in raw-output mode.
# Arguments:
#   $1  - base64-encoded JSON text
#   $2+ - arguments passed to jq after -r (typically the filter)
# Outputs: whatever jq prints for the decoded input.
_jq() {
    local encoded_json=$1
    shift

    echo "$encoded_json" | base64 --decode | jq -r "$@"
}
@@ -410,7 +467,7 @@ explore() {
410467
local is_repo=false
411468
local is_user=false
412469
local got_orgs=false
413-
[[ "$node" =~ .*\/.* ]] && is_repo=true || is_user=true
470+
[[ ! "$node" =~ .*\/.* ]] || is_repo=true
414471
[ "$is_repo" = true ] && local graph=("stargazers" "watchers" "forks" "collaborators") || local graph=("followers" "following" "people")
415472
[ -z "$2" ] || graph=("$2")
416473

0 commit comments

Comments
 (0)