Skip to content

Commit 563d1ec

Browse files
committed
Fix replicator scheduler total jobs metric
Previously, we didn't always remember to update the total job stats gauge, so it was possible for it to become stale. The periodic scheduler refresh updated all the other gauges but didn't update the total. To fix it, make sure to update the stat in more places (on job removes and adds) and, most importantly, add it to the periodic stat refresh function, so even if we still miss an update somewhere, the gauge should eventually catch up after a rescheduling cycle.
1 parent b4e58f1 commit 563d1ec

1 file changed

Lines changed: 11 additions & 8 deletions

File tree

src/couch_replicator/src/couch_replicator_scheduler.erl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,7 @@ handle_call({add_job, Job}, _From, State) ->
258258
true = add_job_int(Job),
259259
ok = maybe_start_newly_added_job(Job, State),
260260
couch_stats:increment_counter([couch_replicator, jobs, adds]),
261-
TotalJobs = ets:info(?MODULE, size),
262-
couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs),
261+
update_total_jobs_stats(),
263262
{reply, ok, State};
264263
handle_call({remove_job, Id}, _From, State) ->
265264
ok = maybe_remove_job_int(Id, State),
@@ -464,6 +463,7 @@ handle_crashed_job(Job, Reason, State) ->
464463
update_running_jobs_stats(State#state.stats_pid),
465464
ok;
466465
false ->
466+
update_total_jobs_stats(),
467467
ok
468468
end.
469469

@@ -480,6 +480,7 @@ maybe_start_newly_added_job(Job, State) ->
480480
update_running_jobs_stats(State#state.stats_pid),
481481
ok;
482482
false ->
483+
update_total_jobs_stats(),
483484
ok
484485
end.
485486

@@ -655,16 +656,13 @@ maybe_remove_job_int(JobId, State) ->
655656
ok = stop_job_int(Job, State),
656657
true = remove_job_int(Job),
657658
couch_stats:increment_counter([couch_replicator, jobs, removes]),
658-
TotalJobs = ets:info(?MODULE, size),
659-
couch_stats:update_gauge(
660-
[couch_replicator, jobs, total],
661-
TotalJobs
662-
),
663659
update_running_jobs_stats(State#state.stats_pid),
664660
ok;
665661
{error, not_found} ->
666662
ok
667-
end.
663+
end,
664+
update_total_jobs_stats(),
665+
ok.
668666

669667
start_job_int(#job{pid = Pid}, _State) when Pid /= undefined ->
670668
ok;
@@ -964,6 +962,7 @@ stats_updater_refresh() ->
964962
couch_stats:update_gauge([couch_replicator, jobs, pending], PendingN),
965963
couch_stats:update_gauge([couch_replicator, jobs, running], RunningN),
966964
couch_stats:update_gauge([couch_replicator, jobs, crashed], CrashedN),
965+
update_total_jobs_stats(),
967966
ok.
968967

969968
-spec stats_fold(#job{}, #stats_acc{}) -> #stats_acc{}.
@@ -976,6 +975,10 @@ stats_fold(#job{pid = undefined, history = [{{crashed, _}, _} | _]}, Acc) ->
976975
stats_fold(#job{pid = P, history = [{started, _} | _]}, Acc) when is_pid(P) ->
977976
Acc#stats_acc{running_n = Acc#stats_acc.running_n + 1}.
978977

978+
update_total_jobs_stats() ->
979+
TotalJobs = ets:info(?MODULE, size),
980+
couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs).
981+
979982
-spec existing_replication(#rep{}) -> boolean().
980983
existing_replication(#rep{} = NewRep) ->
981984
case job_by_id(NewRep#rep.id) of

0 commit comments

Comments
 (0)