@@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
  */
 static int posix_cpu_timer_create(struct k_itimer *new_timer)
 {
+	static struct lock_class_key posix_cpu_timers_key;
 	struct pid *pid;
 
 	rcu_read_lock();
@@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 	}
 
+	/*
+	 * If posix timer expiry is handled in task work context then
+	 * timer::it_lock can be taken without disabling interrupts as all
+	 * other locking happens in task context. This requires a separate
+	 * lock class key, otherwise regular posix timer expiry would record
+	 * the lock class being taken in interrupt context and generate a
+	 * false positive warning.
+	 */
+	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
+		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
+
 	new_timer->kclock = &clock_posix_cpu;
 	timerqueue_init(&new_timer->it.cpu.node);
 	new_timer->it.cpu.pid = get_pid(pid);
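
For context: lockdep keys a lock's class by its initialization site. Re-keying therefore follows a small, general pattern: declare a function-local static struct lock_class_key and point the lock at it after initialization. A minimal sketch of that pattern, independent of this patch (struct my_timer, my_timer_init() and task_context_only are hypothetical names):

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct my_timer {
	spinlock_t lock;
};

static void my_timer_init(struct my_timer *t, bool task_context_only)
{
	static struct lock_class_key my_timer_key;

	spin_lock_init(&t->lock);
	/*
	 * All locks initialized at this site share one lockdep class by
	 * default. Re-keying the task-context-only variant (hypothetical
	 * flag) keeps lockdep from merging its usage history with
	 * instances that are also taken from hard interrupt context.
	 */
	if (task_context_only)
		lockdep_set_class(&t->lock, &my_timer_key);
}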
@@ -1080,43 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 	return false;
 }
 
+static void handle_posix_cpu_timers(struct task_struct *tsk);
+
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+static void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
 /*
- * This is called from the timer interrupt handler.  The irq handler has
- * already updated our counts.  We need to check if any timers fire now.
- * Interrupts are disabled.
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
  */
-void run_posix_cpu_timers(void)
+void __init posix_cputimers_init_work(void)
 {
-	struct task_struct *tsk = current;
-	struct k_itimer *timer, *next;
-	unsigned long flags;
-	LIST_HEAD(firing);
+	init_task_work(&current->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+}
 
-	lockdep_assert_irqs_disabled();
+/*
+ * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
+ * in hard interrupt context or in task context with interrupts
+ * disabled. Aside from that the writer/reader interaction is always in the
+ * context of the current task, which means they are strictly per CPU.
+ */
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return tsk->posix_cputimers_work.scheduled;
+}
 
-	/*
-	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.
-	 */
-	if (!fastpath_timer_check(tsk))
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
 		return;
 
-	lockdep_posixtimer_enter();
-	if (!lock_task_sighand(tsk, &flags)) {
-		lockdep_posixtimer_exit();
-		return;
+	/* Schedule task work to actually expire the timers */
+	tsk->posix_cputimers_work.scheduled = true;
+	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	bool ret = true;
+
+	/*
+	 * On !RT kernels interrupts are disabled while collecting expired
+	 * timers, so no tick can happen and the fast path check can be
+	 * reenabled without further checks.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		tsk->posix_cputimers_work.scheduled = false;
+		return true;
 	}
+
 	/*
-	 * Here we take off tsk->signal->cpu_timers[N] and
-	 * tsk->cpu_timers[N] all the timers that are firing, and
-	 * put them on the firing list.
+	 * On RT enabled kernels ticks can happen while the expired timers
+	 * are collected under sighand lock. But any tick which observes
+	 * the CPUTIMERS_WORK_SCHEDULED bit set does not run the fastpath
+	 * checks. So reenabling the tick work has to be done carefully:
+	 *
+	 * Disable interrupts and run the fast path check if jiffies have
+	 * advanced since the collecting of expired timers started. If
+	 * jiffies have not advanced or the fast path check did not find
+	 * newly expired timers, reenable the fast path check in the timer
+	 * interrupt. If there are newly expired timers, return false and
+	 * let the collection loop repeat.
 	 */
-	check_thread_timers(tsk, &firing);
+	local_irq_disable();
+	if (start != jiffies && fastpath_timer_check(tsk))
+		ret = false;
+	else
+		tsk->posix_cputimers_work.scheduled = false;
+	local_irq_enable();
+
+	return ret;
+}
+#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	lockdep_posixtimer_enter();
+	handle_posix_cpu_timers(tsk);
+	lockdep_posixtimer_exit();
+}
+
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return false;
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	return true;
+}
+#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
+{
+	struct k_itimer *timer, *next;
+	unsigned long flags, start;
+	LIST_HEAD(firing);
+
+	if (!lock_task_sighand(tsk, &flags))
+		return;
 
-	check_process_timers(tsk, &firing);
+	do {
+		/*
+		 * On RT locking sighand lock does not disable interrupts,
+		 * so this needs to be careful vs. ticks. Store the current
+		 * jiffies value.
+		 */
+		start = READ_ONCE(jiffies);
+		barrier();
+
+		/*
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+
+		check_process_timers(tsk, &firing);
+
+		/*
+		 * The above timer checks have updated the expiry cache and
+		 * because nothing can have queued or modified timers after
+		 * sighand lock was taken above it is guaranteed to be
+		 * consistent. So the next timer interrupt fastpath check
+		 * will find valid data.
+		 *
+		 * If timer expiry runs in the timer interrupt context then
+		 * the loop is not relevant as timers will be directly
+		 * expired in interrupt context. The stub function below
+		 * always returns true which allows the compiler to
+		 * optimize the loop out.
+		 *
+		 * If timer expiry is deferred to task work context then
+		 * the following rules apply:
+		 *
+		 * - On !RT kernels no tick can have happened on this CPU
+		 *   after sighand lock was acquired because interrupts are
+		 *   disabled. So reenabling task work before dropping
+		 *   sighand lock and reenabling interrupts is race free.
+		 *
+		 * - On RT kernels ticks might have happened but the tick
+		 *   work ignored posix CPU timer handling because the
+		 *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
+		 *   must be done very carefully, including a check whether
+		 *   ticks have happened since the start of the timer
+		 *   expiry checks. posix_cpu_timers_enable_work() takes
+		 *   care of that and eventually lets the expiry checks
+		 *   run again.
+		 */
+	} while (!posix_cpu_timers_enable_work(tsk, start));
 
 	/*
-	 * We must release these locks before taking any timer's lock.
+	 * We must release sighand lock before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list.  We have set
 	 * the firing flag in each timer, so that a deletion attempt
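
The deferral machinery above builds on the generic task_work API: init_task_work() binds a callback to a struct callback_head, and task_work_add() with TWA_RESUME queues it to run in task context before the task returns to user space. A simplified sketch of the same shape (struct deferred_work and both function names are hypothetical; the real patch clears the flag in posix_cpu_timers_enable_work() and re-runs the fastpath check on RT):

#include <linux/task_work.h>
#include <linux/sched.h>

struct deferred_work {
	struct callback_head	work;
	bool			scheduled;
};

static void deferred_work_func(struct callback_head *head)
{
	struct deferred_work *dw = container_of(head, struct deferred_work, work);

	/* Task context, interrupts enabled */
	dw->scheduled = false;
	/* ... expire the timers here ... */
}

/* Caller runs in hard interrupt context, e.g. from the tick */
static void defer_to_task_work(struct task_struct *tsk, struct deferred_work *dw)
{
	if (dw->scheduled)
		return;

	dw->scheduled = true;
	init_task_work(&dw->work, deferred_work_func);
	/* TWA_RESUME: run the callback before the task returns to user space */
	task_work_add(tsk, &dw->work, TWA_RESUME);
}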
@@ -1134,6 +1266,13 @@ void run_posix_cpu_timers(void)
 	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;
 
+		/*
+		 * spin_lock() is sufficient here even independent of the
+		 * expiry context. If expiry happens in hard interrupt
+		 * context it's obvious. For task work context it's safe
+		 * because all other operations on timer::it_lock happen in
+		 * task context (syscall or exit).
+		 */
 		spin_lock(&timer->it_lock);
 		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
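
The spin_lock() comment above rests on the usual rule: the _irqsave()/_irq() variants are only needed when a lock can be taken from interrupt context while the same CPU may already hold it in task context. A minimal sketch of the distinction, with a hypothetical obj_lock:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(obj_lock);	/* hypothetical example lock */

/* Safe when every path taking obj_lock runs in task context */
static void task_context_only_user(void)
{
	spin_lock(&obj_lock);
	/* ... */
	spin_unlock(&obj_lock);
}

/*
 * If an interrupt handler could also take obj_lock, task-context users
 * would have to disable interrupts to avoid deadlocking against a
 * handler that fires on the same CPU while the lock is held:
 */
static void user_shared_with_irq_context(void)
{
	unsigned long flags;

	spin_lock_irqsave(&obj_lock, flags);
	/* ... */
	spin_unlock_irqrestore(&obj_lock, flags);
}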
@@ -1147,7 +1286,34 @@ void run_posix_cpu_timers(void)
 		cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
-	lockdep_posixtimer_exit();
+}
+
+/*
+ * This is called from the timer interrupt handler. The irq handler has
+ * already updated our counts. We need to check if any timers fire now.
+ * Interrupts are disabled.
+ */
+void run_posix_cpu_timers(void)
+{
+	struct task_struct *tsk = current;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * If the actual expiry is deferred to task work context and the
+	 * work is already scheduled there is no point in doing anything here.
+	 */
+	if (posix_cpu_timers_work_scheduled(tsk))
+		return;
+
+	/*
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers. If that's so, just return.
+	 */
+	if (!fastpath_timer_check(tsk))
+		return;
+
+	__run_posix_cpu_timers(tsk);
 }
 
 /*
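
The !CONFIG_POSIX_CPU_TIMERS_TASK_WORK stubs rely on a common kernel idiom: a constant-returning static inline lets the compiler prove the retry loop in handle_posix_cpu_timers() runs exactly once and optimize it away. A small sketch of the same shape under a hypothetical CONFIG_FEATURE_X (feature_enable_work() and collect_expired() are illustrative names):

#include <linux/jiffies.h>

#ifdef CONFIG_FEATURE_X
static inline bool feature_enable_work(unsigned long start)
{
	/* Retry the collection if a tick advanced jiffies meanwhile */
	return start == jiffies;
}
#else
static inline bool feature_enable_work(unsigned long start)
{
	return true;	/* constant: the do/while below runs exactly once */
}
#endif

static void collect_expired(void)
{
	unsigned long start;

	do {
		start = READ_ONCE(jiffies);
		/* ... collect expired timers ... */
	} while (!feature_enable_work(start));
}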