Skip to content

Commit 255bcb2

Browse files
hoshinolinamarcan
authored and committed
rust: drm: sched: Add GPU scheduler abstraction
The GPU scheduler manages scheduling GPU jobs and dependencies between them. This Rust abstraction allows Rust DRM drivers to use this functionality. Signed-off-by: Asahi Lina <lina@asahilina.net>
1 parent 75eb07d commit 255bcb2

4 files changed

Lines changed: 351 additions & 0 deletions

File tree

rust/bindings/bindings_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <drm/drm_gem_shmem_helper.h>
1414
#include <drm/drm_ioctl.h>
1515
#include <drm/drm_syncobj.h>
16+
#include <drm/gpu_scheduler.h>
1617
#include <linux/delay.h>
1718
#include <linux/device.h>
1819
#include <linux/dma-fence.h>

rust/helpers.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,12 @@ int rust_helper_sg_dma_len(const struct scatterlist *sg)
439439
}
440440
EXPORT_SYMBOL_GPL(rust_helper_sg_dma_len);
441441

442+
/*
 * Out-of-line wrapper around msecs_to_jiffies() for Rust code.
 *
 * msecs_to_jiffies() is not an out-of-line exported symbol in the C headers,
 * so bindgen cannot produce a directly callable binding for it; this exported
 * helper gives Rust a real symbol to link against.
 */
unsigned long rust_helper_msecs_to_jiffies(const unsigned int m)
{
	return msecs_to_jiffies(m);
}
EXPORT_SYMBOL_GPL(rust_helper_msecs_to_jiffies);
447+
442448
#ifdef CONFIG_DMA_SHARED_BUFFER
443449

444450
void rust_helper_dma_fence_get(struct dma_fence *fence)

rust/kernel/drm/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ pub mod file;
88
pub mod gem;
99
pub mod ioctl;
1010
pub mod mm;
11+
pub mod sched;
1112
pub mod syncobj;

rust/kernel/drm/sched.rs

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR MIT
2+
3+
//! DRM Scheduler
4+
//!
5+
//! C header: [`include/drm/gpu_scheduler.h`](../../../../include/drm/gpu_scheduler.h)
6+
7+
use crate::{
8+
bindings, device,
9+
dma_fence::*,
10+
error::{to_result, Result},
11+
prelude::*,
12+
sync::{Arc, UniqueArc},
13+
};
14+
use alloc::boxed::Box;
15+
use core::marker::PhantomData;
16+
use core::mem::MaybeUninit;
17+
use core::ops::{Deref, DerefMut};
18+
use core::ptr::addr_of_mut;
19+
20+
/// Scheduler status after timeout recovery.
///
/// Mirrors the C `enum drm_gpu_sched_stat`; returned from
/// [`JobImpl::timed_out`] back to the scheduler core to report whether the
/// device survived recovery.
#[repr(u32)]
pub enum Status {
    /// Device recovered from the timeout and can execute jobs again
    Nominal = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_NOMINAL,
    /// Device is no longer available
    NoDevice = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_ENODEV,
}
28+
29+
/// Scheduler priorities.
///
/// Mirrors the C `enum drm_sched_priority`; passed to `drm_sched_entity_init`
/// when creating an [`Entity`].
#[repr(i32)]
pub enum Priority {
    /// Low userspace priority
    Min = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_MIN,
    /// Normal userspace priority
    Normal = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_NORMAL,
    /// High userspace priority
    High = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_HIGH,
    /// Kernel priority (highest)
    Kernel = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_KERNEL,
}
41+
42+
/// Trait to be implemented by driver job objects.
///
/// The methods here are invoked by the C scheduler core through the
/// `drm_sched_backend_ops` vtable built in [`Scheduler::OPS`]; drivers never
/// call them directly.
pub trait JobImpl: Sized {
    /// Called when the scheduler is considering scheduling this job next, to get another Fence
    /// for this job to block on. Once it returns None, run() may be called.
    ///
    /// The default implementation returns `None`, which is equivalent to
    /// leaving the C `prepare_job` pointer NULL (no extra dependencies).
    fn prepare(_job: &mut Job<Self>) -> Option<Fence> {
        None // Equivalent to NULL function pointer
    }

    /// Called to execute the job once all of the dependencies have been resolved. This may be
    /// called multiple times, if timed_out() has happened and drm_sched_job_recovery() decides
    /// to try it again.
    ///
    /// Returns the hardware fence that will signal on job completion, `None`
    /// if there is nothing to wait for, or an error to mark the job as failed.
    fn run(job: &mut Job<Self>) -> Result<Option<Fence>>;

    /// Called when a job has taken too long to execute, to trigger GPU recovery.
    ///
    /// This method is called in a workqueue context.
    fn timed_out(job: &mut Job<Self>) -> Status;
}
60+
61+
/// C vtable callback: fetch the next dependency fence for a job.
///
/// Dispatches to [`JobImpl::prepare`]. Returns an owned `dma_fence` reference
/// for the scheduler to wait on, or NULL when the job has no more
/// dependencies.
unsafe extern "C" fn prepare_job_cb<T: JobImpl>(
    sched_job: *mut bindings::drm_sched_job,
    _s_entity: *mut bindings::drm_sched_entity,
) -> *mut bindings::dma_fence {
    // SAFETY: All of our jobs are Job<T>, with `job` embedded as a field, so
    // container_of recovers the Rust wrapper from the C pointer.
    let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;

    match T::prepare(unsafe { &mut *p }) {
        None => core::ptr::null_mut(),
        // into_raw() transfers the fence reference to the C side.
        Some(fence) => fence.into_raw(),
    }
}
73+
74+
/// C vtable callback: execute a job whose dependencies have all signaled.
///
/// Dispatches to [`JobImpl::run`]. Returns the hardware completion fence
/// (ownership transferred), NULL if there is nothing to wait for, or an
/// errno-encoded fence pointer on failure.
unsafe extern "C" fn run_job_cb<T: JobImpl>(
    sched_job: *mut bindings::drm_sched_job,
) -> *mut bindings::dma_fence {
    // SAFETY: All of our jobs are Job<T>.
    let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;

    match T::run(unsafe { &mut *p }) {
        // Errors are reported to the C side as an ERR_PTR-style pointer.
        Err(e) => e.to_ptr(),
        Ok(None) => core::ptr::null_mut(),
        Ok(Some(fence)) => fence.into_raw(),
    }
}
86+
87+
/// C vtable callback: a job exceeded its timeout; trigger recovery.
///
/// Dispatches to [`JobImpl::timed_out`] and converts the [`Status`] back to
/// the C `drm_gpu_sched_stat` value. Runs in workqueue context.
unsafe extern "C" fn timedout_job_cb<T: JobImpl>(
    sched_job: *mut bindings::drm_sched_job,
) -> bindings::drm_gpu_sched_stat {
    // SAFETY: All of our jobs are Job<T>.
    let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;

    T::timed_out(unsafe { &mut *p }) as bindings::drm_gpu_sched_stat
}
95+
96+
/// C vtable callback: release a job the scheduler is done with.
///
/// Reconstitutes the owning `Box<Job<T>>` and drops it, which also runs
/// `Job::drop` (and thus `drm_sched_job_cleanup`).
unsafe extern "C" fn free_job_cb<T: JobImpl>(sched_job: *mut bindings::drm_sched_job) {
    // SAFETY: All of our jobs are Job<T>.
    let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;

    // Convert the job back to a Box and drop it
    // SAFETY: All of our Job<T>s are created inside a box, and ownership was
    // handed to the scheduler in ArmedJob::push(), so reclaiming it here is sound.
    unsafe { Box::from_raw(p) };
}
104+
105+
/// A DRM scheduler job.
///
/// Embeds the C `drm_sched_job` next to the driver payload `T`. Jobs are
/// always heap-allocated (`Box`) because the C callbacks recover the wrapper
/// from the embedded field via `container_of!`.
pub struct Job<T: JobImpl> {
    // Embedded C job; the callback `container_of!` casts depend on this field.
    job: bindings::drm_sched_job,
    // Driver-provided payload, reachable through Deref/DerefMut.
    inner: T,
}

// Deref to the driver payload so user code can treat a Job<T> like a T.
impl<T: JobImpl> Deref for Job<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

impl<T: JobImpl> DerefMut for Job<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}

impl<T: JobImpl> Drop for Job<T> {
    fn drop(&mut self) {
        // SAFETY: At this point the job has either been submitted and this is being called from
        // `free_job_cb` above, or it hasn't and it is safe to call `drm_sched_job_cleanup`.
        unsafe { bindings::drm_sched_job_cleanup(&mut self.job) };
    }
}
132+
133+
/// A pending DRM scheduler job (not yet armed)
///
/// First stage of the job lifecycle: dependencies may still be added. The
/// lifetime `'a` ties the job to the [`Entity`] that created it (see
/// `Entity::new_job`), which must stay alive until submission.
pub struct PendingJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>);

impl<'a, T: JobImpl> PendingJob<'a, T> {
    /// Add a fence as a dependency to the job
    ///
    /// The scheduler will not run the job before the fence signals; the
    /// fence reference is transferred to the C side via `into_raw()`.
    pub fn add_dependency(&mut self, fence: Fence) -> Result {
        // SAFETY: self.0.job was initialized by drm_sched_job_init() in
        // Entity::new_job, and into_raw() yields a valid owned fence pointer.
        to_result(unsafe {
            bindings::drm_sched_job_add_dependency(&mut self.0.job, fence.into_raw())
        })
    }

    /// Arm the job to make it ready for execution
    ///
    /// Consumes the pending job; no further dependencies can be added.
    pub fn arm(mut self) -> ArmedJob<'a, T> {
        // SAFETY: The job was initialized in Entity::new_job and has not been
        // armed before (arm() consumes self, so it can only run once).
        unsafe { bindings::drm_sched_job_arm(&mut self.0.job) };
        ArmedJob(self.0, PhantomData)
    }
}

// Deref to the underlying Job so the payload stays accessible pre-arm.
impl<'a, T: JobImpl> Deref for PendingJob<'a, T> {
    type Target = Job<T>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'a, T: JobImpl> DerefMut for PendingJob<'a, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
164+
165+
/// An armed DRM scheduler job (not yet submitted)
///
/// Second stage of the job lifecycle: the scheduled/finished fences exist
/// (created by `drm_sched_job_arm`), but the scheduler does not own the job
/// until [`ArmedJob::push`].
pub struct ArmedJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>);

impl<'a, T: JobImpl> ArmedJob<'a, T> {
    /// Returns the job fences
    pub fn fences(&self) -> JobFences<'_> {
        // SAFETY: s_fence was allocated by drm_sched_job_arm(), which has run
        // by construction of ArmedJob.
        // NOTE(review): this creates a `&mut` from `&self` through a raw
        // pointer — presumably sound because JobFences borrows self, but
        // worth confirming against aliasing rules.
        JobFences(unsafe { &mut *self.0.job.s_fence })
    }

    /// Push the job for execution into the scheduler
    pub fn push(self) {
        // After this point, the job is submitted and owned by the scheduler
        // (it will be freed via free_job_cb), so leak the Box here.
        let ptr = match self {
            ArmedJob(job, _) => Box::<Job<T>>::into_raw(job),
        };

        // SAFETY: We are passing in ownership of a valid Box raw pointer.
        unsafe { bindings::drm_sched_entity_push_job(addr_of_mut!((*ptr).job)) };
    }
}

// Deref to the underlying Job so the payload stays accessible post-arm.
impl<'a, T: JobImpl> Deref for ArmedJob<'a, T> {
    type Target = Job<T>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'a, T: JobImpl> DerefMut for ArmedJob<'a, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
198+
199+
/// Reference to the bundle of fences attached to a DRM scheduler job
///
/// Borrowed from an [`ArmedJob`]; wraps the C `drm_sched_fence` created by
/// `drm_sched_job_arm()`.
pub struct JobFences<'a>(&'a mut bindings::drm_sched_fence);

impl<'a> JobFences<'a> {
    /// Returns a new reference to the job scheduled fence.
    pub fn scheduled(&mut self) -> Fence {
        // SAFETY: self.0 is a valid drm_sched_fence and Fence::get_raw takes
        // an additional reference on the embedded dma_fence.
        unsafe { Fence::get_raw(&mut self.0.scheduled) }
    }

    /// Returns a new reference to the job finished fence.
    pub fn finished(&mut self) -> Fence {
        // SAFETY: as above, for the `finished` fence.
        unsafe { Fence::get_raw(&mut self.0.finished) }
    }
}
213+
214+
/// Heap-allocated, pinned state backing an [`Entity`].
///
/// Kept behind `Pin<Box<..>>` because the C side stores pointers into the
/// embedded `drm_sched_entity` and `guilty` fields.
struct EntityInner<T: JobImpl> {
    // Embedded C entity; initialized by drm_sched_entity_init().
    entity: bindings::drm_sched_entity,
    // TODO: Allow users to share guilty flag between entities
    // Keeps the scheduler alive for at least as long as this entity.
    sched: Arc<SchedulerInner<T>>,
    // Per-entity "guilty" flag handed to drm_sched_entity_init().
    guilty: bindings::atomic_t,
    _p: PhantomData<T>,
}

impl<T: JobImpl> Drop for EntityInner<T> {
    fn drop(&mut self) {
        // SAFETY: The EntityInner is initialized. This will cancel/free all jobs.
        unsafe { bindings::drm_sched_entity_destroy(&mut self.entity) };
    }
}

// SAFETY: TODO
unsafe impl<T: JobImpl> Sync for EntityInner<T> {}
unsafe impl<T: JobImpl> Send for EntityInner<T> {}
232+
233+
/// A DRM scheduler entity.
234+
pub struct Entity<T: JobImpl>(Pin<Box<EntityInner<T>>>);
235+
236+
impl<T: JobImpl> Entity<T> {
237+
/// Create a new scheduler entity.
238+
pub fn new(sched: &Scheduler<T>, priority: Priority) -> Result<Self> {
239+
let mut entity: Box<MaybeUninit<EntityInner<T>>> = Box::try_new_zeroed()?;
240+
241+
let mut sched_ptr = &sched.0.sched as *const _ as *mut _;
242+
243+
// SAFETY: The Box is allocated above and valid.
244+
unsafe {
245+
bindings::drm_sched_entity_init(
246+
addr_of_mut!((*entity.as_mut_ptr()).entity),
247+
priority as _,
248+
&mut sched_ptr,
249+
1,
250+
addr_of_mut!((*entity.as_mut_ptr()).guilty),
251+
)
252+
};
253+
254+
// SAFETY: The Box is allocated above and valid.
255+
unsafe { addr_of_mut!((*entity.as_mut_ptr()).sched).write(sched.0.clone()) };
256+
257+
// SAFETY: entity is now initialized.
258+
Ok(Self(Pin::from(unsafe { entity.assume_init() })))
259+
}
260+
261+
/// Create a new job on this entity.
262+
///
263+
/// The entity must outlive the pending job until it transitions into the submitted state,
264+
/// after which the scheduler owns it.
265+
pub fn new_job(&self, inner: T) -> Result<PendingJob<'_, T>> {
266+
let mut job: Box<MaybeUninit<Job<T>>> = Box::try_new_zeroed()?;
267+
268+
// SAFETY: We hold a reference to the entity (which is a valid pointer),
269+
// and the job object was just allocated above.
270+
to_result(unsafe {
271+
bindings::drm_sched_job_init(
272+
addr_of_mut!((*job.as_mut_ptr()).job),
273+
&self.0.as_ref().get_ref().entity as *const _ as *mut _,
274+
core::ptr::null_mut(),
275+
)
276+
})?;
277+
278+
// SAFETY: The Box pointer is valid, and this initializes the inner member.
279+
unsafe { addr_of_mut!((*job.as_mut_ptr()).inner).write(inner) };
280+
281+
// SAFETY: All fields of the Job<T> are now initialized.
282+
Ok(PendingJob(unsafe { job.assume_init() }, PhantomData))
283+
}
284+
}
285+
286+
/// DRM scheduler inner data
///
/// Reference-counted (via `Arc`) so that entities can keep the scheduler
/// alive; freed only after the last entity drops its reference.
pub struct SchedulerInner<T: JobImpl> {
    // Embedded C scheduler; initialized by drm_sched_init().
    sched: bindings::drm_gpu_scheduler,
    _p: PhantomData<T>,
}

impl<T: JobImpl> Drop for SchedulerInner<T> {
    fn drop(&mut self) {
        // SAFETY: The scheduler is valid. This assumes drm_sched_fini() will take care of
        // freeing all in-progress jobs.
        unsafe { bindings::drm_sched_fini(&mut self.sched) };
    }
}

// SAFETY: TODO
unsafe impl<T: JobImpl> Sync for SchedulerInner<T> {}
unsafe impl<T: JobImpl> Send for SchedulerInner<T> {}
303+
304+
/// A DRM Scheduler
///
/// Thin handle over the shared [`SchedulerInner`]; cloneable via the inner
/// `Arc` through [`Entity`] references.
pub struct Scheduler<T: JobImpl>(Arc<SchedulerInner<T>>);

impl<T: JobImpl> Scheduler<T> {
    // Vtable handed to drm_sched_init(); routes the C callbacks to the
    // monomorphized trampolines for this T.
    const OPS: bindings::drm_sched_backend_ops = bindings::drm_sched_backend_ops {
        prepare_job: Some(prepare_job_cb::<T>),
        run_job: Some(run_job_cb::<T>),
        timedout_job: Some(timedout_job_cb::<T>),
        free_job: Some(free_job_cb::<T>),
    };
    /// Creates a new DRM Scheduler object
    ///
    /// * `device` — DRM device the scheduler belongs to.
    /// * `hw_submission` — number of jobs the hardware can have in flight.
    /// * `hang_limit` — how many times a job may time out before it is killed.
    /// * `timeout_ms` — job timeout in milliseconds (converted to jiffies).
    /// * `name` — scheduler name, used in debug output.
    // TODO: Shared timeout workqueues & scores
    pub fn new(
        device: &impl device::RawDevice,
        hw_submission: u32,
        hang_limit: u32,
        timeout_ms: usize,
        name: &'static CStr,
    ) -> Result<Scheduler<T>> {
        let mut sched: UniqueArc<MaybeUninit<SchedulerInner<T>>> = UniqueArc::try_new_uninit()?;

        // SAFETY: The drm_sched pointer is valid and pinned as it was just allocated above.
        to_result(unsafe {
            bindings::drm_sched_init(
                addr_of_mut!((*sched.as_mut_ptr()).sched),
                &Self::OPS,
                hw_submission,
                hang_limit,
                // msecs_to_jiffies goes through the exported C helper; the
                // try_into()s guard both the ms argument and the jiffies result.
                bindings::msecs_to_jiffies(timeout_ms.try_into()?).try_into()?,
                core::ptr::null_mut(),
                core::ptr::null_mut(),
                name.as_char_ptr(),
                device.raw_device(),
            )
        })?;

        // SAFETY: All fields of SchedulerInner are now initialized.
        Ok(Scheduler(unsafe { sched.assume_init() }.into()))
    }
}

0 commit comments

Comments
 (0)