diff --git a/etc/syscalls_linux_aarch64.md b/etc/syscalls_linux_aarch64.md index 8696d05e..d1f54323 100644 --- a/etc/syscalls_linux_aarch64.md +++ b/etc/syscalls_linux_aarch64.md @@ -216,7 +216,7 @@ | 0xd5 (213) | readahead | (int fd, loff_t offset, size_t count) | __arm64_sys_readahead | false | | 0xd6 (214) | brk | (unsigned long brk) | __arm64_sys_brk | true | | 0xd7 (215) | munmap | (unsigned long addr, size_t len) | __arm64_sys_munmap | true | -| 0xd8 (216) | mremap | (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) | __arm64_sys_mremap | false | +| 0xd8 (216) | mremap | (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) | __arm64_sys_mremap | true | | 0xd9 (217) | add_key | (const char *_type, const char *_description, const void *_payload, size_t plen, key_serial_t ringid) | __arm64_sys_add_key | false | | 0xda (218) | request_key | (const char *_type, const char *_description, const char *_callout_info, key_serial_t destringid) | __arm64_sys_request_key | false | | 0xdb (219) | keyctl | (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) | __arm64_sys_keyctl | false | diff --git a/libkernel/src/memory/proc_vm/memory_map/mod.rs b/libkernel/src/memory/proc_vm/memory_map/mod.rs index 508f8e66..a80c5828 100644 --- a/libkernel/src/memory/proc_vm/memory_map/mod.rs +++ b/libkernel/src/memory/proc_vm/memory_map/mod.rs @@ -37,6 +37,17 @@ pub enum AddressRequest { }, } +/// Describes where an `mremap` operation may place the remapped VMA. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RemapDestination { + /// Resize in place only. + InPlaceOnly, + /// Resize in place if possible, otherwise move to any free region. + MayMove, + /// Move the mapping to exactly this address. + Fixed(VA), +} + impl MemoryMap { /// Creates a new, empty address space. pub fn new() -> Result { @@ -243,6 +254,257 @@ impl MemoryMap { Err(KernelError::NoMemory) } + /// Remaps an existing mapping + pub fn mremap( + &mut self, + old_addr: VA, + old_len: usize, + new_len: usize, + destination: RemapDestination, + ) -> Result<(VA, Vec)> { + if !old_addr.is_page_aligned() || old_len == 0 || new_len == 0 { + return Err(KernelError::InvalidValue); + } + + let old_len = Self::align_len(old_len); + let new_len = Self::align_len(new_len); + let old_region = VirtMemoryRegion::new(old_addr, old_len); + + let source_vma = self.find_vma(old_addr).cloned().ok_or(KernelError::Fault)?; + + if old_region.end_address() > source_vma.region.end_address() { + return Err(KernelError::Fault); + } + + if let RemapDestination::Fixed(new_addr) = destination { + if !new_addr.is_page_aligned() { + return Err(KernelError::InvalidValue); + } + + let new_region = VirtMemoryRegion::new(new_addr, new_len); + + if new_region.overlaps(old_region) || new_region.overlaps(source_vma.region) { + return Err(KernelError::InvalidValue); + } + } + + if old_len == new_len && !matches!(destination, RemapDestination::Fixed(_)) { + return Ok((old_addr, Vec::new())); + } + + if let RemapDestination::Fixed(new_addr) = destination { + return self.move_selected_mapping( + source_vma, + old_region, + VirtMemoryRegion::new(new_addr, new_len), + true, + ); + } + + if new_len <= old_len { + return self.shrink_in_place(source_vma, old_region, new_len); + } + + if self.can_expand_in_place(&source_vma, old_region, new_len) { + return self.expand_in_place(source_vma, old_region, new_len); + } + + let new_region = match destination { + RemapDestination::InPlaceOnly => return Err(KernelError::NoMemory), + RemapDestination::MayMove => self + .find_free_region(new_len) + .ok_or(KernelError::NoMemory)?, + RemapDestination::Fixed(_) => unreachable!(), + }; + + self.move_selected_mapping( + source_vma, + old_region, + new_region, + matches!(destination, RemapDestination::Fixed(_)), + ) + } + + fn align_len(len: usize) -> usize { + if len & PAGE_MASK != 0 { + (len & !PAGE_MASK) + PAGE_SIZE + } else { + len + } + } + + fn can_expand_in_place( + &self, + source_vma: &VMArea, + old_region: VirtMemoryRegion, + new_len: usize, + ) -> bool { + let new_end = old_region.start_address().add_bytes(new_len); + + if new_end <= source_vma.region.end_address() { + return true; + } + + self.is_region_free(VirtMemoryRegion::from_start_end_address( + source_vma.region.end_address(), + new_end, + )) + } + + fn expand_in_place( + &mut self, + source_vma: VMArea, + old_region: VirtMemoryRegion, + new_len: usize, + ) -> Result<(VA, Vec)> { + let new_end = old_region.start_address().add_bytes(new_len); + + if new_end <= source_vma.region.end_address() { + return Ok((old_region.start_address(), Vec::new())); + } + + self.vmas + .remove(&source_vma.region.start_address()) + .unwrap(); + + let mut expanded_vma = source_vma; + expanded_vma.region = + VirtMemoryRegion::from_start_end_address(expanded_vma.region.start_address(), new_end); + + self.merge_vma(expanded_vma); + + Ok((old_region.start_address(), Vec::new())) + } + + fn shrink_in_place( + &mut self, + source_vma: VMArea, + old_region: VirtMemoryRegion, + new_len: usize, + ) -> Result<(VA, Vec)> { + let new_region = VirtMemoryRegion::new(old_region.start_address(), new_len); + let removed_region = VirtMemoryRegion::from_start_end_address( + new_region.end_address(), + old_region.end_address(), + ); + + let freed_pages = self.address_space.unmap_range(removed_region)?; + + self.vmas + .remove(&source_vma.region.start_address()) + .unwrap(); + + if source_vma.region.start_address() < old_region.start_address() { + self.merge_vma( + source_vma.shrink_to(VirtMemoryRegion::from_start_end_address( + source_vma.region.start_address(), + old_region.start_address(), + )), + ); + } + + self.merge_vma(source_vma.shrink_to(new_region)); + + if old_region.end_address() < source_vma.region.end_address() { + self.merge_vma( + source_vma.shrink_to(VirtMemoryRegion::from_start_end_address( + old_region.end_address(), + source_vma.region.end_address(), + )), + ); + } + + Ok((old_region.start_address(), freed_pages)) + } + + fn relocate_vma(vma: VMArea, new_region: VirtMemoryRegion) -> VMArea { + let mut moved_vma = vma; + moved_vma.region = new_region; + + if let VMAreaKind::File(mapping) = &mut moved_vma.kind { + mapping.len = core::cmp::min(mapping.len, new_region.size() as u64); + } + + moved_vma + } + + fn move_selected_mapping( + &mut self, + source_vma: VMArea, + old_region: VirtMemoryRegion, + new_region: VirtMemoryRegion, + clobber_target: bool, + ) -> Result<(VA, Vec)> { + let mut freed_pages = Vec::new(); + + if clobber_target { + freed_pages.append(&mut self.unmap_region(new_region, None)?); + } + + let preserved_len = core::cmp::min(old_region.size(), new_region.size()); + let mut newly_mapped = Vec::new(); + + if preserved_len != 0 { + let preserved_old = VirtMemoryRegion::new(old_region.start_address(), preserved_len); + let preserved_new = VirtMemoryRegion::new(new_region.start_address(), preserved_len); + + for (old_page, new_page) in preserved_old.iter_pages().zip(preserved_new.iter_pages()) { + if let Some(page_info) = self.address_space.translate(old_page) { + if let Err(err) = + self.address_space + .map_page(page_info.pfn, new_page, page_info.perms) + { + for mapped_page in newly_mapped { + let _ = self.address_space.unmap(mapped_page); + } + + return Err(err); + } + + newly_mapped.push(new_page); + } + } + + let _ = self.address_space.unmap_range(preserved_old)?; + } + + if old_region.size() > preserved_len { + freed_pages.append(&mut self.address_space.unmap_range( + VirtMemoryRegion::from_start_end_address( + old_region.start_address().add_bytes(preserved_len), + old_region.end_address(), + ), + )?); + } + + self.vmas + .remove(&source_vma.region.start_address()) + .unwrap(); + + if source_vma.region.start_address() < old_region.start_address() { + self.merge_vma( + source_vma.shrink_to(VirtMemoryRegion::from_start_end_address( + source_vma.region.start_address(), + old_region.start_address(), + )), + ); + } + + if old_region.end_address() < source_vma.region.end_address() { + self.merge_vma( + source_vma.shrink_to(VirtMemoryRegion::from_start_end_address( + old_region.end_address(), + source_vma.region.end_address(), + )), + ); + } + + let selected_vma = source_vma.shrink_to(old_region); + self.merge_vma(Self::relocate_vma(selected_vma, new_region)); + + Ok((new_region.start_address(), freed_pages)) + } + /// Checks if a given virtual memory region is completely free. fn is_region_free(&self, region: VirtMemoryRegion) -> bool { // Find the VMA that might overlap with the start of our desired region. @@ -304,9 +566,12 @@ impl MemoryMap { /// Inserts a new VMA, handling overlaps and merging it with neighbors if /// possible. - pub(super) fn insert_and_merge(&mut self, mut vma: VMArea) { + pub(super) fn insert_and_merge(&mut self, vma: VMArea) { let _ = self.unmap_region(vma.region, Some(vma.clone())); + self.merge_vma(vma); + } + fn merge_vma(&mut self, mut vma: VMArea) { // Try to merge with next VMA. if let Some(next_vma) = self.vmas.get(&vma.region.end_address()) && vma.can_merge_with(next_vma) @@ -320,7 +585,6 @@ impl MemoryMap { .unwrap() // Should not fail, as we just got this VMA. .region; vma.region.expand_by(next_vma_region.size()); - // `vma` now represents the merged region of [new, next]. } // Try to merge with the previous VMA. @@ -328,16 +592,14 @@ impl MemoryMap { .vmas .range_mut(..vma.region.start_address()) .next_back() - { // Check if it's contiguous and compatible. - if prev_vma.region.end_address() == vma.region.start_address() - && prev_vma.can_merge_with(&vma) - { - // The VMAs are mergeable. Expand the previous VMA to absorb the - // new one's region. - prev_vma.region.expand_by(vma.region.size()); - return; - } + && prev_vma.region.end_address() == vma.region.start_address() + && prev_vma.can_merge_with(&vma) + { + // The VMAs are mergeable. Expand the previous VMA to absorb the + // new one's region. + prev_vma.region.expand_by(vma.region.size()); + return; } // If we didn't merge into a previous VMA, insert the new (and possibly diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index 6ecef8c9..69b3fef3 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -54,7 +54,7 @@ use crate::{ memory::{ brk::sys_brk, mincore::sys_mincore, - mmap::{sys_mmap, sys_mprotect, sys_munmap}, + mmap::{sys_mmap, sys_mprotect, sys_mremap, sys_munmap}, process_vm::sys_process_vm_readv, }, net::syscalls::{ @@ -674,6 +674,17 @@ pub async fn handle_syscall(mut ctx: ProcessCtx) { .await .map_err(|e| match e {}), 0xd7 => sys_munmap(&ctx, VA::from_value(arg1 as usize), arg2 as _).await, + 0xd8 => { + sys_mremap( + &ctx, + VA::from_value(arg1 as usize), + arg2 as _, + arg3 as _, + arg4, + VA::from_value(arg5 as usize), + ) + .await + } 0xdc => { sys_clone( &ctx, diff --git a/src/memory/mmap.rs b/src/memory/mmap.rs index 6a312a9a..ebaa9d43 100644 --- a/src/memory/mmap.rs +++ b/src/memory/mmap.rs @@ -1,13 +1,16 @@ use core::sync::atomic::{AtomicUsize, Ordering}; use crate::{process::fd_table::Fd, sched::syscall_ctx::ProcessCtx}; -use alloc::string::{String, ToString}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use libkernel::{ error::{KernelError, Result}, memory::{ address::VA, proc_vm::{ - memory_map::AddressRequest, + memory_map::{AddressRequest, RemapDestination}, vmarea::{VMAPermissions, VMAreaKind}, }, region::VirtMemoryRegion, @@ -25,6 +28,10 @@ const MAP_FIXED_NOREPLACE: u64 = 0x100000; const MAP_ANON: u64 = 0x0020; const MAP_ANONYMOUS: u64 = 0x0020; +const MREMAP_MAYMOVE: u64 = 1; +const MREMAP_FIXED: u64 = 2; +const MREMAP_DONTUNMAP: u64 = 4; + /// Determines the minimal address that user-space is allowed to specify for /// MAP_FIXED{,_NOREPLACE}. static MMAP_MIN_ADDR: AtomicUsize = AtomicUsize::new(0x1000); @@ -138,29 +145,91 @@ pub async fn sys_mmap( Ok(new_mapping_addr.value()) } +fn free_unmapped_pages(pages: Vec) -> Result<()> { + if pages.is_empty() { + return Ok(()); + } + + // The frames returned by unmap/mremap are no longer mapped and belong to this process; + // creating temporary allocations from these regions allows the allocator to reclaim them on drop. + let allocator = crate::memory::PAGE_ALLOC + .get() + .ok_or(KernelError::NoMemory)?; + + // Create a temporary allocation from the single-page region and drop it immediately to free. + for p in pages { + let tmp = unsafe { allocator.alloc_from_region(p.as_phys_range()) }; + drop(tmp); + } + + Ok(()) +} + pub async fn sys_munmap(ctx: &ProcessCtx, addr: VA, len: usize) -> Result { let region = VirtMemoryRegion::new(addr, len); - let pages = ctx.shared().vm.lock_save_irq().mm_mut().munmap(region)?; - // Free any physical frames that were unmapped. - if !pages.is_empty() { - // The frames returned by munmap are no longer mapped and belong to this process; - // creating temporary allocations from these regions allows the allocator to reclaim them on drop. - let allocator = crate::memory::PAGE_ALLOC - .get() - .ok_or(KernelError::NoMemory)?; - - for p in pages { - // Create a temporary allocation from the single-page region and drop it immediately to free. - let tmp = unsafe { allocator.alloc_from_region(p.as_phys_range()) }; - drop(tmp); - } - } + free_unmapped_pages(pages)?; Ok(0) } +pub async fn sys_mremap( + ctx: &ProcessCtx, + old_addr: VA, + old_len: usize, + new_len: usize, + flags: u64, + new_addr: VA, +) -> Result { + if flags & !(MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP) != 0 { + return Err(KernelError::InvalidValue); + } + + if old_len == 0 { + // Linux only allows this special case for shareable mappings, which moss does not support yet. + return Err(KernelError::InvalidValue); + } + + if new_len == 0 || !old_addr.is_page_aligned() { + return Err(KernelError::InvalidValue); + } + + if (flags & MREMAP_DONTUNMAP) != 0 { + return Err(KernelError::InvalidValue); + } + + let destination = if (flags & MREMAP_FIXED) != 0 { + if (flags & MREMAP_MAYMOVE) == 0 || !new_addr.is_page_aligned() { + return Err(KernelError::InvalidValue); + } + + let old_region = VirtMemoryRegion::new(old_addr, old_len).align_to_page_boundary(); + let new_region = VirtMemoryRegion::new(new_addr, new_len).align_to_page_boundary(); + + if old_region.overlaps(new_region) { + return Err(KernelError::InvalidValue); + } + + RemapDestination::Fixed(new_addr) + } else if (flags & MREMAP_MAYMOVE) != 0 { + RemapDestination::MayMove + } else { + RemapDestination::InPlaceOnly + }; + + let (new_mapping_addr, pages_to_free) = + ctx.shared() + .vm + .lock_save_irq() + .mm_mut() + .mremap(old_addr, old_len, new_len, destination)?; + + free_unmapped_pages(pages_to_free)?; + + Ok(new_mapping_addr.value()) +} + pub fn sys_mprotect(ctx: &ProcessCtx, addr: VA, len: usize, prot: u64) -> Result { let perms = prot_to_perms(prot); let region = VirtMemoryRegion::new(addr, len); diff --git a/usertest/src/main.rs b/usertest/src/main.rs index 3272f05d..c3a6122c 100644 --- a/usertest/src/main.rs +++ b/usertest/src/main.rs @@ -196,6 +196,55 @@ fn test_mincore() { register_test!(test_mincore); +fn test_mremap() { + use std::ptr; + + unsafe { + let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize; + assert!(page_size > 0); + + let addr = libc::mmap( + ptr::null_mut(), + 2 * page_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ); + if addr == libc::MAP_FAILED { + panic!("mmap failed: {}", std::io::Error::last_os_error()); + } + + let base = addr as *mut u8; + ptr::write(base, 0x2a); + ptr::write(base.add(page_size), 0x5a); + + let grown = libc::mremap(addr, 2 * page_size, 4 * page_size, libc::MREMAP_MAYMOVE); + if grown == libc::MAP_FAILED { + panic!("mremap grow failed: {}", std::io::Error::last_os_error()); + } + + let grown = grown as *mut u8; + assert_eq!(ptr::read(grown), 0x2a); + assert_eq!(ptr::read(grown.add(page_size)), 0x5a); + ptr::write(grown.add(3 * page_size), 0x7b); + assert_eq!(ptr::read(grown.add(3 * page_size)), 0x7b); + + let shrunk = libc::mremap(grown.cast(), 4 * page_size, page_size, 0); + if shrunk == libc::MAP_FAILED { + panic!("mremap shrink failed: {}", std::io::Error::last_os_error()); + } + + let shrunk = shrunk as *mut u8; + assert_eq!(ptr::read(shrunk), 0x2a); + + let rc = libc::munmap(shrunk.cast(), page_size); + assert_eq!(rc, 0, "munmap failed: {}", std::io::Error::last_os_error()); + } +} + +register_test!(test_mremap); + fn test_itimer() { use libc::{ITIMER_REAL, itimerval}; use std::mem::MaybeUninit;