Skip to content

Commit 6777024

Browse files
committed
Use thread local density maps, can massively boost map generation
1 parent 7580b86 commit 6777024

3 files changed

Lines changed: 106 additions & 24 deletions

File tree

Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

splashsurf_lib/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@ rayon = "1.5"
3535
dashmap = "3.11"
3636
fxhash = "0.2"
3737
vtkio = { version = "0.5", optional = true }
38+
thread_local = "1.0"
39+
num_cpus = "1.13"

splashsurf_lib/src/density_map.rs

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
use std::cell::RefCell;
2+
13
use dashmap::ReadOnlyView as ReadDashMap;
24
use log::{info, warn};
35
use nalgebra::Vector3;
46
use rayon::prelude::*;
7+
use thread_local::ThreadLocal;
58

69
use crate::kernel::DiscreteSquaredDistanceCubicKernel;
710
use crate::mesh::{HexMesh3d, MeshWithPointData};
@@ -375,7 +378,8 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>(
375378
}
376379
);
377380

378-
let sparse_densities = ParallelMapType::with_hasher(HashState::default());
381+
// Each thread will write to its own local density map
382+
let sparse_densities: ThreadLocal<RefCell<MapType<I, R>>> = ThreadLocal::new();
379383

380384
// The number of cells in each direction from a particle that can be affected by a its compact support
381385
let half_supported_cells_real = (kernel_radius / cube_size).ceil();
@@ -407,12 +411,15 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>(
407411
);
408412
warn!("No particles can be found in this domain. Increase the domain of surface reconstruction to avoid this.");
409413
} else {
414+
profile!("generate thread local maps");
415+
410416
info!(
411417
"To take into account the kernel evaluation radius, the allowed domain of particles was restricted to: {:?}",
412418
allowed_domain
413419
);
414420

415-
let process_particle = |particle_data: (&Vector3<R>, R)| {
421+
let process_particle = |sparse_densities: &mut MapType<I, R>,
422+
particle_data: (&Vector3<R>, R)| {
416423
let (particle, particle_density) = particle_data;
417424

418425
// Skip particles outside of allowed domain
@@ -484,34 +491,96 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>(
484491
}
485492
};
486493

487-
// TODO: Use chunks for sparse indexing below
488-
// TODO: Use something like num_cpus * 0.66
489-
let chunk_size = particle_positions.len() / 8;
494+
let compute_chunk_size = |num_particles: usize| -> usize {
495+
let min_chunk_size = 100.max(num_particles);
496+
let chunks_per_cpu = 10;
497+
498+
let num_cpus = num_cpus::get();
499+
let num_chunks = chunks_per_cpu * num_cpus;
500+
let chunk_size = (num_particles / num_chunks).min(min_chunk_size);
501+
502+
info!(
503+
"Splitting particles into {} chunks (with {} particles each) for density map generation",
504+
num_chunks, chunk_size
505+
);
506+
chunk_size
507+
};
508+
490509
match active_particles {
491-
None => particle_positions
492-
.par_chunks(chunk_size)
493-
.zip(particle_densities.par_chunks(chunk_size))
494-
.for_each(|(position_chunk, density_chunk)| {
495-
position_chunk
510+
// Process particles, when no list of active particles was provided
511+
None => {
512+
let chunk_size = compute_chunk_size(particle_positions.len());
513+
particle_positions
514+
.par_chunks(chunk_size)
515+
.zip(particle_densities.par_chunks(chunk_size))
516+
.for_each(|(position_chunk, density_chunk)| {
517+
// Obtain mutable reference to thread local density map
518+
let map = sparse_densities
519+
.get_or(|| RefCell::new(MapType::with_hasher(HashState::default())));
520+
let mut mut_map = map.borrow_mut();
521+
522+
let process_particle_map = |particle_data: (&Vector3<R>, R)| {
523+
process_particle(&mut mut_map, particle_data);
524+
};
525+
526+
assert_eq!(position_chunk.len(), density_chunk.len());
527+
position_chunk
528+
.iter()
529+
.zip(density_chunk.iter().copied())
530+
.for_each(process_particle_map);
531+
})
532+
}
533+
// Process particles, when only a subset is active
534+
Some(indices) => {
535+
let chunk_size = compute_chunk_size(indices.len());
536+
indices.par_chunks(chunk_size).for_each(|index_chunk| {
537+
// Obtain mutable reference to thread local density map
538+
let map = sparse_densities
539+
.get_or(|| RefCell::new(MapType::with_hasher(HashState::default())));
540+
let mut mut_map = map.borrow_mut();
541+
542+
let process_particle_map = |particle_data: (&Vector3<R>, R)| {
543+
process_particle(&mut mut_map, particle_data);
544+
};
545+
546+
index_chunk
496547
.iter()
497-
.zip(density_chunk.iter().copied())
498-
.for_each(process_particle)
499-
}),
500-
Some(indices) => indices
501-
.par_iter()
502-
.map(|&i| &particle_positions[i])
503-
.zip(indices.par_iter().map(|&i| particle_densities[i]))
504-
.for_each(process_particle),
548+
.map(|&i| (&particle_positions[i], particle_densities[i]))
549+
.for_each(process_particle_map);
550+
});
551+
}
505552
}
506553
}
507554

508-
info!(
509-
"Sparse density map with {} point data values was constructed.",
510-
sparse_densities.len()
511-
);
512-
info!("Construction of sparse density map done.");
555+
{
556+
profile!("merge thread local maps to global map");
513557

514-
sparse_densities.into()
558+
let mut local_density_maps = sparse_densities
559+
.into_iter()
560+
.map(|m| m.into_inner())
561+
.collect::<Vec<_>>();
562+
563+
info!(
564+
"Merging {} thread local density maps to a single global map...",
565+
local_density_maps.len()
566+
);
567+
568+
// Merge local density maps in parallel by summing the density contributions
569+
let global_density_map = ParallelMapType::with_hasher(HashState::default());
570+
local_density_maps.par_iter_mut().for_each(|local_map| {
571+
for (idx, density) in local_map.drain() {
572+
*global_density_map.entry(idx).or_insert(R::zero()) += density;
573+
}
574+
});
575+
576+
info!(
577+
"Global sparse density map with {} grid point data values was constructed.",
578+
global_density_map.len()
579+
);
580+
info!("Construction of sparse density map done.");
581+
582+
global_density_map.into()
583+
}
515584
}
516585

517586
/// Converts a sparse density map (based on the implicit background grid) to a sparse hexahedral mesh with explicit coordinates for the cells' vertices.

0 commit comments

Comments
 (0)