|
| 1 | +use std::cell::RefCell; |
| 2 | + |
1 | 3 | use dashmap::ReadOnlyView as ReadDashMap; |
2 | 4 | use log::{info, warn}; |
3 | 5 | use nalgebra::Vector3; |
4 | 6 | use rayon::prelude::*; |
| 7 | +use thread_local::ThreadLocal; |
5 | 8 |
|
6 | 9 | use crate::kernel::DiscreteSquaredDistanceCubicKernel; |
7 | 10 | use crate::mesh::{HexMesh3d, MeshWithPointData}; |
@@ -375,7 +378,8 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>( |
375 | 378 | } |
376 | 379 | ); |
377 | 380 |
|
378 | | - let sparse_densities = ParallelMapType::with_hasher(HashState::default()); |
| 381 | + // Each thread will write to its own local density map |
| 382 | + let sparse_densities: ThreadLocal<RefCell<MapType<I, R>>> = ThreadLocal::new(); |
379 | 383 |
|
380 | 384 | // The number of cells in each direction from a particle that can be affected by a its compact support |
381 | 385 | let half_supported_cells_real = (kernel_radius / cube_size).ceil(); |
@@ -407,12 +411,15 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>( |
407 | 411 | ); |
408 | 412 | warn!("No particles can be found in this domain. Increase the domain of surface reconstruction to avoid this."); |
409 | 413 | } else { |
| 414 | + profile!("generate thread local maps"); |
| 415 | + |
410 | 416 | info!( |
411 | 417 | "To take into account the kernel evaluation radius, the allowed domain of particles was restricted to: {:?}", |
412 | 418 | allowed_domain |
413 | 419 | ); |
414 | 420 |
|
415 | | - let process_particle = |particle_data: (&Vector3<R>, R)| { |
| 421 | + let process_particle = |sparse_densities: &mut MapType<I, R>, |
| 422 | + particle_data: (&Vector3<R>, R)| { |
416 | 423 | let (particle, particle_density) = particle_data; |
417 | 424 |
|
418 | 425 | // Skip particles outside of allowed domain |
@@ -484,34 +491,96 @@ pub fn parallel_generate_sparse_density_map<I: Index, R: Real>( |
484 | 491 | } |
485 | 492 | }; |
486 | 493 |
|
487 | | - // TODO: Use chunks for sparse indexing below |
488 | | - // TODO: Use something like num_cpus * 0.66 |
489 | | - let chunk_size = particle_positions.len() / 8; |
| 494 | + let compute_chunk_size = |num_particles: usize| -> usize { |
| 495 | + let min_chunk_size = 100.max(num_particles); |
| 496 | + let chunks_per_cpu = 10; |
| 497 | + |
| 498 | + let num_cpus = num_cpus::get(); |
| 499 | + let num_chunks = chunks_per_cpu * num_cpus; |
| 500 | + let chunk_size = (num_particles / num_chunks).min(min_chunk_size); |
| 501 | + |
| 502 | + info!( |
| 503 | + "Splitting particles into {} chunks (with {} particles each) for density map generation", |
| 504 | + num_chunks, chunk_size |
| 505 | + ); |
| 506 | + chunk_size |
| 507 | + }; |
| 508 | + |
490 | 509 | match active_particles { |
491 | | - None => particle_positions |
492 | | - .par_chunks(chunk_size) |
493 | | - .zip(particle_densities.par_chunks(chunk_size)) |
494 | | - .for_each(|(position_chunk, density_chunk)| { |
495 | | - position_chunk |
| 510 | + // Process particles, when no list of active particles was provided |
| 511 | + None => { |
| 512 | + let chunk_size = compute_chunk_size(particle_positions.len()); |
| 513 | + particle_positions |
| 514 | + .par_chunks(chunk_size) |
| 515 | + .zip(particle_densities.par_chunks(chunk_size)) |
| 516 | + .for_each(|(position_chunk, density_chunk)| { |
| 517 | + // Obtain mutable reference to thread local density map |
| 518 | + let map = sparse_densities |
| 519 | + .get_or(|| RefCell::new(MapType::with_hasher(HashState::default()))); |
| 520 | + let mut mut_map = map.borrow_mut(); |
| 521 | + |
| 522 | + let process_particle_map = |particle_data: (&Vector3<R>, R)| { |
| 523 | + process_particle(&mut mut_map, particle_data); |
| 524 | + }; |
| 525 | + |
| 526 | + assert_eq!(position_chunk.len(), density_chunk.len()); |
| 527 | + position_chunk |
| 528 | + .iter() |
| 529 | + .zip(density_chunk.iter().copied()) |
| 530 | + .for_each(process_particle_map); |
| 531 | + }) |
| 532 | + } |
| 533 | + // Process particles, when only a subset is active |
| 534 | + Some(indices) => { |
| 535 | + let chunk_size = compute_chunk_size(indices.len()); |
| 536 | + indices.par_chunks(chunk_size).for_each(|index_chunk| { |
| 537 | + // Obtain mutable reference to thread local density map |
| 538 | + let map = sparse_densities |
| 539 | + .get_or(|| RefCell::new(MapType::with_hasher(HashState::default()))); |
| 540 | + let mut mut_map = map.borrow_mut(); |
| 541 | + |
| 542 | + let process_particle_map = |particle_data: (&Vector3<R>, R)| { |
| 543 | + process_particle(&mut mut_map, particle_data); |
| 544 | + }; |
| 545 | + |
| 546 | + index_chunk |
496 | 547 | .iter() |
497 | | - .zip(density_chunk.iter().copied()) |
498 | | - .for_each(process_particle) |
499 | | - }), |
500 | | - Some(indices) => indices |
501 | | - .par_iter() |
502 | | - .map(|&i| &particle_positions[i]) |
503 | | - .zip(indices.par_iter().map(|&i| particle_densities[i])) |
504 | | - .for_each(process_particle), |
| 548 | + .map(|&i| (&particle_positions[i], particle_densities[i])) |
| 549 | + .for_each(process_particle_map); |
| 550 | + }); |
| 551 | + } |
505 | 552 | } |
506 | 553 | } |
507 | 554 |
|
508 | | - info!( |
509 | | - "Sparse density map with {} point data values was constructed.", |
510 | | - sparse_densities.len() |
511 | | - ); |
512 | | - info!("Construction of sparse density map done."); |
| 555 | + { |
| 556 | + profile!("merge thread local maps to global map"); |
513 | 557 |
|
514 | | - sparse_densities.into() |
| 558 | + let mut local_density_maps = sparse_densities |
| 559 | + .into_iter() |
| 560 | + .map(|m| m.into_inner()) |
| 561 | + .collect::<Vec<_>>(); |
| 562 | + |
| 563 | + info!( |
| 564 | + "Merging {} thread local density maps to a single global map...", |
| 565 | + local_density_maps.len() |
| 566 | + ); |
| 567 | + |
| 568 | + // Merge local density maps in parallel by summing the density contributions |
| 569 | + let global_density_map = ParallelMapType::with_hasher(HashState::default()); |
| 570 | + local_density_maps.par_iter_mut().for_each(|local_map| { |
| 571 | + for (idx, density) in local_map.drain() { |
| 572 | + *global_density_map.entry(idx).or_insert(R::zero()) += density; |
| 573 | + } |
| 574 | + }); |
| 575 | + |
| 576 | + info!( |
| 577 | + "Global sparse density map with {} grid point data values was constructed.", |
| 578 | + global_density_map.len() |
| 579 | + ); |
| 580 | + info!("Construction of sparse density map done."); |
| 581 | + |
| 582 | + global_density_map.into() |
| 583 | + } |
515 | 584 | } |
516 | 585 |
|
517 | 586 | /// Converts a sparse density map (based on the implicit background grid) to a sparse hexahedral mesh with explicit coordinates for the cells' vertices. |
|
0 commit comments