Skip to content

Commit ecddc6f

Browse files
amphilikebreath
authored and committed
vmm: add upper limit for amount of parallel connections during migration
Check that the amount of parallel connections does not exceed 128 and update documentation.

On-behalf-of: SAP sebastian.eydam@sap.com
Signed-off-by: Sebastian Eydam <sebastian.eydam@cyberus-technology.de>
1 parent a9a832f commit ecddc6f

4 files changed

Lines changed: 60 additions & 8 deletions

File tree

docs/live_migration.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,3 +207,7 @@ migration process. Via the API or `ch-remote`, you may specify:
207207
Cancel will abort the migration and keep the VM running on the source.
208208
Ignore will proceed with the migration regardless of the downtime requirement.
209209
Defaults to `cancel`.
210+
- `connections <amount>`: \
211+
The number of parallel TCP connections to use for migration.
212+
Must be between `1` and `128`. Defaults to `1`.
213+
Multiple connections are not supported with local UNIX-socket migration.

vmm/src/api/mod.rs

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ pub use self::http::{start_http_fd_thread, start_http_path_thread};
5353
use crate::Error as VmmError;
5454
use crate::config::RestoreConfig;
5555
use crate::device_tree::DeviceTree;
56+
use crate::migration_transport::MAX_MIGRATION_CONNECTIONS;
5657
use crate::vm::{Error as VmError, VmState};
5758
use crate::vm_config::{
5859
DeviceConfig, DiskConfig, FsConfig, GenericVhostUserConfig, NetConfig, PmemConfig,
@@ -324,7 +325,9 @@ pub struct VmSendMigrationData {
324325
#[serde(default)]
325326
pub timeout_strategy: TimeoutStrategy,
326327

327-
/// The number of parallel connections for migration.
328+
/// The number of parallel TCP connections for migration.
329+
///
330+
/// Must be between 1 and `MAX_MIGRATION_CONNECTIONS` inclusive.
328331
#[serde(default = "VmSendMigrationData::default_connections")]
329332
pub connections: NonZeroU32,
330333
}
@@ -459,6 +462,12 @@ impl VmSendMigrationData {
459462
}
460463
}
461464

465+
if self.connections.get() > MAX_MIGRATION_CONNECTIONS {
466+
return Err(VmSendMigrationConfigError::ValidationError(format!(
467+
"connections must not exceed {MAX_MIGRATION_CONNECTIONS}."
468+
)));
469+
}
470+
462471
if self.local {
463472
if !self.destination_url.starts_with("unix:") {
464473
return Err(VmSendMigrationConfigError::ValidationError(
@@ -1785,8 +1794,14 @@ mod unit_tests {
17851794
.expect_err("zero timeout_s should be rejected");
17861795

17871796
// Zero connections is rejected
1788-
let _data = VmSendMigrationData::parse("destination_url=unix:/tmp/sock,connections=0")
1789-
.expect_err("zero connections should be rejected");
1797+
let _data =
1798+
VmSendMigrationData::parse("destination_url=tcp:192.168.1.1:8080,connections=0")
1799+
.expect_err("zero connections should be rejected");
1800+
1801+
// Excessive numbers of parallel connections are rejected
1802+
let _data =
1803+
VmSendMigrationData::parse("destination_url=tcp:192.168.1.1:8080,connections=129")
1804+
.expect_err("too many connections should be rejected");
17901805

17911806
// Unknown option is an error
17921807
VmSendMigrationData::parse("destination_url=unix:/tmp/sock,unknown_field=foo").unwrap_err();

vmm/src/api/openapi/cloud-hypervisor.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,11 @@ components:
14101410
format: int64
14111411
default: 1
14121412
minimum: 1
1413+
maximum: 128
1414+
description: >
1415+
The number of parallel TCP connections to use for migration.
1416+
Must be between 1 and 128. Multiple connections are not supported
1417+
with local UNIX-socket migration.
14131418
14141419
VmAddUserDevice:
14151420
required:

vmm/src/migration_transport.rs

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,10 @@ use vmm_sys_util::eventfd::EventFd;
3232
use crate::sync_utils::Gate;
3333
use crate::{GuestMemoryMmap, VmMigrationConfig};
3434

35+
/// Hard upper bound for migration worker connections on both the sender and
36+
/// receiver side.
37+
pub(crate) const MAX_MIGRATION_CONNECTIONS: u32 = 128;
38+
3539
/// Transport-agnostic listener used to receive connections.
3640
#[derive(Debug)]
3741
pub(crate) enum ReceiveListener {
@@ -288,12 +292,30 @@ impl ReceiveAdditionalConnections {
288292
guest_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
289293
) -> Result<(), MigratableError> {
290294
let mut threads: Vec<thread::JoinHandle<Result<(), MigratableError>>> = Vec::new();
291-
while let Some(mut socket) = listener.abortable_accept(terminate_fd)? {
295+
let mut first_err = loop {
296+
let socket = match listener.abortable_accept(terminate_fd) {
297+
Ok(socket) => socket,
298+
Err(e) => break Err(e),
299+
};
300+
let Some(mut socket) = socket else {
301+
break Ok(());
302+
};
303+
304+
if threads.len() >= MAX_MIGRATION_CONNECTIONS as usize {
305+
break Err(MigratableError::MigrateReceive(anyhow!(
306+
"Received more than {MAX_MIGRATION_CONNECTIONS} additional migration connections."
307+
)));
308+
}
309+
292310
let guest_memory = guest_memory.clone();
293-
let terminate_fd = terminate_fd
311+
let terminate_fd = match terminate_fd
294312
.try_clone()
295313
.context("Error cloning terminate fd")
296-
.map_err(MigratableError::MigrateReceive)?;
314+
.map_err(MigratableError::MigrateReceive)
315+
{
316+
Ok(terminate_fd) => terminate_fd,
317+
Err(e) => break Err(e),
318+
};
297319

298320
match thread::Builder::new()
299321
.name(format!("migrate-receive-memory-{}", threads.len()).to_owned())
@@ -303,15 +325,21 @@ impl ReceiveAdditionalConnections {
303325
Ok(t) => threads.push(t),
304326
Err(e) => {
305327
error!("Error spawning receive-memory thread: {e}");
306-
break;
328+
break Err(MigratableError::MigrateReceive(
329+
anyhow!(e).context("Error spawning receive-memory thread"),
330+
));
307331
}
308332
}
333+
};
334+
335+
if first_err.is_err() {
336+
warn!("Signaling termination due to an error while accepting connections.");
337+
let _ = terminate_fd.write(1);
309338
}
310339

311340
info!("Stopped accepting additional connections. Cleaning up threads.");
312341

313342
// We only return the first error we encounter here.
314-
let mut first_err = Ok(());
315343
for thread in threads {
316344
let err = match thread.join() {
317345
Ok(Ok(())) => None,

0 commit comments

Comments
 (0)