From bc16a7f8fd5d64a71aa12e0407f56a7f6e58f828 Mon Sep 17 00:00:00 2001 From: Andrew Thelen Date: Tue, 28 Apr 2026 10:50:46 -0400 Subject: [PATCH 1/5] fix bug that caused LB/UB to not be used if limit was 0 --- mphys/network/remote_component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mphys/network/remote_component.py b/mphys/network/remote_component.py index b0468b4a..cd4bde6d 100644 --- a/mphys/network/remote_component.py +++ b/mphys/network/remote_component.py @@ -450,13 +450,13 @@ def _lower_bound_used(self, bound): if hasattr(bound, "__len__"): return (np.array(bound) > -1e20).any() else: - return bound + return bound > -1e20 def _upper_bound_used(self, bound): if hasattr(bound, "__len__"): return (np.array(bound) < 1e20).any() else: - return bound + return bound < 1e20 def _add_constraints_from_baseline_model(self, output_dict): for con in output_dict["constraints"].keys(): From efe7c6dda6f2d439060682ce1253b13a4df5d77e Mon Sep 17 00:00:00 2001 From: Andrew Thelen Date: Thu, 7 May 2026 09:16:40 -0700 Subject: [PATCH 2/5] add stop_server_for_down_time, which allows server to be stopped during expected down time --- .../as_opt_remote_parallel.py | 6 +++++- .../supersonic_panel/as_opt_remote_serial.py | 21 ++++++++++++++++--- mphys/network/remote_component.py | 16 ++++++++++++++ mphys/network/zmq_pbs.py | 21 +++++++++++-------- 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py b/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py index 4c245a1d..e5e8d299 100644 --- a/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py +++ b/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py @@ -14,7 +14,11 @@ def setup(self): # NOTE: make sure setup isn't called multiple times, otherwise the first jobs/port forwarding will go unused and you'll have to stop them manually for i in range(self.options["num_scenarios"]): - pbs_launcher = PBS.k4(time=1) + pbs_launcher = PBS.k4( + profile_filename='~/.bashrc', + requested_number_of_nodes=1, + time=1 + ) pbs_launcher.mpiexec = "mpirun" pbs_launcher.requested_number_of_nodes = 1 diff --git a/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py b/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py index 6b14f0f1..fd681398 100644 --- a/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py +++ b/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py @@ -60,10 +60,25 @@ def run_optimization(prob: om.Problem): def main(): check_totals = False + hpc = "k" # nas or k - pbs = PBS.k4(time=1) - pbs.mpiexec = "mpirun" - pbs.requested_number_of_nodes = 1 + if hpc == "nas": + + pbs = PBS.nas( + profile_filename='~/.bashrc', + #group_list=None, # add group list here + proc_type='rom', + requested_number_of_nodes=1, + time=1, + ) + + elif hpc == "k": + + pbs = PBS.k4( + profile_filename='~/.bashrc', + requested_number_of_nodes=1, + time=1, + ) prob = om.Problem() prob.model.add_subsystem( diff --git a/mphys/network/remote_component.py b/mphys/network/remote_component.py index cd4bde6d..0aaf924a 100644 --- a/mphys/network/remote_component.py +++ b/mphys/network/remote_component.py @@ -112,6 +112,13 @@ def initialize(self): types=bool, desc="Skip the objective/constraint definition. The quantities will still be added to outputs", ) + self.options.declare( + "stop_server_for_down_time", + default=0, + types=int, + desc="Stop server after evaluation, in case significant down time is expected afterwards. Allows user to conserve HPC " + + "SBUs in certain applications. 0=never, 1=after first function call, 2=after first derivative call.", + ) @switch_run_directory def setup(self): @@ -164,6 +171,9 @@ def setup(self): self.skip_objective_constraint_definition = self.options[ "skip_objective_constraint_definition" ] + self.stop_server_for_down_time = self.options[ + "stop_server_for_down_time" + ] self._add_design_inputs_from_baseline_model(output_dict) self._add_objectives_from_baseline_model(output_dict) @@ -232,6 +242,12 @@ def evaluate_model(self, remote_input_dict=None, command="initialize"): else: self.times_function = np.hstack([self.times_function, model_time_elapsed]) + if command != "initialize" and self.stop_server_for_down_time > 0: + if self.stop_server_for_down_time == 1 or (self.stop_server_for_down_time == 2 and self._doing_derivative_evaluation(command)): + if self.comm.rank == 0: + print(f"CLIENT (subsystem {self.name}): Stopping server's HPC job for down time") + self.server_manager.stop_server() + return remote_output_dict def _assign_objective_partials_from_remote_output(self, remote_dict, partials): diff --git a/mphys/network/zmq_pbs.py b/mphys/network/zmq_pbs.py index 9a718a6b..c52b72bf 100644 --- a/mphys/network/zmq_pbs.py +++ b/mphys/network/zmq_pbs.py @@ -123,16 +123,19 @@ def start_server(self): self._initialize_connection() self.server_counter += 1 self._launch_job() + self.server_stopped = False def stop_server(self): - print( - f"CLIENT (subsystem {self.component_name}): Stopping the remote analysis server", - flush=True, - ) - if self.job.state == "R": - self.socket.send("shutdown|null".encode()) - self._shutdown_server() - self.socket.close() + if not self.server_stopped: + print( + f"CLIENT (subsystem {self.component_name}): Stopping the remote analysis server", + flush=True, + ) + if self.job.state == "R": + self.socket.send("shutdown|null".encode()) + self._shutdown_server() + self.socket.close() + self.server_stopped = True def enough_time_is_remaining(self, estimated_model_time): self.job.update_job_state() @@ -143,7 +146,7 @@ def enough_time_is_remaining(self, estimated_model_time): def job_has_expired(self): self.job.update_job_state() - if self.job.state == "R": + if self.job.state == "R" and not self.server_stopped: return False else: if self.job_expiration_max_restarts is not None: From 6f40f9909a86cd0cee65599a10a773c00396d876 Mon Sep 17 00:00:00 2001 From: Andrew Thelen Date: Thu, 7 May 2026 09:24:54 -0700 Subject: [PATCH 3/5] format --- .../supersonic_panel/as_opt_remote_parallel.py | 4 +--- .../supersonic_panel/as_opt_remote_serial.py | 10 +++++----- mphys/network/remote_component.py | 13 ++++++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py b/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py index e5e8d299..6f4208e7 100644 --- a/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py +++ b/examples/aerostructural/supersonic_panel/as_opt_remote_parallel.py @@ -15,9 +15,7 @@ def setup(self): for i in range(self.options["num_scenarios"]): pbs_launcher = PBS.k4( - profile_filename='~/.bashrc', - requested_number_of_nodes=1, - time=1 + profile_filename="~/.bashrc", requested_number_of_nodes=1, time=1 ) pbs_launcher.mpiexec = "mpirun" pbs_launcher.requested_number_of_nodes = 1 diff --git a/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py b/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py index fd681398..09a147e2 100644 --- a/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py +++ b/examples/aerostructural/supersonic_panel/as_opt_remote_serial.py @@ -60,14 +60,14 @@ def run_optimization(prob: om.Problem): def main(): check_totals = False - hpc = "k" # nas or k + hpc = "k" # nas or k if hpc == "nas": pbs = PBS.nas( - profile_filename='~/.bashrc', - #group_list=None, # add group list here - proc_type='rom', + profile_filename="~/.bashrc", + # group_list=None, # add group list here + proc_type="rom", requested_number_of_nodes=1, time=1, ) @@ -75,7 +75,7 @@ def main(): elif hpc == "k": pbs = PBS.k4( - profile_filename='~/.bashrc', + profile_filename="~/.bashrc", requested_number_of_nodes=1, time=1, ) diff --git a/mphys/network/remote_component.py b/mphys/network/remote_component.py index 0aaf924a..4a988b77 100644 --- a/mphys/network/remote_component.py +++ b/mphys/network/remote_component.py @@ -171,9 +171,7 @@ def setup(self): self.skip_objective_constraint_definition = self.options[ "skip_objective_constraint_definition" ] - self.stop_server_for_down_time = self.options[ - "stop_server_for_down_time" - ] + self.stop_server_for_down_time = self.options["stop_server_for_down_time"] self._add_design_inputs_from_baseline_model(output_dict) self._add_objectives_from_baseline_model(output_dict) @@ -243,9 +241,14 @@ def evaluate_model(self, remote_input_dict=None, command="initialize"): self.times_function = np.hstack([self.times_function, model_time_elapsed]) if command != "initialize" and self.stop_server_for_down_time > 0: - if self.stop_server_for_down_time == 1 or (self.stop_server_for_down_time == 2 and self._doing_derivative_evaluation(command)): + if self.stop_server_for_down_time == 1 or ( + self.stop_server_for_down_time == 2 + and self._doing_derivative_evaluation(command) + ): if self.comm.rank == 0: - print(f"CLIENT (subsystem {self.name}): Stopping server's HPC job for down time") + print( + f"CLIENT (subsystem {self.name}): Stopping server's HPC job for down time" + ) self.server_manager.stop_server() return remote_output_dict From 640dd8a9d4f00ca3740c54e7d43370f9ca0388d8 Mon Sep 17 00:00:00 2001 From: Andrew Thelen Date: Thu, 7 May 2026 09:40:17 -0700 Subject: [PATCH 4/5] also use stop_server_for_down_time after server initialization --- mphys/network/remote_component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mphys/network/remote_component.py b/mphys/network/remote_component.py index 4a988b77..264d8740 100644 --- a/mphys/network/remote_component.py +++ b/mphys/network/remote_component.py @@ -240,7 +240,7 @@ def evaluate_model(self, remote_input_dict=None, command="initialize"): else: self.times_function = np.hstack([self.times_function, model_time_elapsed]) - if command != "initialize" and self.stop_server_for_down_time > 0: + if self.stop_server_for_down_time > 0: if self.stop_server_for_down_time == 1 or ( self.stop_server_for_down_time == 2 and self._doing_derivative_evaluation(command) From 8d16803a33e0af0a5514a90aa0bc82601ad933ae Mon Sep 17 00:00:00 2001 From: Andrew Thelen Date: Thu, 7 May 2026 10:13:34 -0700 Subject: [PATCH 5/5] move self.stop_server_for_down_time ahead of initialization --- mphys/network/remote_component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mphys/network/remote_component.py b/mphys/network/remote_component.py index 264d8740..bfb26bf1 100644 --- a/mphys/network/remote_component.py +++ b/mphys/network/remote_component.py @@ -125,6 +125,7 @@ def setup(self): self.var_naming_dot_replacement = self.options["var_naming_dot_replacement"] self.use_derivative_coloring = self.options["use_derivative_coloring"] self.derivative_coloring_num = 0 + self.stop_server_for_down_time = self.options["stop_server_for_down_time"] output_dict = None if self.comm.rank == 0: @@ -171,7 +172,6 @@ def setup(self): self.skip_objective_constraint_definition = self.options[ "skip_objective_constraint_definition" ] - self.stop_server_for_down_time = self.options["stop_server_for_down_time"] self._add_design_inputs_from_baseline_model(output_dict) self._add_objectives_from_baseline_model(output_dict)