Skip to content

Commit 778a693

Browse files
authored
Merge branch 'main' into paper
2 parents 4111532 + ff49de3 commit 778a693

23 files changed

Lines changed: 403 additions & 133 deletions

.ci_support/check.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import tomlkit


if __name__ == "__main__":
    # Promote every optional dependency of the project to a mandatory one, so
    # that a plain "pip install ." in CI installs all extras as well.
    with open("pyproject.toml", "r") as f:
        data = tomlkit.load(f)

    # Flatten all extras groups into one deduplicated set of requirements.
    optional_deps = {
        requirement
        for group in data["project"]["optional-dependencies"].values()
        for requirement in group
    }

    # Skip requirements that are already mandatory and sort the remainder, so
    # repeated runs produce an identical, duplicate-free pyproject.toml
    # (iterating a bare set would make the output order non-deterministic).
    existing_deps = set(data["project"]["dependencies"])
    data["project"]["dependencies"] += sorted(optional_deps - existing_deps)

    with open("pyproject.toml", "w") as f:
        f.writelines(tomlkit.dumps(data))

.ci_support/environment-old.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ channels:
22
- conda-forge
33
dependencies:
44
- python
5-
- numpy
5+
- numpy =1.23.5
66
- openmpi =4.1.4
77
- cloudpickle =2.0.0
88
- mpi4py =3.1.4

.github/workflows/pipeline.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,9 @@ jobs:
9595
- name: Setup
9696
shell: bash -l {0}
9797
run: |
98-
pip install versioneer[toml]==0.29
98+
pip install versioneer[toml]==0.29 tomlkit
99+
python .ci_support/check.py
100+
cat pyproject.toml
99101
pip install . --no-deps --no-build-isolation
100102
pip check
101103

executorlib/base/executor.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
from concurrent.futures import (
77
Future,
88
)
9+
from threading import Thread
910
from typing import Callable, Optional, Union
1011

1112
from executorlib.standalone.inputcheck import check_resource_dict
1213
from executorlib.standalone.queue import cancel_items_in_queue
1314
from executorlib.standalone.serialize import cloudpickle_register
14-
from executorlib.standalone.thread import RaisingThread
1515

1616

1717
class ExecutorBase(FutureExecutor):
@@ -29,7 +29,7 @@ def __init__(self, max_cores: Optional[int] = None):
2929
cloudpickle_register(ind=3)
3030
self._max_cores = max_cores
3131
self._future_queue: Optional[queue.Queue] = queue.Queue()
32-
self._process: Optional[Union[RaisingThread, list[RaisingThread]]] = None
32+
self._process: Optional[Union[Thread, list[Thread]]] = None
3333

3434
@property
3535
def info(self) -> Optional[dict]:
@@ -40,13 +40,13 @@ def info(self) -> Optional[dict]:
4040
Optional[dict]: Information about the executor.
4141
"""
4242
if self._process is not None and isinstance(self._process, list):
43-
meta_data_dict = self._process[0].get_kwargs().copy()
43+
meta_data_dict = self._process[0]._kwargs.copy() # type: ignore
4444
if "future_queue" in meta_data_dict:
4545
del meta_data_dict["future_queue"]
4646
meta_data_dict["max_workers"] = len(self._process)
4747
return meta_data_dict
4848
elif self._process is not None:
49-
meta_data_dict = self._process.get_kwargs().copy()
49+
meta_data_dict = self._process._kwargs.copy() # type: ignore
5050
if "future_queue" in meta_data_dict:
5151
del meta_data_dict["future_queue"]
5252
return meta_data_dict
@@ -138,13 +138,13 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
138138
cancel_items_in_queue(que=self._future_queue)
139139
if self._process is not None and self._future_queue is not None:
140140
self._future_queue.put({"shutdown": True, "wait": wait})
141-
if wait and isinstance(self._process, RaisingThread):
141+
if wait and isinstance(self._process, Thread):
142142
self._process.join()
143143
self._future_queue.join()
144144
self._process = None
145145
self._future_queue = None
146146

147-
def _set_process(self, process: RaisingThread):
147+
def _set_process(self, process: Thread):
148148
"""
149149
Set the process for the executor.
150150

executorlib/cache/executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from threading import Thread
23
from typing import Callable, Optional
34

45
from executorlib.base.executor import ExecutorBase
@@ -15,7 +16,6 @@
1516
check_max_workers_and_cores,
1617
check_nested_flux_executor,
1718
)
18-
from executorlib.standalone.thread import RaisingThread
1919

2020
try:
2121
from executorlib.cache.queue_spawner import execute_with_pysqa
@@ -64,7 +64,7 @@ def __init__(
6464
cache_directory_path = os.path.abspath(cache_directory)
6565
os.makedirs(cache_directory_path, exist_ok=True)
6666
self._set_process(
67-
RaisingThread(
67+
Thread(
6868
target=execute_tasks_h5,
6969
kwargs={
7070
"future_queue": self._future_queue,

executorlib/cache/shared.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,10 @@ def execute_tasks_h5(
115115
]
116116
else:
117117
if len(future_wait_key_lst) > 0:
118-
raise ValueError(
119-
"Future objects are not supported as input if disable_dependencies=True."
118+
task_dict["future"].set_exception(
119+
ValueError(
120+
"Future objects are not supported as input if disable_dependencies=True."
121+
)
120122
)
121123
task_dependent_lst = []
122124
process_dict[task_key] = execute_function(

executorlib/interactive/executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from concurrent.futures import Future
2+
from threading import Thread
23
from typing import Any, Callable, Optional
34

45
from executorlib.base.executor import ExecutorBase
@@ -8,7 +9,6 @@
89
generate_nodes_and_edges,
910
generate_task_hash,
1011
)
11-
from executorlib.standalone.thread import RaisingThread
1212

1313

1414
class ExecutorWithDependencies(ExecutorBase):
@@ -41,7 +41,7 @@ def __init__(
4141
) -> None:
4242
super().__init__(max_cores=max_cores)
4343
self._set_process(
44-
RaisingThread(
44+
Thread(
4545
target=execute_tasks_with_dependencies,
4646
kwargs={
4747
# Executor Arguments

executorlib/interactive/flux.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ class FluxPythonSpawner(BaseSpawner):
2929
cores (int, optional): The number of cores. Defaults to 1.
3030
threads_per_core (int, optional): The number of threads per core. Defaults to 1.
3131
gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
32+
num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
33+
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False.
3234
openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
3335
flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None.
3436
flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None.
@@ -42,6 +44,8 @@ def __init__(
4244
cores: int = 1,
4345
threads_per_core: int = 1,
4446
gpus_per_core: int = 0,
47+
num_nodes: Optional[int] = None,
48+
exclusive: bool = False,
4549
openmpi_oversubscribe: bool = False,
4650
flux_executor: Optional[flux.job.FluxExecutor] = None,
4751
flux_executor_pmi_mode: Optional[str] = None,
@@ -55,6 +59,8 @@ def __init__(
5559
)
5660
self._threads_per_core = threads_per_core
5761
self._gpus_per_core = gpus_per_core
62+
self._num_nodes = num_nodes
63+
self._exclusive = exclusive
5864
self._flux_executor = flux_executor
5965
self._flux_executor_pmi_mode = flux_executor_pmi_mode
6066
self._flux_executor_nesting = flux_executor_nesting
@@ -85,17 +91,17 @@ def bootup(
8591
num_tasks=self._cores,
8692
cores_per_task=self._threads_per_core,
8793
gpus_per_task=self._gpus_per_core,
88-
num_nodes=None,
89-
exclusive=False,
94+
num_nodes=self._num_nodes,
95+
exclusive=self._exclusive,
9096
)
9197
else:
9298
jobspec = flux.job.JobspecV1.from_nest_command(
9399
command=command_lst,
94100
num_slots=self._cores,
95101
cores_per_slot=self._threads_per_core,
96102
gpus_per_slot=self._gpus_per_core,
97-
num_nodes=None,
98-
exclusive=False,
103+
num_nodes=self._num_nodes,
104+
exclusive=self._exclusive,
99105
)
100106
jobspec.environment = dict(os.environ)
101107
if self._flux_executor_pmi_mode is not None:

executorlib/interactive/shared.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import queue
44
import sys
55
import time
6-
from concurrent.futures import Future
6+
from asyncio.exceptions import CancelledError
7+
from concurrent.futures import Future, TimeoutError
8+
from threading import Thread
79
from time import sleep
810
from typing import Any, Callable, Optional, Union
911

@@ -19,7 +21,6 @@
1921
)
2022
from executorlib.standalone.interactive.spawner import BaseSpawner, MpiExecSpawner
2123
from executorlib.standalone.serialize import serialize_funct_h5
22-
from executorlib.standalone.thread import RaisingThread
2324

2425

2526
class ExecutorBroker(ExecutorBase):
@@ -88,7 +89,7 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
8889
self._process = None
8990
self._future_queue = None
9091

91-
def _set_process(self, process: list[RaisingThread]): # type: ignore
92+
def _set_process(self, process: list[Thread]): # type: ignore
9293
"""
9394
Set the process for the executor.
9495
@@ -148,7 +149,7 @@ def __init__(
148149
executor_kwargs["queue_join_on_shutdown"] = False
149150
self._set_process(
150151
process=[
151-
RaisingThread(
152+
Thread(
152153
target=execute_parallel_tasks,
153154
kwargs=executor_kwargs,
154155
)
@@ -204,7 +205,7 @@ def __init__(
204205
executor_kwargs["max_cores"] = max_cores
205206
executor_kwargs["max_workers"] = max_workers
206207
self._set_process(
207-
RaisingThread(
208+
Thread(
208209
target=execute_separate_tasks,
209210
kwargs=executor_kwargs,
210211
)
@@ -361,15 +362,19 @@ def execute_tasks_with_dependencies(
361362
task_dict is not None and "fn" in task_dict and "future" in task_dict
362363
):
363364
future_lst, ready_flag = _get_future_objects_from_input(task_dict=task_dict)
364-
if len(future_lst) == 0 or ready_flag:
365-
# No future objects are used in the input or all future objects are already done
366-
task_dict["args"], task_dict["kwargs"] = _update_futures_in_input(
367-
args=task_dict["args"], kwargs=task_dict["kwargs"]
368-
)
369-
executor_queue.put(task_dict)
370-
else: # Otherwise add the function to the wait list
371-
task_dict["future_lst"] = future_lst
372-
wait_lst.append(task_dict)
365+
exception_lst = _get_exception_lst(future_lst=future_lst)
366+
if not _get_exception(future_obj=task_dict["future"]):
367+
if len(exception_lst) > 0:
368+
task_dict["future"].set_exception(exception_lst[0])
369+
elif len(future_lst) == 0 or ready_flag:
370+
# No future objects are used in the input or all future objects are already done
371+
task_dict["args"], task_dict["kwargs"] = _update_futures_in_input(
372+
args=task_dict["args"], kwargs=task_dict["kwargs"]
373+
)
374+
executor_queue.put(task_dict)
375+
else: # Otherwise add the function to the wait list
376+
task_dict["future_lst"] = future_lst
377+
wait_lst.append(task_dict)
373378
future_queue.task_done()
374379
elif len(wait_lst) > 0:
375380
number_waiting = len(wait_lst)
@@ -455,7 +460,10 @@ def _submit_waiting_task(wait_lst: list[dict], executor_queue: queue.Queue) -> l
455460
"""
456461
wait_tmp_lst = []
457462
for task_wait_dict in wait_lst:
458-
if all(future.done() for future in task_wait_dict["future_lst"]):
463+
exception_lst = _get_exception_lst(future_lst=task_wait_dict["future_lst"])
464+
if len(exception_lst) > 0:
465+
task_wait_dict["future"].set_exception(exception_lst[0])
466+
elif all(future.done() for future in task_wait_dict["future_lst"]):
459467
del task_wait_dict["future_lst"]
460468
task_wait_dict["args"], task_wait_dict["kwargs"] = _update_futures_in_input(
461469
args=task_wait_dict["args"], kwargs=task_wait_dict["kwargs"]
@@ -483,6 +491,8 @@ def get_result(arg: Union[list[Future], Future]) -> Any:
483491
return arg.result()
484492
elif isinstance(arg, list):
485493
return [get_result(arg=el) for el in arg]
494+
elif isinstance(arg, dict):
495+
return {k: get_result(arg=v) for k, v in arg.items()}
486496
else:
487497
return arg
488498

@@ -510,6 +520,8 @@ def find_future_in_list(lst):
510520
future_lst.append(el)
511521
elif isinstance(el, list):
512522
find_future_in_list(lst=el)
523+
elif isinstance(el, dict):
524+
find_future_in_list(lst=el.values())
513525

514526
find_future_in_list(lst=task_dict["args"])
515527
find_future_in_list(lst=task_dict["kwargs"].values())
@@ -578,7 +590,7 @@ def _submit_function_to_separate_process(
578590
"init_function": None,
579591
}
580592
)
581-
process = RaisingThread(
593+
process = Thread(
582594
target=execute_parallel_tasks,
583595
kwargs=task_kwargs,
584596
)
@@ -599,14 +611,13 @@ def _execute_task(
599611
future_queue (Queue): Queue for receiving new tasks.
600612
"""
601613
f = task_dict.pop("future")
602-
if f.set_running_or_notify_cancel():
614+
if not f.done() and f.set_running_or_notify_cancel():
603615
try:
604616
f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
605617
except Exception as thread_exception:
606618
interface.shutdown(wait=True)
607619
future_queue.task_done()
608620
f.set_exception(exception=thread_exception)
609-
raise thread_exception
610621
else:
611622
future_queue.task_done()
612623

@@ -659,3 +670,15 @@ def _execute_task_with_cache(
659670
future = task_dict["future"]
660671
future.set_result(result)
661672
future_queue.task_done()
673+
674+
675+
def _get_exception_lst(future_lst: list[Future]) -> list:
676+
return [f.exception() for f in future_lst if _get_exception(future_obj=f)]
677+
678+
679+
def _get_exception(future_obj: Future) -> bool:
680+
try:
681+
excp = future_obj.exception(timeout=10**-10)
682+
return excp is not None and not isinstance(excp, CancelledError)
683+
except TimeoutError:
684+
return False

0 commit comments

Comments
 (0)