Skip to content

Commit 0eb915c

Browse files
committed
Don't panic on run_forever exceptions
1 parent 0588650 commit 0eb915c

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

monarch_hyperactor/src/actor.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ impl Actor for PythonActor {
342342
/// Create a new TaskLocals with its own asyncio event loop in a dedicated thread.
343343
fn create_task_locals() -> pyo3_async_runtimes::TaskLocals {
344344
let (tx, rx) = std::sync::mpsc::channel();
345-
let _ = std::thread::spawn(move || {
345+
let _ = std::thread::Builder::new().name("asyncio-event-loop".to_string()).spawn(move || {
346346
Python::with_gil(|py| {
347347
let asyncio = Python::import(py, "asyncio").unwrap();
348348
let event_loop = asyncio.call_method0("new_event_loop").unwrap();
@@ -354,7 +354,19 @@ fn create_task_locals() -> pyo3_async_runtimes::TaskLocals {
354354
.copy_context(py)
355355
.unwrap();
356356
tx.send(task_locals).unwrap();
357-
event_loop.call_method0("run_forever").unwrap();
357+
// The event loop can surface an error if one of the tasks it runs raises
358+
// an exception.
359+
event_loop.call_method0("run_forever").inspect_err(|err| {
360+
eprintln!("Event loop exited with error: {}", err);
361+
});
362+
// Regardless of whether the event loop exited cleanly or not, we need
363+
// to close the loop. Equivalent to a `finally` block in Python.
364+
event_loop.call_method0("stop").inspect_err(|err| {
365+
eprintln!("Failed to stop event loop: {}", err);
366+
});
367+
event_loop.call_method0("close").inspect_err(|err| {
368+
eprintln!("Failed to close event loop: {}", err);
369+
});
358370
});
359371
});
360372
rx.recv().unwrap()

python/monarch/_src/actor/actor_mesh.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,10 @@ async def instrumented():
807807
port.exception(ActorError(e))
808808
except BaseException as e:
809809
self._post_mortem_debug(e.__traceback__)
810+
if isinstance(e, SystemExit):
811+
# SystemExit is a BaseException but not a Rust panic. We'll just
812+
# re-raise it.
813+
raise
810814
# A BaseException can be thrown in the case of a Rust panic.
811815
# In this case, we need a way to signal the panic to the Rust side.
812816
# See [Panics in async endpoints]

0 commit comments

Comments
 (0)