Skip to content

Fix panic handling in detached mode to return 500 instead of 499 #1359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions dropshot/src/server.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// Copyright 2024 Oxide Computer Company
// Copyright 2025 Oxide Computer Company
//! Generic server-wide state and facilities

use super::api_description::ApiDescription;
use super::body::Body;
use super::config::{ConfigDropshot, ConfigTls};
#[cfg(feature = "usdt-probes")]
use super::dtrace::probes;
use super::error::HttpError;
use super::handler::HandlerError;
use super::handler::RequestContext;
use super::http_util::HEADER_REQUEST_ID;
Expand Down Expand Up @@ -967,7 +968,10 @@ async fn http_request_handle<C: ServerContext>(
match rx.await {
Ok(result) => result?,
Err(_) => {
error!(request_log, "handler panicked; propogating panic");
error!(
request_log,
"handler panicked; returning 500 error"
);

// To get the panic, we now need to await `handler_task`; we
// know it is complete _and_ it failed, because it has
Expand All @@ -976,7 +980,27 @@ async fn http_request_handle<C: ServerContext>(
let task_err = handler_task.await.expect_err(
"task failed to send result but didn't panic",
);
panic::resume_unwind(task_err.into_panic());

// Extract panic message if possible
let panic_msg = if task_err.is_panic() {
match task_err.into_panic().downcast::<&'static str>() {
Ok(s) => s.to_string(),
Err(panic_any) => {
match panic_any.downcast::<String>() {
Ok(s) => *s,
Err(_) => "handler panicked".to_string(),
}
}
}
} else {
task_err.to_string()
};

// Instead of propagating the panic, return a 500 error
// This ensures proper status code reporting (500 instead of 499)
return Err(HandlerError::Dropshot(
HttpError::for_internal_error(panic_msg),
));
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion dropshot/tests/integration-tests/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2024 Oxide Computer Company
// Copyright 2025 Oxide Computer Company

//! Integration tests for Dropshot.
//!
Expand All @@ -20,6 +20,7 @@ mod multipart;
mod openapi;
mod pagination;
mod pagination_schema;
mod panic_handling;
mod path_names;
mod starter;
mod streaming;
Expand Down
85 changes: 85 additions & 0 deletions dropshot/tests/integration-tests/panic_handling.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2025 Oxide Computer Company

//! Test cases for handler panic handling.

use dropshot::endpoint;
use dropshot::ApiDescription;
use dropshot::HandlerTaskMode;
use dropshot::HttpError;
use dropshot::HttpResponseOk;
use dropshot::RequestContext;
use http::Method;
use http::StatusCode;
use schemars::JsonSchema;
use serde::Serialize;

use crate::common;

#[derive(Debug, Serialize, JsonSchema)]
struct EmptyResult {}

#[endpoint {
method = GET,
path = "/panic",
}]
async fn handler_that_panics(
_rqctx: RequestContext<()>,
) -> Result<HttpResponseOk<EmptyResult>, HttpError> {
panic!("test panic message");
}

#[tokio::test]
async fn test_panic_handler_returns_500_in_detached_mode() {
let mut api = ApiDescription::new();
api.register(handler_that_panics).unwrap();

let testctx = common::test_setup_with_context(
"test_panic_handler_returns_500_in_detached_mode",
api,
(),
HandlerTaskMode::Detached,
);

testctx
.client_testctx
.make_request_error(
Method::GET,
"/panic",
StatusCode::INTERNAL_SERVER_ERROR,
)
.await;

testctx.teardown().await;
}

// Note: We cannot easily test CancelOnDisconnect mode panic behavior in a unit test
// because panics propagate through the test harness and cause test failures.
// The key difference is:
// - Detached mode: Panics are caught and converted to 500 errors (tested above)
// - CancelOnDisconnect mode: Panics propagate and crash the handler (as intended)
// For now this test is just marked as should_panic.
// TODO: Should this test be removed?
#[tokio::test]
#[should_panic]
async fn test_panic_handler_returns_500_in_cancel_on_disconnect_mode() {
let mut api = ApiDescription::new();
api.register(handler_that_panics).unwrap();

let testctx = common::test_setup_with_context(
"test_panic_handler_returns_500_in_cancel_on_disconnect_mode",
api,
(),
HandlerTaskMode::CancelOnDisconnect,
);

testctx
.client_testctx
.make_request_error(
Method::GET,
"/panic",
StatusCode::INTERNAL_SERVER_ERROR,
)
.await;

testctx.teardown().await;
}