Skip to content

[DRAFT - DO NOT MERGE] Basic OpenTelemetry integration #1201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,466 changes: 936 additions & 530 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions dropshot/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,21 @@ slog-async = "2.8.0"
slog-bunyan = "2.5.0"
slog-json = "2.6.1"
slog-term = "2.9.1"
tracing = { version = "0.1.40", optional = true }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"], optional = true }
thiserror = "2.0.12"
tokio-rustls = "0.25.0"
toml = "0.8.23"
waitgroup = "0.1.2"

opentelemetry = { version = "0.30", optional = true }
opentelemetry-http = { version = "0.30", features = ["hyper"], optional = true }
opentelemetry-semantic-conventions = { version = "0.30", optional = true }
opentelemetry-otlp = { version = "0.30", features = ["tonic"], optional = true }
opentelemetry_sdk = { version = "0.30", features = ["rt-tokio"], optional = true }
opentelemetry-stdout = { version = "0.30", optional = true }
tracing-opentelemetry = { version = "0.31", optional = true }

[dependencies.chrono]
version = "0.4.41"
features = [ "serde", "std", "clock" ]
Expand Down Expand Up @@ -136,6 +146,8 @@ version_check = "0.9.5"
[features]
usdt-probes = ["usdt/asm"]
internal-docs = ["simple-mermaid"]
tracing = ["dep:tracing", "dep:tracing-subscriber"]
otel-tracing = ["tracing", "opentelemetry", "opentelemetry-http", "opentelemetry-semantic-conventions", "opentelemetry-otlp", "opentelemetry_sdk", "opentelemetry-stdout", "tracing-opentelemetry"]

[package.metadata.docs.rs]
features = ["internal-docs"]
316 changes: 316 additions & 0 deletions dropshot/examples/otel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
// Copyright 2025 Oxide Computer Company
//! Example use of Dropshot with OpenTelemetry integration.
//!
//! Dropshot's built-in OpenTelemetry support will automatically parse
//! standard OTEL environment variables.
//! If you launch an otel-collector or otel-enabled jaeger-all-in-one
//! listening for otlp over http, then you can do:
//!
//! ```bash
//! export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
//! export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
//! cargo run --features=otel-tracing --example otel&
//! curl http://localhost:4000/get
//! ```
//!
//! And you should see an example trace.

use dropshot::endpoint;
use dropshot::ApiDescription;
use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
use dropshot::ConfigLoggingLevel;
use dropshot::HttpError;
use dropshot::HttpResponseOk;
use dropshot::HttpResponseUpdatedNoContent;
use dropshot::HttpServerStarter;
use dropshot::RequestContext;
use dropshot::TypedBody;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
use tracing;

#[tokio::main]
async fn main() -> Result<(), String> {
let config_dropshot = ConfigDropshot {
bind_address: "127.0.0.1:4000".parse().unwrap(),
..Default::default()
};

// For simplicity, we'll configure an "info"-level logger that writes to
// stderr assuming that it's a terminal.
let config_logging =
ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info };
let log = config_logging
.to_logger("example-basic")
.map_err(|error| format!("failed to create logger: {}", error))?;

// Initialize tracing with both slog bridge and OpenTelemetry support
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
let _tracing_guard = dropshot::tracing_support::init_tracing(&log)
.await
.map_err(|e| format!("failed to initialize tracing: {}", e))?;

// Build a description of the API.
let mut api = ApiDescription::new();
api.register(example_api_get_counter).unwrap();
api.register(example_api_put_counter).unwrap();
api.register(example_api_get).unwrap();
api.register(example_api_error).unwrap();
api.register(example_api_panic).unwrap();
api.register(example_api_sleep).unwrap();
api.register(example_api_exit).unwrap();

// The functions that implement our API endpoints will share this context.
let api_context = ExampleContext::new();

// Set up the server.
let server =
HttpServerStarter::new(&config_dropshot, api, api_context, &log)
.map_err(|error| format!("failed to create server: {}", error))?
.start();

let shutdown = server.wait_for_shutdown();

tokio::task::spawn(async move {
loop {
if server.app_private().shutdown.load(Ordering::SeqCst) {
break;
} else {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
}
server.close().await.unwrap();
});

// From a separate task, wait for the server to stop.
shutdown.await
}

/// Application-specific example context (state shared by handler functions)
#[derive(Debug)]
struct ExampleContext {
/// counter that can be manipulated by requests to the HTTP API
counter: AtomicU64,
shutdown: AtomicBool,
}

impl ExampleContext {
/// Return a new ExampleContext.
pub fn new() -> ExampleContext {
ExampleContext {
counter: AtomicU64::new(0),
shutdown: AtomicBool::new(false),
}
}
}

// HTTP API interface

/// `CounterValue` represents the value of the API's counter, either as the
/// response to a GET request to fetch the counter or as the body of a PUT
/// request to update the counter.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
struct CounterValue {
counter: u64,
}

/// Demonstrates creating child spans for internal operations using tracing instrumentation
#[endpoint {
method = GET,
path = "/get",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(rqctx), fields(counter_processing = tracing::field::Empty)))]
async fn example_api_get(
rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::info!("Starting counter fetch with processing");

// Simulate some work
fetch_counter_with_delay().await;

let api_context = rqctx.context();
let counter_value = api_context.counter.load(Ordering::SeqCst);

// Do some "processing" that would benefit from being traced
let processed_value = process_counter_value(counter_value).await;

// Record the processing result in the span
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::Span::current().record("counter_processing", processed_value);

#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::info!(
processed_value = processed_value,
"Counter processing completed"
);

Ok(HttpResponseOk(CounterValue { counter: processed_value }))
}

#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument
)]
async fn fetch_counter_with_delay() {
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::debug!("Simulating work");
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}

#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument
)]
async fn process_counter_value(counter_value: u64) -> u64 {
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::debug!(input_value = counter_value, "Processing counter value");
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
let result = counter_value * 2; // Some arbitrary processing
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::debug!(output_value = result, "Counter processing complete");
result
}

/// Fetch the current value of the counter.
#[endpoint {
method = GET,
path = "/counter",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(rqctx))
)]
async fn example_api_get_counter(
rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
let api_context = rqctx.context();
let counter = api_context.counter.load(Ordering::SeqCst);

#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::info!(counter_value = counter, "Retrieved counter value");

Ok(HttpResponseOk(CounterValue { counter }))
}

/// Demonstrates error tracing - errors will be marked on the span
#[endpoint {
method = GET,
path = "/error",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(_rqctx))
)]
async fn example_api_error(
_rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::warn!("About to return an error for demonstration");
let error =
HttpError::for_internal_error("This endpoint is broken".to_string());
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::error!(error = ?error, "Returning demonstration error");
Err(error)
}

/// Demonstrates panic handling - panics are converted to 500 errors and traced
#[endpoint {
method = GET,
path = "/panic",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(_rqctx))
)]
async fn example_api_panic(
_rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
panic!("This handler panics to demonstrate error tracing");
}

/// Takes too long so the client disconnects
#[endpoint {
method = GET,
path = "/sleep",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(_rqctx))
)]
async fn example_api_sleep(
_rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
Err(HttpError::for_internal_error(
"This endpoint takes too long".to_string(),
))
}

/// Exit shortcut
#[endpoint {
method = GET,
path = "/exit",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(rqctx))
)]
async fn example_api_exit(
rqctx: RequestContext<ExampleContext>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
rqctx.context().shutdown.store(true, Ordering::SeqCst);
Ok(HttpResponseUpdatedNoContent())
}

/// Update the current value of the counter. Note that the special value of 10
/// is not allowed (just to demonstrate how to generate an error).
#[endpoint {
method = PUT,
path = "/counter",
}]
#[cfg_attr(
any(feature = "tracing", feature = "otel-tracing"),
tracing::instrument(skip(rqctx, update), fields(new_value = tracing::field::Empty)))]
async fn example_api_put_counter(
rqctx: RequestContext<ExampleContext>,
update: TypedBody<CounterValue>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let api_context = rqctx.context();
let updated_value = update.into_inner();

// Record the new value in the span
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::Span::current().record("new_value", updated_value.counter);
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::info!(
new_counter_value = updated_value.counter,
"Updating counter"
);

if updated_value.counter == 10 {
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::warn!(
rejected_value = updated_value.counter,
"Rejecting forbidden value"
);
Err(HttpError::for_bad_request(
Some(String::from("BadInput")),
format!("do not like the number {}", updated_value.counter),
))
} else {
api_context.counter.store(updated_value.counter, Ordering::SeqCst);
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
tracing::info!(
updated_counter = updated_value.counter,
"Counter updated successfully"
);
Ok(HttpResponseUpdatedNoContent())
}
}
1 change: 1 addition & 0 deletions dropshot/src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ where
/// This type is not exported to Dropshot consumers; it is purely an internal
/// implementation detail of the interface between `HttpHandlerFunc` and the
/// server.
#[derive(Debug)]
pub enum HandlerError {
/// An error returned by a fallible handler function itself.
///
Expand Down
4 changes: 4 additions & 0 deletions dropshot/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,11 +852,15 @@ mod from_map;
mod handler;
mod http_util;
mod logging;
#[cfg(feature = "otel-tracing")]
pub mod otel;
mod pagination;
mod router;
mod schema_util;
mod server;
mod to_map;
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
pub mod tracing_support;
mod type_util;
mod versioning;
mod websocket;
Expand Down
Loading