diff --git a/support/tracelimit/src/lib.rs b/support/tracelimit/src/lib.rs index 4a2aeaac77..0d3d0548c4 100644 --- a/support/tracelimit/src/lib.rs +++ b/support/tracelimit/src/lib.rs @@ -38,12 +38,12 @@ pub fn disable_rate_limiting(disabled: bool) { #[doc(hidden)] pub struct RateLimiter { - period_ms: u32, - events_per_period: u32, state: Mutex, } struct RateLimiterState { + period_ms: u32, + events_per_period: u32, start: Option, events: u32, missed: u64, @@ -59,9 +59,9 @@ impl RateLimiter { pub const fn new(period_ms: u32, events_per_period: u32) -> Self { Self { - period_ms, - events_per_period, state: Mutex::new(RateLimiterState { + period_ms, + events_per_period, start: None, events: 0, missed: 0, @@ -74,31 +74,110 @@ impl RateLimiter { /// `missed_events` is `Some(n)` if there were any missed events or if this /// event is the last one before rate limiting kicks in. pub fn event(&self) -> Result, RateLimited> { + self.event_with_config(None, None) + } + + /// Returns `Ok(missed_events)` if this event should be logged. + /// Optionally reconfigures the rate limiter if new parameters are provided. + /// + /// `missed_events` is `Some(n)` if there were any missed events or if this + /// event is the last one before rate limiting kicks in. + pub fn event_with_config( + &self, + period_ms: Option, + events_per_period: Option, + ) -> Result, RateLimited> { if DISABLE_RATE_LIMITING.load(Ordering::Relaxed) { return Ok(None); } + let mut state = self.state.try_lock().ok_or(RateLimited)?; + + // Reconfigure if new parameters are provided + let mut reset_state = false; + if let Some(new_period) = period_ms { + if state.period_ms != new_period { + state.period_ms = new_period; + reset_state = true; + } + } + if let Some(new_events_per_period) = events_per_period { + if state.events_per_period != new_events_per_period { + state.events_per_period = new_events_per_period; + reset_state = true; + } + } + + // Reset state when parameters change + if reset_state { + state.start = None; + state.events = 0; + state.missed = 0; + } + let now = Instant::now(); + let period_ms = state.period_ms; let start = state.start.get_or_insert(now); let elapsed = now.duration_since(*start); - if elapsed.as_millis() > self.period_ms as u128 { + if elapsed.as_millis() > period_ms as u128 { *start = now; state.events = 0; } - if state.events >= self.events_per_period { + if state.events >= state.events_per_period { state.missed += 1; return Err(RateLimited); } state.events += 1; let missed = std::mem::take(&mut state.missed); - let missed = (missed != 0 || state.events == self.events_per_period).then_some(missed); + let missed = (missed != 0 || state.events == state.events_per_period).then_some(missed); Ok(missed) } } /// As [`tracing::error!`], but rate limited. +/// +/// Can be called with optional parameters to customize rate limiting: +/// - `period: ` - rate limiting period in milliseconds +/// - `limit: ` - maximum events per period +/// +/// Examples: +/// ``` +/// use tracelimit::error_ratelimited; +/// error_ratelimited!("simple message"); +/// error_ratelimited!(period: 1000, limit: 5, "custom rate limit"); +/// error_ratelimited!(period: 10000, "custom period only"); +/// error_ratelimited!(limit: 50, "custom limit only"); +/// ``` #[macro_export] macro_rules! error_ratelimited { + // With both period and limit + (period: $period:expr, limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) { + $crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With period only + (period: $period:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) { + $crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With limit only + (limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) { + $crate::tracing::error!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // Default case (no custom parameters) ($($rest:tt)*) => { { static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); @@ -110,8 +189,49 @@ macro_rules! error_ratelimited { } /// As [`tracing::warn!`], but rate limited. +/// +/// Can be called with optional parameters to customize rate limiting: +/// - `period: ` - rate limiting period in milliseconds +/// - `limit: ` - maximum events per period +/// +/// Examples: +/// ``` +/// use tracelimit::warn_ratelimited; +/// warn_ratelimited!("simple message"); +/// warn_ratelimited!(period: 1000, limit: 5, "custom rate limit"); +/// warn_ratelimited!(period: 10000, "custom period only"); +/// warn_ratelimited!(limit: 50, "custom limit only"); +/// ``` #[macro_export] macro_rules! warn_ratelimited { + // With both period and limit + (period: $period:expr, limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) { + $crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With period only + (period: $period:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) { + $crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With limit only + (limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) { + $crate::tracing::warn!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // Default case (no custom parameters) ($($rest:tt)*) => { { static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); @@ -123,8 +243,49 @@ macro_rules! warn_ratelimited { } /// As [`tracing::info!`], but rate limited. +/// +/// Can be called with optional parameters to customize rate limiting: +/// - `period: ` - rate limiting period in milliseconds +/// - `limit: ` - maximum events per period +/// +/// Examples: +/// ``` +/// use tracelimit::info_ratelimited; +/// info_ratelimited!("simple message"); +/// info_ratelimited!(period: 1000, limit: 5, "custom rate limit"); +/// info_ratelimited!(period: 10000, "custom period only"); +/// info_ratelimited!(limit: 50, "custom limit only"); +/// ``` #[macro_export] macro_rules! info_ratelimited { + // With both period and limit + (period: $period:expr, limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), Some($limit)) { + $crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With period only + (period: $period:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(Some($period), None) { + $crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // With limit only + (limit: $limit:expr, $($rest:tt)*) => { + { + static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); + if let Ok(missed_events) = RATE_LIMITER.event_with_config(None, Some($limit)) { + $crate::tracing::info!(dropped_ratelimited = missed_events, $($rest)*); + } + } + }; + // Default case (no custom parameters) ($($rest:tt)*) => { { static RATE_LIMITER: $crate::RateLimiter = $crate::RateLimiter::new_default(); diff --git a/vm/devices/firmware/firmware_uefi/src/lib.rs b/vm/devices/firmware/firmware_uefi/src/lib.rs index b6257ebb83..ce71b3139f 100644 --- a/vm/devices/firmware/firmware_uefi/src/lib.rs +++ b/vm/devices/firmware/firmware_uefi/src/lib.rs @@ -70,6 +70,7 @@ use local_clock::InspectableLocalClock; use pal_async::local::block_on; use platform::logger::UefiLogger; use platform::nvram::VsmConfig; +use service::diagnostics::DEFAULT_LOGS_PER_PERIOD; use std::convert::TryInto; use std::ops::RangeInclusive; use std::task::Context; @@ -269,13 +270,17 @@ impl UefiDevice { tracelimit::info_ratelimited!(?addr, data, "set gpa for diagnostics"); self.service.diagnostics.set_gpa(data) } - UefiCommand::PROCESS_EFI_DIAGNOSTICS => self.process_diagnostics(false, "guest"), + UefiCommand::PROCESS_EFI_DIAGNOSTICS => { + self.process_diagnostics(false, DEFAULT_LOGS_PER_PERIOD, "guest") + } _ => tracelimit::warn_ratelimited!(addr, data, "unknown uefi write"), } } fn inspect_extra(&mut self, _resp: &mut inspect::Response<'_>) { - self.process_diagnostics(true, "inspect_extra"); + // This is the only place where we allow reprocessing diagnostics, + // since this will be driven by the user themselves. + self.process_diagnostics(true, DEFAULT_LOGS_PER_PERIOD, "inspect_extra"); } } @@ -317,8 +322,10 @@ impl PollDevice for UefiDevice { // Poll watchdog timeout events if let Poll::Ready(Ok(())) = self.watchdog_recv.poll_recv(cx) { - tracing::info!("watchdog timeout received"); - self.process_diagnostics(true, "watchdog timeout"); + // NOTE: Do not allow reprocessing diagnostics here. + // UEFI programs the watchdog's configuration, so we should assume that + // this path could trigger multiple times. + self.process_diagnostics(false, DEFAULT_LOGS_PER_PERIOD, "watchdog timeout"); } } } diff --git a/vm/devices/firmware/firmware_uefi/src/service/diagnostics.rs b/vm/devices/firmware/firmware_uefi/src/service/diagnostics.rs index 14b49f8a12..bf9ddd5328 100644 --- a/vm/devices/firmware/firmware_uefi/src/service/diagnostics.rs +++ b/vm/devices/firmware/firmware_uefi/src/service/diagnostics.rs @@ -32,6 +32,9 @@ use uefi_specs::hyperv::debug_level::DEBUG_FLAG_NAMES; use uefi_specs::hyperv::debug_level::DEBUG_WARN; use zerocopy::FromBytes; +/// Default number of EfiDiagnosticsLogs emitted per period +pub const DEFAULT_LOGS_PER_PERIOD: u32 = 150; + /// 8-byte alignment for every entry const ALIGNMENT: usize = 8; @@ -103,26 +106,29 @@ fn phase_to_string(phase: u16) -> &'static str { } /// Defines how we want EfiDiagnosticsLog entries to be handled. -pub fn handle_efi_diagnostics_log<'a>(log: EfiDiagnosticsLog<'a>) { +pub fn handle_efi_diagnostics_log(log: EfiDiagnosticsLog<'_>, limit: u32) { let debug_level_str = debug_level_to_string(log.debug_level); let phase_str = phase_to_string(log.phase); match log.debug_level { - DEBUG_WARN => tracing::warn!( + DEBUG_WARN => tracelimit::warn_ratelimited!( + limit: limit, debug_level = %debug_level_str, ticks = log.ticks, phase = %phase_str, log_message = log.message, "EFI log entry" ), - DEBUG_ERROR => tracing::error!( + DEBUG_ERROR => tracelimit::error_ratelimited!( + limit: limit, debug_level = %debug_level_str, ticks = log.ticks, phase = %phase_str, log_message = log.message, "EFI log entry" ), - _ => tracing::info!( + _ => tracelimit::info_ratelimited!( + limit: limit, debug_level = %debug_level_str, ticks = log.ticks, phase = %phase_str, @@ -283,11 +289,13 @@ impl DiagnosticsServices { /// /// # Arguments /// * `allow_reprocess` - If true, allows processing even if already processed for guest + /// * `triggered_by` - String to indicate who triggered the diagnostics processing /// * `gm` - Guest memory to read diagnostics from /// * `log_handler` - Function to handle each parsed log entry fn process_diagnostics( &mut self, allow_reprocess: bool, + triggered_by: &str, gm: &GuestMemory, mut log_handler: F, ) -> Result<(), DiagnosticsError> @@ -450,7 +458,12 @@ impl DiagnosticsServices { } // Print summary statistics - tracelimit::info_ratelimited!(entries_processed, bytes_read, "processed EFI log entries"); + tracelimit::info_ratelimited!( + triggered_by, + entries_processed, + bytes_read, + "processed EFI log entries" + ); Ok(()) } @@ -461,16 +474,22 @@ impl UefiDevice { /// /// # Arguments /// * `allow_reprocess` - If true, allows processing even if already processed for guest - /// * `context` - String to indicate who triggered the diagnostics processing - pub(crate) fn process_diagnostics(&mut self, allow_reprocess: bool, context: &str) { + /// * `triggered_by` - String to indicate who triggered the diagnostics processing + pub(crate) fn process_diagnostics( + &mut self, + allow_reprocess: bool, + limit: u32, + triggered_by: &str, + ) { if let Err(error) = self.service.diagnostics.process_diagnostics( allow_reprocess, + triggered_by, &self.gm, - handle_efi_diagnostics_log, + |log| handle_efi_diagnostics_log(log, limit), ) { tracelimit::error_ratelimited!( error = &error as &dyn std::error::Error, - context, + triggered_by, "failed to process diagnostics buffer" ); }