Skip to content

Transient boot selection #2050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
539 changes: 273 additions & 266 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ array-init = { version = "2.1.0" }
arrayvec = { version = "0.7.4", default-features = false }
atty = { version = "0.2", default-features = false }
bitfield = { version = "0.13", default-features = false }
bitflags = { version = "2.5.0", default-features = false }
bitflags = { version = "2.9.1", default-features = false }
bstringify = { version = "0.1.2", default-features = false }
byteorder = { version = "1.3.4", default-features = false }
bzip2-rs = { version = "0.1.2", default-features = false }
Expand Down Expand Up @@ -82,6 +82,7 @@ getrandom = { version = "0.2", default-features = false }
goblin = { version = "0.4.3", default-features = true } # goblin::Object doesn't work without everything enabled
heapless = { version = "0.7.17", default-features = false }
heck = { version = "0.5.0", default-features = false }
hex-literal = { version = "0.4.1" }
hkdf = { version = "0.12", default-features = false }
hmac = { version = "0.12.1", default-features = false }
hubpack = { version = "0.1.2", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion app/lpc55xpresso/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ priority = 3
max-sizes = {flash = 26720, ram = 16704}
stacksize = 8192
start = true
sections = {bootstate = "usbsram"}
sections = {bootstate = "usbsram", transient_override = "override"}
uses = ["flash_controller", "hash_crypt"]
notifications = ["flash-irq", "hashcrypt-irq"]
interrupts = {"flash_controller.irq" = "flash-irq", "hash_crypt.irq" = "hashcrypt-irq"}
Expand Down
2 changes: 1 addition & 1 deletion app/oxide-rot-1/app-dev.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ name = "lpc55-update-server"
priority = 3
stacksize = 8192
start = true
sections = {bootstate = "usbsram"}
sections = {bootstate = "usbsram", transient_override = "override"}
uses = ["flash_controller", "hash_crypt"]
notifications = ["flash-irq", "hashcrypt-irq"]
interrupts = {"flash_controller.irq" = "flash-irq", "hash_crypt.irq" = "hashcrypt-irq"}
Expand Down
2 changes: 1 addition & 1 deletion app/oxide-rot-1/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ name = "lpc55-update-server"
priority = 3
stacksize = 8192
start = true
sections = {bootstate = "usbsram"}
sections = {bootstate = "usbsram", transient_override = "override"}
uses = ["flash_controller", "hash_crypt"]
notifications = ["flash-irq", "hashcrypt-irq"]
interrupts = {"flash_controller.irq" = "flash-irq", "hash_crypt.irq" = "hashcrypt-irq"}
Expand Down
2 changes: 1 addition & 1 deletion app/rot-carrier/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ priority = 3
# TODO size this appropriately
stacksize = 8192
start = true
sections = {bootstate = "usbsram"}
sections = {bootstate = "usbsram", transient_override = "override"}
uses = ["flash_controller", "hash_crypt"]
notifications = ["flash-irq", "hashcrypt-irq"]
interrupts = {"flash_controller.irq" = "flash-irq", "hash_crypt.irq" = "hashcrypt-irq"}
Expand Down
16 changes: 16 additions & 0 deletions chips/lpc55/memory.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ write = true
execute = false
dma = true

[[override]]
name = "a"
address = 0x2003ffe0
size = 32
read = true
write = true
execute = false

[[override]]
name = "b"
address = 0x2003ffe0
size = 32
read = true
write = true
execute = false

# Info about the images loaded into flash and dumped by stage0 into USB SRAM
# for hubris use.
[[usbsram]]
Expand Down
1 change: 1 addition & 0 deletions drv/lpc55-update-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mutable-statics = { path = "../../lib/mutable-statics" }

cfg-if.workspace = true
hubpack.workspace = true
hex-literal.workspace = true
idol-runtime.workspace = true
lpc55-pac.workspace = true
num-traits.workspace = true
Expand Down
132 changes: 125 additions & 7 deletions drv/lpc55-update-server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use drv_lpc55_update_api::{
SlotId, SwitchDuration, UpdateTarget, VersionedRotBootInfo,
};
use drv_update_api::UpdateError;
use hex_literal::hex;
use idol_runtime::{
ClientError, Leased, LenLimit, NotificationHandler, RequestError, R, W,
};
Expand All @@ -40,6 +41,10 @@ const PAGE_SIZE: u32 = BYTES_PER_FLASH_PAGE as u32;
#[link_section = ".bootstate"]
static BOOTSTATE: MaybeUninit<[u8; 0x1000]> = MaybeUninit::uninit();

#[used]
#[link_section = ".transient_override"]
static mut TRANSIENT_OVERRIDE: MaybeUninit<[u8; 32]> = MaybeUninit::uninit();

#[derive(Copy, Clone, PartialEq)]
enum UpdateState {
NoUpdate,
Expand Down Expand Up @@ -495,8 +500,45 @@ impl idl::InOrderUpdateImpl for ServerImpl<'_> {

self.image = match (component, slot) {
(RotComponent::Hubris, SlotId::A)
| (RotComponent::Hubris, SlotId::B)
| (RotComponent::Stage0, SlotId::B) => Some((component, slot)),
| (RotComponent::Hubris, SlotId::B) => {
// Fail early on attempt to update the running image.
if same_image(component, slot) {
return Err(UpdateError::InvalidSlotIdForOperation.into());
}

// Rollback protection will be implemented by refusing to set
// boot preference to an image that has a lower epoch value in
// its caboose.
// Setting the boot preference before updating would sidestep that
// protection. So, we will fail the prepare step if any
// preference settings are selecting the update target image.
//
// After the update, the boot image selection will be based on:
// - there being only one properly signed image, or
// - transient boot selection (highest priority), or
// - pending persistent selection (altering the persistent
// selection)
// - persistent preference if neither of the above was used.

let (persistent, pending_persistent, transient) =
self.boot_preferences()?;

// The transient preference must not select the update target.
if transient == Some(slot) {
return Err(UpdateError::InvalidPreferredSlotId.into());
}
// If there is a pending persistent preference, it must
// not select the update target.
if pending_persistent == Some(slot) {
return Err(UpdateError::InvalidPreferredSlotId.into());
} else if slot == persistent {
// If there is no pending persistent preference, then the
// persistent preference must select the currently active image.
return Err(UpdateError::InvalidPreferredSlotId.into());
}
Some((component, slot))
}
(RotComponent::Stage0, SlotId::B) => Some((component, slot)),
_ => return Err(UpdateError::InvalidSlotIdForOperation.into()),
};
self.state = UpdateState::InProgress;
Expand Down Expand Up @@ -626,9 +668,7 @@ impl ServerImpl<'_> {
None
};

// We only support persistent override at this point
// We need to read the magic ram value to fill this in.
let transient_boot_preference = None;
let transient_boot_preference = get_hubris_transient_override();

Ok((
persistent_boot_preference,
Expand Down Expand Up @@ -905,10 +945,32 @@ impl ServerImpl<'_> {
slot: SlotId,
duration: SwitchDuration,
) -> Result<(), RequestError<UpdateError>> {
// Note: Rollback policy will be checking epoch values before activating.
match duration {
SwitchDuration::Once => {
// TODO deposit command token into buffer
return Err(UpdateError::NotImplemented.into());
// Get the currently active slot.
let active_slot = match bootstate()
.map_err(|_| UpdateError::MissingHandoffData)?
.active
{
stage0_handoff::RotSlot::A => SlotId::A,
stage0_handoff::RotSlot::B => SlotId::B,
};

// If the requested slot is the same as the active slot,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These semantics seem confusing / non-obvious. I think it would make more sense to explicitly return an error (InvalidPreferredSlotId?) if someone tries to set the transient override to be the same as the active slot.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for the case where a previous update attempt has been abandoned or failed and left a transient preference that the new session does not want. We want to enable resetting an existing transient preference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This semantic came out of a fault insertion test where this type of abandoned session was simulated.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still seems non-ideal for the developer use case. I previously brought up not clearing the transient setting without some indication (#2050 (comment)), albeit in a slightly different place. I'd rather have an explicit way to clear the transient setting if we want that behavior. This would require MGS changes, but I'd argue this is a gap in how the original API was designed.

// treat this as a request to CLEAR the transient override.
//
// If we did allow setting the active slot as the transient
// preference, then we get this confusing 2-boot scenario when
// pending persistent preference is also used:
// the next reset returns us to the original image and the 2nd
// reset has us running the new/alternate image.
Comment on lines +963 to +967
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This statement seems strange in contrast to "This is for the case where a previous update attempt has been abandoned or failed and left a transient preference that the new session does not want" from above. If we are running active slot A and alternate slot is B, an incomplete/aborted update will always be targeting B. Why do we have a persistent setting for B if the update was aborted?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current API is a bit deficient. The user may reasonably expect that A or B can be selected at any time and that, because we are in an update flow, this is sufficient. Once you consider overall symmetry and recovery from a failed update, a boot-preference clearing operation is also needed.

if active_slot == slot {
set_hubris_transient_override(None);
} else {
// Otherwise, set the preference as requested.
set_hubris_transient_override(Some(slot));
}
}
SwitchDuration::Forever => {
// Locate and return the authoritative CFPA flash word number
Expand Down Expand Up @@ -940,6 +1002,19 @@ impl ServerImpl<'_> {
let (cfpa_word_number, _) =
self.cfpa_word_number_and_version(CfpaPage::Active)?;

// TODO: Hubris issue #2093: If there is a pending update in the
// scratch page, it is not being considered.
// Consider:
// - A is active
// - persistent switch to B without reset
// - scratch page is updated
// - return Ok(())
// - persistent switch to A without reset
// - A is already active, no work performed
// - return Ok(())
// - reset
// - B is active

// Read current CFPA contents.
let mut cfpa = [[0u32; 4]; 512 / 16];
indirect_flash_read_words(
Expand Down Expand Up @@ -1337,6 +1412,49 @@ fn bootstate() -> Result<RotBootStateV2, HandoffDataLoadError> {
RotBootStateV2::load_from_addr(addr)
}

/// Stores `preference` in the `TRANSIENT_OVERRIDE` buffer.
///
/// Nothing in this function reads the value back; per the notes below it is
/// consumed by Bootleby on the next boot. The store is therefore done as a
/// volatile write through a raw pointer: the compiler may not elide or merge
/// it, and no `&mut` reference to the `static mut` is ever formed.
fn set_transient_override(preference: [u8; 32]) {
    // SAFETY: Data is consumed by Bootleby on next boot.
    // There are no concurrent writers possible.
    // Calling this function multiple times is ok.
    // Bootleby is careful to vet contents before acting.
    // The pointer is derived from the static itself, so it is non-null,
    // aligned and valid for a 32-byte write; `MaybeUninit<[u8; 32]>` has the
    // same layout as `[u8; 32]`, which makes the cast sound.
    unsafe {
        core::ptr::write_volatile(
            core::ptr::addr_of_mut!(TRANSIENT_OVERRIDE).cast::<[u8; 32]>(),
            preference,
        );
    }
}

/// Returns the current 32-byte contents of the `TRANSIENT_OVERRIDE` buffer.
///
/// The buffer is read with a volatile load through a raw pointer instead of
/// `MaybeUninit::assume_init`, because this task may never have written it
/// and, per the notes below, Bootleby also consumes and resets it.
fn get_transient_override() -> [u8; 32] {
    // SAFETY: Data is consumed by Bootleby on next boot.
    // There are no concurrent writers possible.
    // Bootleby consumes and resets TRANSIENT_OVERRIDE.
    // The client may be verifying state set during update flows.
    // The pointer is derived from the static itself, so it is non-null,
    // aligned and valid for a 32-byte read; every bit pattern is a valid
    // `[u8; 32]`.
    unsafe {
        core::ptr::read_volatile(
            core::ptr::addr_of!(TRANSIENT_OVERRIDE).cast::<[u8; 32]>(),
        )
    }
}

// Preference constants are taken from bootleby:src/lib.rs
//
// Each constant is a complete 32-byte pattern for the transient override
// buffer. `set_hubris_transient_override` writes one of them, and
// `get_hubris_transient_override` compares the buffer against the two slot
// patterns, reporting any other contents as "no preference".

// Pattern written to request slot A.
const PREFER_SLOT_A: [u8; 32] =
hex!("edb23f2e9b399c3d57695262f29615910ed10c8d9b261bfc2076b8c16c84f66d");
// Pattern written to request slot B.
const PREFER_SLOT_B: [u8; 32] =
hex!("70ed2914e6fdeeebbb02763b96da9faa0160b7fc887425f4d45547071d0ce4ba");
// Bootleby writes all zeros after reading. We write all ones to reset.
// Neither value matches a slot pattern, so both read back as "no preference"
// here.
const PREFER_NOTHING: [u8; 32] = [0xffu8; 32];

/// Records the requested transient boot preference.
///
/// `Some(slot)` stores that slot's preference pattern. `None` stores the
/// reset pattern, which clears any earlier request.
pub fn set_hubris_transient_override(bank: Option<SlotId>) {
    let pattern = match bank {
        Some(SlotId::A) => PREFER_SLOT_A,
        Some(SlotId::B) => PREFER_SLOT_B,
        // Open question: do we need a distinct value that says we were here
        // and cleared, instead of the plain reset pattern?
        None => PREFER_NOTHING,
    };
    set_transient_override(pattern);
}

pub fn get_hubris_transient_override() -> Option<SlotId> {
match get_transient_override() {
PREFER_SLOT_A => Some(SlotId::A),
PREFER_SLOT_B => Some(SlotId::B),
_ => None,
}
}

/// Rounds `offset` up to the next multiple of the flash page size.
///
/// An `offset` that is already page-aligned is returned unchanged. Returns
/// `None` when the rounded value does not fit in a `u32`.
fn round_up_to_flash_page(offset: u32) -> Option<u32> {
    let page_bytes = BYTES_PER_FLASH_PAGE as u32;
    u32::checked_next_multiple_of(offset, page_bytes)
}
Expand Down
3 changes: 3 additions & 0 deletions drv/update-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ pub enum UpdateError {
ImageMismatch,
SignatureNotValidated,
VersionNotSupported,

InvalidPreferredSlotId,
}

impl From<UpdateError> for GwUpdateError {
Expand Down Expand Up @@ -103,6 +105,7 @@ impl From<UpdateError> for GwUpdateError {
UpdateError::ImageMismatch => Self::ImageMismatch,
UpdateError::SignatureNotValidated => Self::SignatureNotValidated,
UpdateError::VersionNotSupported => Self::VersionNotSupported,
UpdateError::InvalidPreferredSlotId => Self::InvalidPreferredSlotId,
}
}
}