Skip to content

op-supervisor: add API for Failsafe mode (part of AutoStop) #16582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions op-service/apis/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ type SupervisorAdminAPI interface {
Stop(ctx context.Context) error
AddL2RPC(ctx context.Context, rpc string, jwtSecret eth.Bytes32) error
Rewind(ctx context.Context, chain eth.ChainID, block eth.BlockID) error
SetFailsafeEnabled(ctx context.Context, enabled bool) error
GetFailsafeEnabled(ctx context.Context) (bool, error)
}

type SupervisorQueryAPI interface {
Expand Down
17 changes: 17 additions & 0 deletions op-service/sources/supervisor_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,23 @@ func (cl *SupervisorClient) Rewind(ctx context.Context, chain eth.ChainID, block
return cl.client.CallContext(ctx, nil, "admin_rewind", chain, block)
}

// SetFailsafeEnabled asks the supervisor to switch failsafe mode on or off by
// invoking the "admin_setFailsafeEnabled" RPC method with the requested state.
// An RPC failure is returned wrapped with a description of the attempted action.
func (cl *SupervisorClient) SetFailsafeEnabled(ctx context.Context, enabled bool) error {
	if rpcErr := cl.client.CallContext(ctx, nil, "admin_setFailsafeEnabled", enabled); rpcErr != nil {
		return fmt.Errorf("failed to set failsafe mode for Supervisor: %w", rpcErr)
	}
	return nil
}

// GetFailsafeEnabled reads the supervisor's current failsafe state by invoking
// the "admin_getFailsafeEnabled" RPC method. On an RPC failure it returns false
// together with the wrapped error.
func (cl *SupervisorClient) GetFailsafeEnabled(ctx context.Context) (bool, error) {
	var failsafe bool
	if rpcErr := cl.client.CallContext(ctx, &failsafe, "admin_getFailsafeEnabled"); rpcErr != nil {
		return false, fmt.Errorf("failed to get failsafe mode for Supervisor: %w", rpcErr)
	}
	return failsafe, nil
}

func (cl *SupervisorClient) CheckAccessList(ctx context.Context, inboxEntries []common.Hash,
minSafety types.SafetyLevel, executingDescriptor types.ExecutingDescriptor) error {
return cl.client.CallContext(ctx, nil, "supervisor_checkAccessList", inboxEntries, minSafety, executingDescriptor)
Expand Down
3 changes: 3 additions & 0 deletions op-supervisor/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ type Config struct {

// RPCVerificationWarnings enables asynchronous RPC verification of DB checkAccess call in the CheckAccessList endpoint, indicating warnings as a metric
RPCVerificationWarnings bool

// FailsafeEnabled enables failsafe mode for the supervisor
FailsafeEnabled bool
}

func (c *Config) Check() error {
Expand Down
9 changes: 9 additions & 0 deletions op-supervisor/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ var (
EnvVars: prefixEnvVars("RPC_VERIFICATION_WARNINGS"),
Value: false,
}
// FailsafeEnabledFlag is the optional boolean CLI flag ("failsafe-enabled",
// default false) that selects whether the supervisor starts in failsafe mode.
// Per its usage text, failsafe mode rejects CheckAccessList requests while
// indexing and cross-validation keep running.
FailsafeEnabledFlag = &cli.BoolFlag{
Name: "failsafe-enabled",
Usage: "Start the supervisor with failsafe enabled. In failsafe mode, the supervisor will reject all CheckAccessList requests. " +
"All other Indexing and Cross Validation actions will continue to operate normally.",
EnvVars: prefixEnvVars("FAILSAFE_ENABLED"),
Value: false,
}
)

var requiredFlags = []cli.Flag{
Expand All @@ -112,6 +119,7 @@ var optionalFlags = []cli.Flag{
DependencySetFlag,
RollupConfigPathsFlag,
RollupConfigSetFlag,
FailsafeEnabledFlag,
}

func init() {
Expand Down Expand Up @@ -188,6 +196,7 @@ func ConfigFromCLI(ctx *cli.Context, version string) (*config.Config, error) {
RPC: oprpc.ReadCLIConfig(ctx),
MockRun: ctx.Bool(MockRunFlag.Name),
RPCVerificationWarnings: ctx.Bool(RPCVerificationWarningsFlag.Name),
FailsafeEnabled: ctx.Bool(FailsafeEnabledFlag.Name),
L1RPC: ctx.String(L1RPCFlag.Name),
SyncSources: syncSourceSetups(ctx),
Datadir: ctx.Path(DataDirFlag.Name),
Expand Down
34 changes: 34 additions & 0 deletions op-supervisor/supervisor/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ type SupervisorBackend struct {

// rpcVerificationWarnings enables asynchronous RPC verification of DB checkAccess call in the CheckAccessList endpoint, indicating warnings as a metric
rpcVerificationWarnings bool

// failsafeEnabled controls whether the supervisor should enable failsafe mode
failsafeEnabled atomic.Bool
}

var (
Expand Down Expand Up @@ -160,6 +163,8 @@ func NewSupervisorBackend(ctx context.Context, logger log.Logger,

rpcVerificationWarnings: cfg.RPCVerificationWarnings,
}
// Set failsafe from config
super.setFailsafeEnabled(cfg.FailsafeEnabled)
eventSys.Register("backend", super)
eventSys.Register("rewinder", super.rewinder)

Expand Down Expand Up @@ -550,6 +555,12 @@ func (su *SupervisorBackend) checkSafety(chainID eth.ChainID, blockID eth.BlockI

func (su *SupervisorBackend) CheckAccessList(ctx context.Context, inboxEntries []common.Hash,
minSafety types.SafetyLevel, execDescr types.ExecutingDescriptor) error {
// Check if failsafe is enabled
if su.isFailsafeEnabled() {
su.logger.Debug("Failsafe is enabled, rejecting access-list check")
return types.ErrFailsafeEnabled
}

switch minSafety {
case types.LocalUnsafe, types.CrossUnsafe, types.LocalSafe, types.CrossSafe, types.Finalized:
// valid safety level
Expand Down Expand Up @@ -821,3 +832,26 @@ func (su *SupervisorBackend) SetConfDepthL1(depth uint64) {
func (su *SupervisorBackend) Rewind(ctx context.Context, chain eth.ChainID, block eth.BlockID) error {
// Pure delegation to the chain databases; ctx is not consulted by this method.
return su.chainDBs.Rewind(chain, block)
}

// SetFailsafeEnabled switches failsafe mode on or off for the supervisor.
// While failsafe is enabled, CheckAccessList rejects requests with
// types.ErrFailsafeEnabled. The ctx argument is not used, and the returned
// error is always nil.
func (su *SupervisorBackend) SetFailsafeEnabled(ctx context.Context, enabled bool) error {
su.setFailsafeEnabled(enabled)
return nil
}

// setFailsafeEnabled stores the failsafe flag on the backend's atomic.Bool.
// It is the internal counterpart of SetFailsafeEnabled: it takes no context
// and returns no error, so it can also be called during construction to apply
// the configured initial state.
func (su *SupervisorBackend) setFailsafeEnabled(enabled bool) {
su.failsafeEnabled.Store(enabled)
}

// GetFailsafeEnabled reports whether failsafe mode is currently enabled on the
// supervisor. The ctx argument is not used, and the returned error is always nil.
func (su *SupervisorBackend) GetFailsafeEnabled(ctx context.Context) (bool, error) {
return su.isFailsafeEnabled(), nil
}

// isFailsafeEnabled reports whether failsafe mode is in effect. It is the
// single check used by CheckAccessList and GetFailsafeEnabled.
func (su *SupervisorBackend) isFailsafeEnabled() bool {
// Presently the stored failsafe bool maps 1:1 to failsafe being enabled;
// the value is read atomically.
return su.failsafeEnabled.Load()
}
96 changes: 96 additions & 0 deletions op-supervisor/supervisor/backend/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,3 +601,99 @@ func TestAsyncVerifyAccessWithRPC(t *testing.T) {
// No error + match => 0 failures
runScenario("NoErr_match", sealA, nil, idA)
}

// TestFailsafeEnabled walks a backend through the failsafe lifecycle:
// disabled by default, enabled via SetFailsafeEnabled, then disabled again.
// At each stage it checks both the reported state and whether CheckAccessList
// accepts a request or rejects it with types.ErrFailsafeEnabled.
func TestFailsafeEnabled(t *testing.T) {
	ctx := context.Background()
	logger := testlog.Logger(t, log.LvlInfo)
	noopMetrics := metrics.NoopMetrics
	dir := t.TempDir()
	cfgSet := fullConfigSet(t, 1)

	// FailsafeEnabled is deliberately left unset so the default is exercised.
	cfg := &config.Config{
		Version:               "test",
		FullConfigSetSource:   cfgSet,
		SynchronousProcessors: true,
		MockRun:               false,
		SyncSources:           &syncnode.CLISyncNodes{},
		Datadir:               dir,
	}

	executor := event.NewGlobalSynchronous(ctx)
	backend, err := NewSupervisorBackend(ctx, logger, noopMetrics, cfg, executor)
	require.NoError(t, err)

	// readFailsafe fetches the current state and fails the test on error.
	readFailsafe := func() bool {
		t.Helper()
		on, getErr := backend.GetFailsafeEnabled(ctx)
		require.NoError(t, getErr)
		return on
	}
	// writeFailsafe flips the state and fails the test on error.
	writeFailsafe := func(on bool) {
		t.Helper()
		require.NoError(t, backend.SetFailsafeEnabled(ctx, on))
	}
	// checkAccess issues the same empty access-list check every time.
	checkAccess := func() error {
		return backend.CheckAccessList(ctx, []common.Hash{}, types.LocalUnsafe, types.ExecutingDescriptor{})
	}

	// Initial state: failsafe off, access-list checks are served.
	require.False(t, readFailsafe(), "failsafe should be disabled by default")
	require.NoError(t, checkAccess(), "CheckAccessList should work normally when failsafe is disabled")

	// Failsafe on: access-list checks are rejected.
	writeFailsafe(true)
	require.True(t, readFailsafe(), "failsafe should be enabled after setting to true")
	require.ErrorIs(t, checkAccess(), types.ErrFailsafeEnabled, "CheckAccessList should return ErrFailsafeEnabled when failsafe is enabled")

	// Failsafe off again: access-list checks are served once more.
	writeFailsafe(false)
	require.False(t, readFailsafe(), "failsafe should be disabled after setting to false")
	require.NoError(t, checkAccess(), "CheckAccessList should work normally when failsafe is disabled")
}

// TestFailsafeEnabledConfigInitialization confirms that a freshly constructed
// backend reports the failsafe state given by config.Config.FailsafeEnabled,
// for both the enabled and the disabled setting.
func TestFailsafeEnabledConfigInitialization(t *testing.T) {
	logger := testlog.Logger(t, log.LvlInfo)
	m := metrics.NoopMetrics
	fullCfgSet := fullConfigSet(t, 1)

	testCases := []struct {
		name            string
		failsafeEnabled bool
	}{
		{
			name:            "FailsafeEnabled",
			failsafeEnabled: true,
		},
		{
			name:            "FailsafeDisabled",
			failsafeEnabled: false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			cfg := &config.Config{
				Version:               "test",
				FullConfigSetSource:   fullCfgSet,
				SynchronousProcessors: true,
				MockRun:               false,
				SyncSources:           &syncnode.CLISyncNodes{},
				// Every subtest constructs its own backend, so it gets its own
				// data directory: a backend must not be built on top of files
				// left behind by the backend of a previous subtest.
				Datadir:         t.TempDir(),
				FailsafeEnabled: tc.failsafeEnabled,
			}

			ex := event.NewGlobalSynchronous(context.Background())
			b, err := NewSupervisorBackend(context.Background(), logger, m, cfg, ex)
			require.NoError(t, err)

			// Verify that failsafe state matches config after initialization
			enabled, err := b.GetFailsafeEnabled(context.Background())
			require.NoError(t, err)
			require.Equal(t, tc.failsafeEnabled, enabled, "failsafe state should match config setting")
		})
	}
}
8 changes: 8 additions & 0 deletions op-supervisor/supervisor/backend/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ func (m *MockBackend) Rewind(ctx context.Context, chain eth.ChainID, block eth.B
return nil
}

// SetFailsafeEnabled is a no-op on the mock: the requested state is discarded
// and nil is returned, so GetFailsafeEnabled keeps reporting false afterwards.
func (m *MockBackend) SetFailsafeEnabled(ctx context.Context, enabled bool) error {
return nil
}

// GetFailsafeEnabled always reports failsafe as disabled on the mock,
// regardless of any earlier SetFailsafeEnabled call, and never returns an error.
func (m *MockBackend) GetFailsafeEnabled(ctx context.Context) (bool, error) {
return false, nil
}

// Close is a no-op on the mock and always returns nil.
func (m *MockBackend) Close() error {
return nil
}
10 changes: 10 additions & 0 deletions op-supervisor/supervisor/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,13 @@ func (a *AdminFrontend) Rewind(ctx context.Context, chain eth.ChainID, block eth
// TODO(#15665) add logging here to track when rewinds are requested
return a.Supervisor.Rewind(ctx, chain, block)
}

// SetFailsafeEnabled forwards the requested failsafe state to the underlying
// Supervisor and returns its result unchanged; no validation or logging is
// performed at this layer.
func (a *AdminFrontend) SetFailsafeEnabled(ctx context.Context, enabled bool) error {
return a.Supervisor.SetFailsafeEnabled(ctx, enabled)
}

// GetFailsafeEnabled returns the failsafe state reported by the underlying
// Supervisor, passing its value and error through unchanged.
func (a *AdminFrontend) GetFailsafeEnabled(ctx context.Context) (bool, error) {
return a.Supervisor.GetFailsafeEnabled(ctx)
}
2 changes: 2 additions & 0 deletions op-supervisor/supervisor/types/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,6 @@ var (
ErrNoRPCSource = errors.New("no RPC client configured")
// ErrUninitialized happens when a chain database is not initialized yet
ErrUninitialized = errors.New("uninitialized chain database")
// ErrFailsafeEnabled is when failsafe is enabled and the request is rejected
ErrFailsafeEnabled = errors.New("failsafe is enabled, rejecting all CheckAccessList requests")
)