diff --git a/sei-db/state_db/sc/hashvault/noop_hashvault.go b/sei-db/state_db/sc/hashvault/noop_hashvault.go new file mode 100644 index 0000000000..65df820cd8 --- /dev/null +++ b/sei-db/state_db/sc/hashvault/noop_hashvault.go @@ -0,0 +1,30 @@ +package hashvault + +import "context" + +var _ HashVault = (*NoopHashVault)(nil) + +// NoopHashVault is a HashVault implementation that does nothing. It provides no equivocation +// protection whatsoever. It exists for two purposes: +// - tests that construct a BlockExecutor but do not exercise the vault, and +// - the explicit, operator-opted-in "hash-vault-disabled-unsafe" escape hatch. +// +// Production code must never substitute this for a real vault without a deliberate human decision. +type NoopHashVault struct{} + +// NewNoopHashVault returns a HashVault whose methods are all no-ops. +func NewNoopHashVault() *NoopHashVault { + return &NoopHashVault{} +} + +func (n *NoopHashVault) CommitToHash(_ context.Context, _ uint64, _ []byte) error { + return nil +} + +func (n *NoopHashVault) Prune(_ context.Context, _ uint64) error { + return nil +} + +func (n *NoopHashVault) Close(_ context.Context) error { + return nil +} diff --git a/sei-db/state_db/sc/hashvault/pebble_hashvault.go b/sei-db/state_db/sc/hashvault/pebble_hashvault.go index e42f9e9caa..410c185dc6 100644 --- a/sei-db/state_db/sc/hashvault/pebble_hashvault.go +++ b/sei-db/state_db/sc/hashvault/pebble_hashvault.go @@ -217,9 +217,14 @@ func (p *PebbleHashVault) Close(_ context.Context) error { func (p *PebbleHashVault) logHashMismatch(blockHeight uint64, existing, incoming []byte) { logger.Error("Hashvault detected hash mismatch; node attempted to change its mind. "+ - "DO NOT RESTART WITHOUT HUMAN INVESTIGATION.", + "DO NOT RESTART WITHOUT HUMAN INVESTIGATION. If you are CERTAIN this is not a real "+ + "equivocation, you can bypass this guard by stopping the node and deleting the HashVault "+ + "data directory (hashVaultDir below), then restarting. 
WARNING: deleting it removes "+ + "equivocation protection — if the node then commits a conflicting hash for a height it has "+ + "already finalized, the validator may be SLASHED.", "blockHeight", blockHeight, "existingHex", hex.EncodeToString(existing), "incomingHex", hex.EncodeToString(incoming), + "hashVaultDir", p.config.DataDir, ) } diff --git a/sei-tendermint/config/config.go b/sei-tendermint/config/config.go index ac917d9a36..af201463bb 100644 --- a/sei-tendermint/config/config.go +++ b/sei-tendermint/config/config.go @@ -78,21 +78,27 @@ type Config struct { // AutobahnConfigFile is the path to a JSON file containing the Autobahn (GigaRouter) // configuration. Leave empty to disable Autobahn. AutobahnConfigFile string `mapstructure:"autobahn-config-file"` + + // HashVaultDisabledUnsafe disables the block-hash equivocation guard (HashVault). The vault is + // on by default (false). Setting this to true is an explicit, last-resort operator decision to + // run WITHOUT equivocation protection; the node logs loudly that it is unsafe. 
+ HashVaultDisabledUnsafe bool `mapstructure:"hash-vault-disabled-unsafe"` } // DefaultConfig returns a default configuration for a Tendermint node func DefaultConfig() *Config { return &Config{ - BaseConfig: DefaultBaseConfig(), - RPC: DefaultRPCConfig(), - P2P: DefaultP2PConfig(), - Mempool: DefaultMempoolConfig(), - StateSync: DefaultStateSyncConfig(), - Consensus: DefaultConsensusConfig(), - TxIndex: DefaultTxIndexConfig(), - Instrumentation: DefaultInstrumentationConfig(), - PrivValidator: DefaultPrivValidatorConfig(), - SelfRemediation: DefaultSelfRemediationConfig(), + BaseConfig: DefaultBaseConfig(), + RPC: DefaultRPCConfig(), + P2P: DefaultP2PConfig(), + Mempool: DefaultMempoolConfig(), + StateSync: DefaultStateSyncConfig(), + Consensus: DefaultConsensusConfig(), + TxIndex: DefaultTxIndexConfig(), + Instrumentation: DefaultInstrumentationConfig(), + PrivValidator: DefaultPrivValidatorConfig(), + SelfRemediation: DefaultSelfRemediationConfig(), + HashVaultDisabledUnsafe: false, } } diff --git a/sei-tendermint/config/toml.go b/sei-tendermint/config/toml.go index 9f6f133c39..c3421b388a 100644 --- a/sei-tendermint/config/toml.go +++ b/sei-tendermint/config/toml.go @@ -636,6 +636,12 @@ restart-cooldown-seconds = {{ .SelfRemediation.RestartCooldownSeconds }} # Leave empty to disable Autobahn. autobahn-config-file = "{{ .AutobahnConfigFile }}" +# hash-vault-disabled-unsafe disables the block-hash equivocation guard (HashVault). +# DO NOT set this to true unless you are knowingly running an UNSAFE node as a last-resort +# recovery measure. A node with this enabled has NO protection against changing its mind about +# a committed block's hash, and will log error-level warnings on every startup. 
+hash-vault-disabled-unsafe = {{ .HashVaultDisabledUnsafe }} + ` // defaultConfigTemplate combines manual and auto-managed templates for backward compatibility diff --git a/sei-tendermint/internal/p2p/giga_router.go b/sei-tendermint/internal/p2p/giga_router.go index 66b94d7096..84905ac094 100644 --- a/sei-tendermint/internal/p2p/giga_router.go +++ b/sei-tendermint/internal/p2p/giga_router.go @@ -6,10 +6,12 @@ import ( "fmt" "maps" "net/url" + "path/filepath" "slices" "time" "github.com/ethereum/go-ethereum/common" + "github.com/sei-protocol/sei-chain/sei-db/state_db/sc/hashvault" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" atypes "github.com/sei-protocol/sei-chain/sei-tendermint/autobahn/types" "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" @@ -43,6 +45,11 @@ type GigaRouterConfig struct { Consensus *consensus.Config Producer *producer.Config GenDoc *types.GenesisDoc + + // HashVaultDisabledUnsafe disables the block-hash equivocation guard (HashVault). The guard is + // on by default (false); the GigaRouter builds and owns it (see Run). Setting this to true is an + // explicit, last-resort operator decision to run WITHOUT equivocation protection. + HashVaultDisabledUnsafe bool } type GigaRouter struct { @@ -55,6 +62,11 @@ type GigaRouter struct { poolIn *giga.Pool[NodePublicKey, rpc.Server[giga.API]] poolOut *giga.Pool[NodePublicKey, rpc.Client[giga.API]] + // hashVault is the block-hash equivocation guard. The GigaRouter owns its lifecycle: Run builds + // it (durable under PersistentStateDir, or a no-op when disabled / in-memory) and closes it on + // exit. Never nil once Run has started. See buildHashVault and commitHashToVault. + hashVault hashvault.HashVault + // lastCommitQCRecv is subscribed once at construction and reused for the // lifetime of the GigaRouter. Load() is lock-free (a single // atomic.Pointer.Load). 
@@ -293,6 +305,16 @@ func (r *GigaRouter) executeBlock(ctx context.Context, b *atypes.GlobalBlock) (* if err != nil { return nil, fmt.Errorf("r.cfg.App.FinalizeBlock(): %w", err) } + + // Commit this height's app hash to the equivocation guard before persisting app state, so the + // vault always records our commitment to a height before the state it implies is committed (and + // before the hash is proposed for AppQC voting via PushAppHash below). On restart the block is + // re-executed and the identical hash is re-committed idempotently. A returned error is a benign + // shutdown cancellation; genuine faults panic inside the call. See commitHashToVault. + if err := commitHashToVault(ctx, r.hashVault, b.GlobalNumber, resp.AppHash); err != nil { + return nil, err + } + commitResp, err := app.Commit(ctx) if err != nil { return nil, fmt.Errorf("r.cfg.App.Commit(): %w", err) @@ -303,6 +325,65 @@ func (r *GigaRouter) executeBlock(ctx context.Context, b *atypes.GlobalBlock) (* return commitResp, nil } +// buildHashVault constructs the block-hash equivocation guard the GigaRouter owns. By default it +// returns a durable Pebble-backed vault rooted at PersistentStateDir/hashvault, alongside the +// other Autobahn on-disk state. It returns a no-op vault (no protection) in two cases: the operator +// explicitly set HashVaultDisabledUnsafe — logged loudly as unsafe — or there is no persistent +// state directory (in-memory mode, e.g. tests), where a durable vault would be pointless because +// the data WAL is itself a no-op (see data.NewDataWAL). +func buildHashVault(ctx context.Context, cfg *GigaRouterConfig) (hashvault.HashVault, error) { + if cfg.HashVaultDisabledUnsafe { + logger.Error("################################################################") + logger.Error("# HASHVAULT DISABLED (hash-vault-disabled-unsafe=true). #") + logger.Error("# This node has NO block-hash equivocation protection and is #") + logger.Error("# running in an UNSAFE configuration. 
Re-enable as soon as the #") + logger.Error("# underlying issue is resolved. #") + logger.Error("################################################################") + return hashvault.NewNoopHashVault(), nil + } + dir, ok := cfg.Consensus.PersistentStateDir.Get() + if !ok { + logger.Info("HashVault: no persistent state dir (in-memory mode); using no-op vault") + return hashvault.NewNoopHashVault(), nil + } + hvCfg := hashvault.DefaultHashVaultConfig() + hvCfg.DataDir = filepath.Join(dir, "hashvault") + return hashvault.NewPebbleHashVault(ctx, hvCfg) +} + +// commitHashToVault records the app hash for the given height in the equivocation guard. It panics (halting the +// node) on any error except context cancellation/deadline, which it returns. Every executed height is guarded, so +// a node can never commit to two different app hashes for the same height without deliberate human intervention. +func commitHashToVault( + ctx context.Context, + vault hashvault.HashVault, + height atypes.GlobalBlockNumber, + hash []byte, +) error { + err := vault.CommitToHash(ctx, uint64(height), hash) + if err == nil { + return nil + } + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + logger.Info("HashVault commit aborted by context cancellation during shutdown; not recording hash", + "height", height, "err", err) + return fmt.Errorf("hashvault CommitToHash aborted at height %d: %w", height, err) + } + if errors.Is(err, hashvault.ErrHashMismatch) { + // The HashVault has already logged the conflicting hashes, its data directory, and the + // bypass/slashing guidance immediately before returning this error; don't duplicate it. + logger.Error("FATAL: HashVault detected a block-hash equivocation; halting. See the preceding "+ + "HashVault error for the conflicting hashes and recovery steps. "+ + "DO NOT RESTART WITHOUT HUMAN INTERVENTION.", + "height", height) + } else { + logger.Error("FATAL: HashVault could not commit the block hash (operational error, not a "+ + "confirmed equivocation). 
Halting.", + "height", height, "hashHex", fmt.Sprintf("%x", hash), "err", err) + } + panic(fmt.Sprintf("hashvault CommitToHash failed at height %d: %v", height, err)) +} + func (r *GigaRouter) runExecute(ctx context.Context) error { app := r.cfg.Producer.App @@ -364,10 +445,25 @@ func (r *GigaRouter) runExecute(ctx context.Context) error { if err := r.data.PruneBefore(pruneBefore); err != nil { return fmt.Errorf("r.data.PruneBefore(%v): %w", pruneBefore, err) } + // Align the vault's retention with the data layer's prune boundary. + if err := r.hashVault.Prune(ctx, uint64(pruneBefore)); err != nil { + logger.Error("failed to prune hashvault", "prune_before", pruneBefore, "err", err) + } } } func (r *GigaRouter) Run(ctx context.Context) error { + hashVault, err := buildHashVault(ctx, r.cfg) + if err != nil { + return fmt.Errorf("buildHashVault(): %w", err) + } + r.hashVault = hashVault + defer func() { + if err := hashVault.Close(context.Background()); err != nil { + logger.Error("failed to close hashvault", "err", err) + } + }() + return scope.Run(ctx, func(ctx context.Context, s scope.Scope) error { // Spawn outbound connections dialing. for _, addr := range r.cfg.ValidatorAddrs { diff --git a/sei-tendermint/node/setup.go b/sei-tendermint/node/setup.go index 2479a698c0..384707a2ef 100644 --- a/sei-tendermint/node/setup.go +++ b/sei-tendermint/node/setup.go @@ -369,6 +369,9 @@ func createRouter( if err != nil { return nil, closer, fmt.Errorf("buildGigaConfig: %w", err) } + // The GigaRouter builds and owns the equivocation guard itself; just pass the operator's + // enable/disable decision through as plain config. + gigaCfg.HashVaultDisabledUnsafe = cfg.HashVaultDisabledUnsafe // Resolve a relative persistent_state_dir against the node's --home dir, // matching how other paths in the tendermint config are handled // (config.go's rootify). Absolute paths pass through unchanged. None