From fd465a071849ff7e293d47ccc5f791d1cb53bdf4 Mon Sep 17 00:00:00 2001 From: grumbach Date: Wed, 20 May 2026 16:19:36 +0900 Subject: [PATCH 1/6] fix(upgrade): re-verify ML-DSA signature on every cache hit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared upgrade binary cache stored the extracted binary and, on a cache hit, returned it after only a SHA-256 check against a sibling `.meta.json` file. An unkeyed SHA-256 stored next to the file it describes is not a security control: anyone able to write to the shared cache directory (a co-located process, a shared container volume, a low-privilege foothold on the host) could drop a malicious binary plus a forged matching metadata hash, and the next ant-node instance to upgrade would execute it with no signature verification at all — persistent RCE on every co-located node. The ML-DSA-65 signature covers the archive and was only checked on the initial download, never on a cache hit. Changes: - Cache the signed *archive + detached signature* instead of the extracted binary. `BinaryCache::get_verified_archive` re-runs ML-DSA-65 verification on every cache hit; the binary is always extracted fresh from the just-verified archive. A tampered archive, tampered or missing signature, or forged metadata fails verification against the pinned release public key, so a poisoned cache entry is rejected and a fresh verified download runs. - Stage cached files into the caller's process-private temp directory and verify that copy, then extract from the same private path. This closes the verify-vs-extract TOCTOU on the shared cache files: an attacker cannot swap the bytes between when the verifier reads them and when the extractor reads them. - Size policy before any copy or read. `fs::symlink_metadata` + `file_type().is_file()` rejects symlinks / FIFOs / devices outright; archive size is bounded by `MAX_ARCHIVE_SIZE_BYTES` and the signature must be exactly `SIGNATURE_SIZE` bytes. 
Otherwise an attacker could plant `ant-node-X.archive -> /dev/zero` (stats as 0 bytes) and force unbounded disk fill in the staging dir or OOM in `signature::verify_from_file`. Note: the stat and the later `fs::copy` are separate path lookups, so this check shrinks the window rather than closing it against a cache-dir writer who swaps the entry in between. - Cache only after successful extraction. A validly-signed-but-malformed release no longer becomes a shared cache poison pill that every later node downloads, fails to extract, and re-downloads. - `cache_dir.rs` restricts the shared upgrade cache directory to 0700 on Unix as defence in depth; the ML-DSA gate is the primary control. - `store_archive` mirrors the same size / file-type / signature checks before persisting, so a poisoned entry cannot be created through the supported path either. Tests in `src/upgrade/binary_cache.rs` cover the tamper path (SHA-256-forged swap on disk rejected by the signature re-check), the post-hit shared-file swap (private copy unaffected), the symlink-to-`/dev/zero` bypass attempt, oversize archive / wrong-sized signature rejection, and round-trip storage. Production verifies against the pinned `RELEASE_SIGNING_KEY`; tests use a `#[cfg(test)]`-only constructor that injects a generated key without weakening the production trust anchor. Residual: cache entries are not bound to a specific release version (the ML-DSA signing context is constant across versions), so a same-UID attacker who already has any past validly-signed release can plant it under a newer version's cache key and force a downgrade to that old signed binary. Not RCE (still legitimately-signed bytes) and a same-UID attacker has easier paths anyway; closing it cleanly requires coordinated changes in the release-signing pipeline, ant-keygen, ant-node, and ant-client, and is tracked in the binary_cache module docs. 
--- src/upgrade/apply.rs | 93 +++-- src/upgrade/binary_cache.rs | 678 +++++++++++++++++++++++++++++++----- src/upgrade/cache_dir.rs | 21 ++ 3 files changed, 668 insertions(+), 124 deletions(-) diff --git a/src/upgrade/apply.rs b/src/upgrade/apply.rs index 9d19870a..7af8c7d7 100644 --- a/src/upgrade/apply.rs +++ b/src/upgrade/apply.rs @@ -20,7 +20,7 @@ use std::path::{Path, PathBuf}; use tar::Archive; /// Maximum allowed upgrade archive size (200 MiB). -const MAX_ARCHIVE_SIZE_BYTES: usize = 200 * 1024 * 1024; +pub(super) const MAX_ARCHIVE_SIZE_BYTES: usize = 200 * 1024 * 1024; /// Exit code that signals the service manager to restart the process. /// @@ -176,9 +176,24 @@ impl AutoApplyUpgrader { .parent() .ok_or_else(|| Error::Upgrade("Cannot determine binary directory".to_string()))?; - // Create temp directory for upgrade - let temp_dir = tempfile::Builder::new() - .prefix("ant-upgrade-") + // Create temp directory for upgrade. + // + // On Unix, create it with 0700 so a same-host attacker on a different + // UID cannot read/write the staging area between when the cache + // re-verifies the ML-DSA signature on a private copy and when + // `extract_binary` reads it (closes a verify-vs-extract TOCTOU on + // the staging directory). The `tempfile::Builder::permissions` + // path is supported on tempfile 3 — on platforms that don't honour + // it the call is a no-op and the ML-DSA verification on the + // private copy still bounds the residual. 
+ let mut tempdir_builder = tempfile::Builder::new(); + tempdir_builder.prefix("ant-upgrade-"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + tempdir_builder.permissions(std::fs::Permissions::from_mode(0o700)); + } + let temp_dir = tempdir_builder .tempdir_in(binary_dir) .map_err(|e| Error::Upgrade(format!("Failed to create temp dir: {e}")))?; @@ -317,21 +332,26 @@ impl AutoApplyUpgrader { version_str: &str, ) -> Result { if let Some(ref cache) = self.binary_cache { - // Fast path — cache hit without locking - if let Some(cached_path) = cache.get_verified(version_str) { - info!("Cached binary verified for version {}", version_str); - let dest = dest_dir.join( - cached_path - .file_name() - .unwrap_or_else(|| std::ffi::OsStr::new("ant-node")), - ); - if let Err(e) = fs::copy(&cached_path, &dest) { - warn!("Failed to copy from cache, will re-download: {e}"); - return self - .download_verify_extract(info, dest_dir, Some(cache)) - .await; + // Fast path — cache hit without locking. The cache re-verifies + // the ML-DSA signature over the archive on every call, so a + // tampered cache entry returns None here and we fall through to + // a fresh, fully verified download. + // `dest_dir` is this upgrade's process-private temp dir, so the + // cache stages + verifies the archive there; extraction then + // reads exactly the verified bytes (no shared-file TOCTOU). 
+ if let Some(verified_archive) = cache.get_verified_archive(version_str, dest_dir) { + match Self::extract_binary(&verified_archive, dest_dir) { + Ok(binary) => { + info!("Reused signature-verified cached archive for {version_str}"); + return Ok(binary); + } + Err(e) => { + warn!("Failed to extract from cached archive, will re-download: {e}"); + return self + .download_verify_extract(info, dest_dir, Some(cache)) + .await; + } } - return Ok(dest); } // Cache miss — acquire exclusive download lock via spawn_blocking @@ -345,19 +365,15 @@ impl AutoApplyUpgrader { .await .map_err(|e| Error::Upgrade(format!("Lock task failed: {e}")))??; - // Re-check cache under the lock — another node may have populated it - if let Some(cached_path) = cache.get_verified(version_str) { - info!( - "Cached binary became available under lock for version {}", - version_str - ); - let dest = dest_dir.join( - cached_path - .file_name() - .unwrap_or_else(|| std::ffi::OsStr::new("ant-node")), - ); - fs::copy(&cached_path, &dest)?; - return Ok(dest); + // Re-check cache under the lock — another node may have populated + // it. Same re-verification guarantee as the fast path. + if let Some(verified_archive) = cache.get_verified_archive(version_str, dest_dir) { + if let Ok(binary) = Self::extract_binary(&verified_archive, dest_dir) { + info!( + "Signature-verified cached archive became available under lock for {version_str}" + ); + return Ok(binary); + } } // Still missing — download while holding the lock @@ -400,15 +416,22 @@ impl AutoApplyUpgrader { signature::verify_from_file(&archive_path, &sig_path)?; info!("Archive signature verified successfully"); - // Step 4: Extract binary from verified archive + // Step 4: Extract binary from the just-verified archive. 
info!("Extracting binary from archive..."); let extracted_binary = Self::extract_binary(&archive_path, dest_dir)?; - // Store in binary cache if available + // Step 5: Cache the signature-verified ARCHIVE (+ its signature) + // AFTER successful extraction. We cache the signed artifact, never + // the extracted binary, so every later cache hit can re-verify the + // signature. Caching only after extract proves the archive is + // actually usable on this platform avoids turning a + // validly-signed-but-malformed release into a shared cache poison + // pill (every later node would hit cache, fail extract, and + // re-download). if let Some(c) = cache { let version_str = info.version.to_string(); - if let Err(e) = c.store(&version_str, &extracted_binary) { - warn!("Failed to store binary in cache: {e}"); + if let Err(e) = c.store_archive(&version_str, &archive_path, &sig_path) { + warn!("Failed to store verified archive in cache: {e}"); } } diff --git a/src/upgrade/binary_cache.rs b/src/upgrade/binary_cache.rs index 43aeb2ff..b708a1c5 100644 --- a/src/upgrade/binary_cache.rs +++ b/src/upgrade/binary_cache.rs @@ -1,124 +1,400 @@ -//! Disk cache for downloaded upgrade binaries. +//! Disk cache for downloaded upgrade archives. //! //! When multiple ant-node instances detect the same upgrade, only the first -//! one needs to download and verify the archive. `BinaryCache` stores the -//! extracted binary alongside a SHA-256 integrity metadata file so that -//! subsequent nodes can copy it directly. +//! one needs to download the archive. `BinaryCache` stores the **signed +//! archive together with its detached ML-DSA-65 signature** so that +//! subsequent nodes can reuse it. //! -//! **Security note:** SHA-256 is used only for cache integrity (detecting -//! corruption or partial writes). The actual security gate remains the -//! ML-DSA-65 signature verification performed during the initial download. +//! ## Security model +//! +//! 
The ML-DSA-65 signature is the security gate, and it covers the *archive* +//! bytes — not the extracted binary. A previous version cached the extracted +//! binary and, on a cache hit, returned it after only a SHA-256 check against +//! a sibling metadata file. SHA-256 is not a security control: anyone able to +//! write to the shared cache directory (a co-located process, a shared +//! container volume, a low-privilege foothold) could replace the cached +//! binary and its `.meta.json` with a matching hash, and the next node would +//! execute it **without any signature verification** — persistent RCE. +//! +//! This module now caches the *archive + signature* and, on **every** cache +//! hit, re-runs ML-DSA-65 verification over the cached archive before it is +//! used. A tampered archive fails verification (the release key is pinned in +//! the binary and cannot be forged); a tampered or missing signature fails +//! likewise. The extracted binary is always derived fresh from the +//! just-verified archive by the caller, so a poisoned cache entry can never +//! be executed. The SHA-256 metadata is retained only as a fast corruption +//! pre-check, never as the trust decision. +//! +//! ## Residual: cache entries are not bound to a specific release version +//! +//! `signature::SIGNING_CONTEXT = "ant-node-release-v1"` is constant across +//! versions, so the ML-DSA signature attests to "this archive is a valid +//! ant-node release", not "this archive is release X.Y.Z". An attacker with +//! cache-dir write access who possesses any past validly-signed release can +//! plant it under a newer version's cache key; the next node performing +//! that upgrade accepts it and runs it as the newer version. Net effect: +//! forced downgrade or wrong-arch crash loop, not arbitrary RCE. +//! +//! This is out of scope of the cache-poisoning RCE class this module +//! addresses (which trusted SHA-256 alone on cache hits): the `cache_dir` +//! 
is `0o700` (defence in depth, see `cache_dir.rs`) and the attacker +//! already needs same-UID write to exploit this — they can replace the +//! running binary directly. Closing the gap properly requires upstream +//! release-signing changes (the signing context must include the version +//! string, e.g. `b"ant-node-release-v1:1.2.3"`) and is tracked as a +//! follow-up. use crate::error::{Error, Result}; use crate::logging::{debug, warn}; +use crate::upgrade::signature; use fs2::FileExt; +use saorsa_pqc::api::sig::MlDsaPublicKey; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::fs::{self, File}; use std::io::{Read, Write}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; -/// On-disk cache for downloaded upgrade binaries. +/// On-disk cache for downloaded, signature-verified upgrade archives. #[derive(Clone)] pub struct BinaryCache { - /// Directory that holds cached binaries and metadata. + /// Directory that holds cached archives, signatures, and metadata. cache_dir: PathBuf, + /// Verification key override. `None` in production → the pinned release + /// key embedded in [`signature`] is used (the real, unforgeable gate). + /// Only ever `Some` via the `#[cfg(test)]` constructor, so test builds + /// can exercise the cache with a generated keypair without weakening the + /// production trust anchor in any way. + verify_key: Option, } -/// Metadata written alongside each cached binary. +/// Metadata written alongside each cached archive. +/// +/// The SHA-256 here is a fast integrity/corruption pre-check only. It is +/// **not** a security control: the ML-DSA-65 signature over the archive is +/// re-verified on every cache hit regardless of this value. #[derive(Serialize, Deserialize)] -struct CachedBinaryMeta { +struct CachedArchiveMeta { /// Semantic version string (e.g. "1.2.3"). version: String, - /// Hex-encoded SHA-256 digest of the cached binary. 
- sha256: String, - /// When the binary was cached (seconds since UNIX epoch). + /// Hex-encoded SHA-256 digest of the cached archive (corruption check). + archive_sha256: String, + /// When the archive was cached (seconds since UNIX epoch). cached_at_epoch_secs: u64, } impl BinaryCache { /// Create a new binary cache backed by the given directory. + /// + /// Production constructor: the cache verifies cached archives against the + /// pinned release public key embedded in the binary. #[must_use] pub fn new(cache_dir: PathBuf) -> Self { - Self { cache_dir } + Self { + cache_dir, + verify_key: None, + } } - /// Return the path where a cached binary for `version` would be stored. + /// Test-only constructor that verifies against an explicit public key + /// instead of the pinned release key (the production trust anchor is + /// unchanged; this only exists so unit tests can produce verifiable + /// signatures with a generated keypair). + #[cfg(test)] #[must_use] - pub fn cached_binary_path(&self, version: &str) -> PathBuf { - let name = if cfg!(windows) { - format!("ant-node-{version}.exe") - } else { - format!("ant-node-{version}") - }; - self.cache_dir.join(name) + pub fn new_with_verify_key(cache_dir: PathBuf, verify_key: MlDsaPublicKey) -> Self { + Self { + cache_dir, + verify_key: Some(verify_key), + } } - /// Return the cached binary path if it exists and its SHA-256 matches - /// the stored metadata. Returns `None` on any mismatch or error. + /// Path of the cached archive for `version`. #[must_use] - pub fn get_verified(&self, version: &str) -> Option { - let bin_path = self.cached_binary_path(version); + pub fn cached_archive_path(&self, version: &str) -> PathBuf { + self.cache_dir.join(format!("ant-node-{version}.archive")) + } + + /// Path of the cached detached signature for `version`. 
+ #[must_use] + fn cached_signature_path(&self, version: &str) -> PathBuf { + self.cache_dir.join(format!("ant-node-{version}.sig")) + } + + /// Verify `archive` against `sig` using the pinned release key in + /// production, or the injected test key under `#[cfg(test)]`. + fn verify_archive(&self, archive: &Path, sig: &Path) -> Result<()> { + self.verify_key.as_ref().map_or_else( + || signature::verify_from_file(archive, sig), + |key| signature::verify_from_file_with_key(archive, sig, key), + ) + } + + /// Copy the cached archive into the caller-private `private_dir`, + /// **cryptographically re-verify that private copy**, and return its + /// path — or `None` if there is no usable, trusted cache entry. + /// + /// On every call this: + /// 1. loads the sibling metadata and checks the version matches, + /// 2. copies the cached archive + signature into `private_dir` (a + /// location only this process writes, e.g. the per-upgrade temp dir), + /// 3. SHA-256 pre-checks the private copy against the metadata (fast + /// corruption check), then + /// 4. **re-verifies the ML-DSA-65 signature over the private copy** with + /// the pinned release key — the actual security gate. + /// + /// Verifying the *private copy* (not the shared cache file) closes the + /// TOCTOU window: an attacker with write access to the shared cache dir + /// cannot swap the bytes between verification and extraction, because the + /// caller extracts from the returned private path, which is the exact + /// byte sequence that was verified and is unreachable to the attacker. + /// + /// Any failure (missing/corrupt metadata, copy error, hash mismatch, + /// missing signature, or — critically — a signature that does not verify + /// against the pinned release key) returns `None`, forcing a fresh, + /// fully verified download. 
+ /// + /// The caller MUST extract the binary from the returned (private) archive + /// path, so the executed bytes always derive from signature-verified + /// input that no other principal could have modified post-verification. + #[must_use] + pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option { + let cached_archive = self.cached_archive_path(version); + let cached_sig = self.cached_signature_path(version); let meta_path = self.meta_path(version); let meta_data = fs::read_to_string(&meta_path).ok()?; - let meta: CachedBinaryMeta = serde_json::from_str(&meta_data).ok()?; + let meta: CachedArchiveMeta = serde_json::from_str(&meta_data).ok()?; if meta.version != version { debug!("Binary cache version mismatch in metadata"); return None; } - let actual_hash = sha256_file(&bin_path).ok()?; - if actual_hash != meta.sha256 { + // Size policy gate — runs BEFORE we copy or read the cached files. + // + // `signature::verify_from_file*` and `sha256_file` both load the + // archive into memory in full. An attacker with cache-dir write + // access could otherwise drop a multi-GB `.archive` and force disk + // exhaustion in the staging dir or an OOM during re-verification + // before the entry is rejected. The download path already enforces + // `MAX_ARCHIVE_SIZE_BYTES`; cache hits must honour the same bound, + // plus the fixed `SIGNATURE_SIZE`. + // Use `symlink_metadata` (does NOT follow symlinks) and require a + // regular file. Otherwise a cache-dir writer could plant + // `ant-node-X.archive -> /dev/zero` (or a FIFO/device) whose `.len()` + // stats as 0 — passing a `fs::metadata` size check while + // `fs::copy` then reads indefinitely from the underlying special + // file, exhausting disk in the private staging dir. 
+ let archive_meta = match fs::symlink_metadata(&cached_archive) { + Ok(m) => m, + Err(e) => { + debug!("Cannot stat cached archive for {version}: {e}"); + return None; + } + }; + if !archive_meta.file_type().is_file() { + warn!( + "Cached archive for {version} is not a regular file \ + (symlink/special); discarding cache entry" + ); + return None; + } + if archive_meta.len() > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 { + warn!( + "Cached archive for {version} exceeds MAX_ARCHIVE_SIZE_BYTES \ + ({} bytes); discarding cache entry", + archive_meta.len() + ); + return None; + } + let sig_meta = match fs::symlink_metadata(&cached_sig) { + Ok(m) => m, + Err(e) => { + debug!("Cannot stat cached signature for {version}: {e}"); + return None; + } + }; + if !sig_meta.file_type().is_file() { + warn!( + "Cached signature for {version} is not a regular file \ + (symlink/special); discarding cache entry" + ); + return None; + } + if sig_meta.len() != signature::SIGNATURE_SIZE as u64 { + warn!( + "Cached signature for {version} has wrong size ({} bytes, \ + expected {}); discarding cache entry", + sig_meta.len(), + signature::SIGNATURE_SIZE + ); + return None; + } + + // Copy archive + signature into the caller-private directory. + // Everything below operates only on these private copies, which the + // attacker cannot reach — eliminating any verify/extract TOCTOU on + // the shared cache files. + let private_archive = private_dir.join(format!("cached-{version}.archive")); + let private_sig = private_dir.join(format!("cached-{version}.sig")); + + // Cleanup helper defined BEFORE any copy so even a partially-created + // destination from a failed copy is removed on every rejection path. 
+ let cleanup = |reason: &str| { + debug!("Cleaning staged cache copy for {version}: {reason}"); + let _ = fs::remove_file(&private_archive); + let _ = fs::remove_file(&private_sig); + }; + + if let Err(e) = fs::copy(&cached_archive, &private_archive) { + debug!("Could not stage cached archive for {version}: {e}"); + cleanup("archive copy failed"); + return None; + } + if let Err(e) = fs::copy(&cached_sig, &private_sig) { + debug!("Could not stage cached signature for {version}: {e}"); + cleanup("signature copy failed"); + return None; + } + + // Fast corruption pre-check on the PRIVATE copy (NOT the security + // decision). A copy error or truncation surfaces here. + let actual_hash = match sha256_file(&private_archive) { + Ok(h) => h, + Err(e) => { + cleanup(&format!("sha256 read failed: {e}")); + return None; + } + }; + if actual_hash != meta.archive_sha256 { + warn!( + "Binary cache SHA-256 mismatch for version {version} \ + (expected {}, got {actual_hash}) — ignoring cache entry", + meta.archive_sha256 + ); + cleanup("sha256 mismatch"); + return None; + } + + // THE SECURITY GATE: re-verify the ML-DSA-65 signature over the + // PRIVATE archive copy on every hit. The returned path is this same + // private copy, so the caller extracts exactly the bytes that were + // verified — a cache entry tampered with on disk (binary/archive + // swap, forged metadata, or a post-verify swap attempt) cannot + // produce a private copy whose signature verifies against the + // pinned release key. + if let Err(e) = self.verify_archive(&private_archive, &private_sig) { warn!( - "Binary cache SHA-256 mismatch for version {version} (expected {}, got {})", - meta.sha256, actual_hash + "Cached archive for version {version} FAILED ML-DSA signature \ + re-verification ({e}); discarding cache entry (possible \ + on-disk tampering). A fresh verified download will run." 
); + cleanup("signature re-verification failed"); return None; } - Some(bin_path) + debug!("Cached archive for version {version} passed ML-DSA re-verification"); + Some(private_archive) } - /// Store a binary in the cache. + /// Store a signature-verified archive in the cache. /// - /// Uses a write-to-temp-then-rename strategy so that readers never - /// observe partially written files. The metadata file is written last - /// so that `get_verified` only succeeds once both files are complete. + /// Both files are persisted (via write-to-temp-then-rename so readers + /// never observe partial writes); the metadata file is written last so + /// [`get_verified_archive`](Self::get_verified_archive) only succeeds + /// once every file is complete. + /// + /// Defence in depth: this re-verifies the archive against its signature + /// before caching, so a poisoned entry cannot be created through the + /// supported path even if a caller forgot to verify first. /// /// # Errors /// - /// Returns an error if the binary cannot be read or the cache files - /// cannot be written. - pub fn store(&self, version: &str, source_path: &std::path::Path) -> Result<()> { - let hash = sha256_file(source_path)?; + /// Returns an error if the signature does not verify, the inputs cannot + /// be read, or the cache files cannot be written. + pub fn store_archive( + &self, + version: &str, + archive_path: &Path, + signature_path: &Path, + ) -> Result<()> { + // Defence in depth: refuse to persist a non-regular file, an + // oversize archive, or a misshapen signature — mirroring the + // `get_verified_archive` cache-hit policy. `symlink_metadata` + // refuses to chase a symlink the caller may have planted. 
+ let archive_meta = fs::symlink_metadata(archive_path)?; + if !archive_meta.file_type().is_file() { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: source is not a \ + regular file (symlink/special)" + ))); + } + let archive_len = archive_meta.len(); + if archive_len > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: size {archive_len} bytes \ + exceeds MAX_ARCHIVE_SIZE_BYTES" + ))); + } + let sig_meta = fs::symlink_metadata(signature_path)?; + if !sig_meta.file_type().is_file() { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature is not a \ + regular file (symlink/special)" + ))); + } + let sig_len = sig_meta.len(); + if sig_len != signature::SIGNATURE_SIZE as u64 { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature size {sig_len} \ + bytes, expected {}", + signature::SIGNATURE_SIZE + ))); + } + + self.verify_archive(archive_path, signature_path) + .map_err(|e| { + Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature does not verify ({e})" + )) + })?; + + let archive_hash = sha256_file(archive_path)?; - let dest = self.cached_binary_path(version); + let dest_archive = self.cached_archive_path(version); + let dest_sig = self.cached_signature_path(version); let meta_path = self.meta_path(version); - // Write binary to a temp file then rename into place. - // Remove dest first on Windows where rename fails if it exists. 
- let tmp_bin = self.cache_dir.join(format!(".ant-node-{version}.tmp")); - fs::copy(source_path, &tmp_bin)?; - let _ = fs::remove_file(&dest); - fs::rename(&tmp_bin, &dest)?; + Self::atomic_copy( + archive_path, + &dest_archive, + &self + .cache_dir + .join(format!(".ant-node-{version}.archive.tmp")), + )?; + Self::atomic_copy( + signature_path, + &dest_sig, + &self.cache_dir.join(format!(".ant-node-{version}.sig.tmp")), + )?; let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Upgrade(format!("System clock error: {e}")))? .as_secs(); - let meta = CachedBinaryMeta { + let meta = CachedArchiveMeta { version: version.to_string(), - sha256: hash, + archive_sha256: archive_hash, cached_at_epoch_secs: now, }; let meta_json = serde_json::to_string(&meta) .map_err(|e| Error::Upgrade(format!("Failed to serialize binary cache meta: {e}")))?; - // Write metadata to a temp file then rename into place + // Metadata written last so a reader never sees a complete meta file + // pointing at an incomplete archive/signature pair. let tmp_meta = self.cache_dir.join(format!(".ant-node-{version}.meta.tmp")); let mut f = File::create(&tmp_meta)?; f.write_all(meta_json.as_bytes())?; @@ -127,7 +403,10 @@ impl BinaryCache { let _ = fs::remove_file(&meta_path); fs::rename(&tmp_meta, &meta_path)?; - debug!("Cached binary for version {version} at {}", dest.display()); + debug!( + "Cached verified archive for version {version} at {}", + dest_archive.display() + ); Ok(()) } @@ -135,11 +414,11 @@ impl BinaryCache { /// /// This prevents multiple nodes from downloading the same archive /// concurrently — the first acquires the lock and downloads, the rest - /// wait and then find the binary already cached. + /// wait and then find the archive already cached. /// /// The lock is released when the returned guard is dropped. /// - /// **Note:** `lock_exclusive()` blocks the calling thread. 
Callers in + /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in /// async contexts should wrap this call in `tokio::task::spawn_blocking`. /// /// # Errors @@ -156,13 +435,17 @@ impl BinaryCache { // -- private helpers ----------------------------------------------------- + /// Copy `src` to `dest` atomically via a temp file + rename. + fn atomic_copy(src: &Path, dest: &Path, tmp: &Path) -> Result<()> { + fs::copy(src, tmp)?; + // Remove dest first on Windows where rename fails if it exists. + let _ = fs::remove_file(dest); + fs::rename(tmp, dest)?; + Ok(()) + } + fn meta_path(&self, version: &str) -> PathBuf { - let name = if cfg!(windows) { - format!("ant-node-{version}.exe.meta.json") - } else { - format!("ant-node-{version}.meta.json") - }; - self.cache_dir.join(name) + self.cache_dir.join(format!("ant-node-{version}.meta.json")) } } @@ -174,7 +457,7 @@ pub struct DownloadLockGuard { } /// Compute the hex-encoded SHA-256 digest of a file. -fn sha256_file(path: &std::path::Path) -> Result { +fn sha256_file(path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; @@ -198,58 +481,275 @@ fn sha256_file(path: &std::path::Path) -> Result { #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { use super::*; + use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey}; + use std::sync::OnceLock; use tempfile::TempDir; + /// One generated keypair for the whole test module (keygen is expensive). + fn test_keypair() -> &'static (MlDsaPublicKey, MlDsaSecretKey) { + static KP: OnceLock<(MlDsaPublicKey, MlDsaSecretKey)> = OnceLock::new(); + KP.get_or_init(|| ml_dsa_65().generate_keypair().unwrap()) + } + + fn cache_with_test_key(dir: &Path) -> BinaryCache { + BinaryCache::new_with_verify_key(dir.to_path_buf(), test_keypair().0.clone()) + } + + /// A caller-private staging directory (the per-upgrade temp dir in + /// production). Returned so it outlives the call. 
+ fn priv_dir() -> TempDir { + TempDir::new().unwrap() + } + + /// Write an archive + a valid detached signature over it. + fn make_signed_archive(dir: &Path, contents: &[u8]) -> (PathBuf, PathBuf) { + let archive = dir.join("src-archive"); + fs::write(&archive, contents).unwrap(); + let sig = ml_dsa_65() + .sign_with_context(&test_keypair().1, contents, signature::SIGNING_CONTEXT) + .unwrap(); + let sig_path = dir.join("src-archive.sig"); + fs::write(&sig_path, sig.to_bytes()).unwrap(); + (archive, sig_path) + } + #[test] fn test_miss_returns_none() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); - assert!(cache.get_verified("1.0.0").is_none()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } #[test] - fn test_store_and_get_verified() { + fn test_store_and_get_verified_archive() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"signed archive bytes"); + cache.store_archive("1.2.3", &archive, &sig).unwrap(); + + let got = cache + .get_verified_archive("1.2.3", pd.path()) + .expect("cache hit"); + assert_eq!(fs::read(&got).unwrap(), b"signed archive bytes"); + // The returned path must be the PRIVATE copy, not the shared cache + // file (that is what closes the verify/extract TOCTOU). 
+ assert!( + got.starts_with(pd.path()), + "returned archive must be the caller-private copy, got {got:?}" + ); + assert_ne!(got, cache.cached_archive_path("1.2.3")); + } - // Create a fake binary - let src = tmp.path().join("source-bin"); - fs::write(&src, b"hello world binary").unwrap(); + #[test] + fn test_store_rejects_unsigned_archive() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); - cache.store("1.2.3", &src).unwrap(); + let archive = tmp.path().join("a"); + fs::write(&archive, b"unsigned").unwrap(); + let bad_sig = tmp.path().join("a.sig"); + fs::write(&bad_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap(); - let result = cache.get_verified("1.2.3"); - assert!(result.is_some()); - let cached_path = result.unwrap(); - assert_eq!(fs::read(&cached_path).unwrap(), b"hello world binary"); + assert!(cache.store_archive("1.0.0", &archive, &bad_sig).is_err()); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } + /// An attacker who swaps the cached archive on disk (and even forges a + /// matching SHA-256 in the metadata) cannot get it trusted, because + /// the ML-DSA signature is re-verified on every hit. #[test] - fn test_sha256_mismatch_returns_none() { + fn test_tampered_cached_archive_is_rejected() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"legit release archive"); + cache.store_archive("2.0.0", &archive, &sig).unwrap(); + assert!(cache.get_verified_archive("2.0.0", pd.path()).is_some()); + + // Attacker overwrites the cached archive with a malicious payload... + let cached_archive = cache.cached_archive_path("2.0.0"); + fs::write(&cached_archive, b"malicious payload").unwrap(); + + // ...and forges the metadata SHA-256 so the corruption pre-check passes. 
+ let forged_hash = { + let mut h = Sha256::new(); + h.update(b"malicious payload"); + hex::encode(h.finalize()) + }; + let meta = CachedArchiveMeta { + version: "2.0.0".to_string(), + archive_sha256: forged_hash, + cached_at_epoch_secs: 0, + }; + fs::write( + cache.meta_path("2.0.0"), + serde_json::to_string(&meta).unwrap(), + ) + .unwrap(); + + // The SHA-256 pre-check now passes, but ML-DSA re-verification of the + // swapped archive against the key fails → entry rejected. + assert!( + cache.get_verified_archive("2.0.0", pd.path()).is_none(), + "tampered cache entry must NOT be trusted even with a forged \ + matching SHA-256 — the signature gate runs on every hit" + ); + } - // Store a valid binary - let src = tmp.path().join("source-bin"); - fs::write(&src, b"original content").unwrap(); - cache.store("1.0.0", &src).unwrap(); + /// TOCTOU defence: even if an attacker swaps the *shared* cache archive + /// for malicious bytes immediately after a hit, the previously returned + /// path (a caller-private copy) still contains the verified bytes, so + /// what gets extracted/executed is exactly what was signature-verified. + #[test] + fn test_returned_archive_is_private_copy_immune_to_post_hit_swap() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"the real signed release"); + cache.store_archive("3.0.0", &archive, &sig).unwrap(); + + let verified = cache + .get_verified_archive("3.0.0", pd.path()) + .expect("cache hit"); + + // Attacker swaps the SHARED cache archive right after verification. + fs::write( + cache.cached_archive_path("3.0.0"), + b"post-verify malicious swap", + ) + .unwrap(); + + // The path the caller will extract from is the private copy and is + // unaffected by the shared-file swap. 
+ assert_eq!( + fs::read(&verified).unwrap(), + b"the real signed release", + "extraction must read the verified private bytes, not the \ + attacker's post-verification swap" + ); + } - // Corrupt the cached binary - let cached = cache.cached_binary_path("1.0.0"); - fs::write(&cached, b"corrupted content").unwrap(); + #[test] + fn test_missing_signature_returns_none() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"data"); + cache.store_archive("1.0.0", &archive, &sig).unwrap(); - assert!(cache.get_verified("1.0.0").is_none()); + // Attacker deletes the signature to try to skip verification. + fs::remove_file(cache.cached_signature_path("1.0.0")).unwrap(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } #[test] fn test_missing_meta_returns_none() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + let (archive, sig) = make_signed_archive(tmp.path(), b"data"); + cache.store_archive("1.0.0", &archive, &sig).unwrap(); + fs::remove_file(cache.meta_path("1.0.0")).unwrap(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); + } + + /// Size policy: an attacker with cache-dir write cannot OOM/disk-exhaust + /// the verifier by dropping a multi-GB archive — `get_verified_archive` + /// stat-checks the cached archive against `MAX_ARCHIVE_SIZE_BYTES` BEFORE + /// any copy or `fs::read` reaches `signature::verify_from_file`. 
+ #[test] + fn test_oversize_cached_archive_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a real signed entry so the meta/sig pass earlier checks… + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("3.1.0", &archive, &sig).unwrap(); + // …then truncate-grow the cached archive past the limit. + let cached_archive = cache.cached_archive_path("3.1.0"); + let oversize = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1; + { + let f = File::create(&cached_archive).unwrap(); + f.set_len(oversize).unwrap(); + } + + // The size gate rejects pre-copy → no private archive ever staged. + assert!(cache.get_verified_archive("3.1.0", pd.path()).is_none()); + let private_archive = pd.path().join("cached-3.1.0.archive"); + assert!( + !private_archive.exists(), + "oversize entry must NOT be staged into private dir" + ); + } + + #[test] + fn test_wrong_size_signature_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); - // Write a binary but no meta file - let cached = cache.cached_binary_path("1.0.0"); - fs::write(&cached, b"binary data").unwrap(); + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("3.2.0", &archive, &sig).unwrap(); + // Replace the cached signature with the wrong size. + fs::write(cache.cached_signature_path("3.2.0"), b"too-short").unwrap(); - assert!(cache.get_verified("1.0.0").is_none()); + assert!(cache.get_verified_archive("3.2.0", pd.path()).is_none()); + } + + /// `store_archive` itself refuses to persist an oversize archive — even + /// from a (hypothetically) misbehaving caller that bypassed the + /// download-time size cap. 
+ #[test] + fn test_store_archive_rejects_oversize() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + + // Make a sparse "archive" past the limit and any signature. + let big = tmp.path().join("big.archive"); + { + let f = File::create(&big).unwrap(); + f.set_len(crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1) + .unwrap(); + } + let any_sig = tmp.path().join("any.sig"); + fs::write(&any_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap(); + + assert!(cache.store_archive("9.9.9", &big, &any_sig).is_err()); + } + + /// Round-3 regression: a cache-dir writer cannot bypass the size gate + /// by planting a symlink whose `stat(2)` size is small but whose + /// target reads indefinitely (e.g. `/dev/zero`). `symlink_metadata` + /// + `is_file()` rejects the entry before any `fs::copy` reads it. + #[cfg(unix)] + #[test] + fn test_symlink_cached_archive_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a legit signed entry so meta/version/sig-size are good… + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("4.0.0", &archive, &sig).unwrap(); + // …then replace the cached archive with a symlink to /dev/zero. + let cached_archive = cache.cached_archive_path("4.0.0"); + fs::remove_file(&cached_archive).unwrap(); + std::os::unix::fs::symlink("/dev/zero", &cached_archive).unwrap(); + + assert!( + cache.get_verified_archive("4.0.0", pd.path()).is_none(), + "a symlinked cached archive must be rejected pre-copy, \ + not chased into /dev/zero" + ); + // Nothing should have been staged. 
+ assert!(!pd.path().join("cached-4.0.0.archive").exists()); } } diff --git a/src/upgrade/cache_dir.rs b/src/upgrade/cache_dir.rs index 75458e96..aa099ddb 100644 --- a/src/upgrade/cache_dir.rs +++ b/src/upgrade/cache_dir.rs @@ -26,6 +26,27 @@ pub fn upgrade_cache_dir() -> Result { let cache_dir = project_dirs.data_dir().join("upgrades"); fs::create_dir_all(&cache_dir)?; + // Defence in depth: restrict the shared upgrade cache to the owning + // user (0700) so a co-located low-privilege process cannot + // write/tamper with cached archives in the first place. The ML-DSA + // re-verification on every cache hit is the primary control; this just + // shrinks the attack surface. Best-effort on Unix; a failure to tighten + // permissions must not break upgrades (the crypto gate still holds). + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(meta) = fs::metadata(&cache_dir) { + let mut perms = meta.permissions(); + perms.set_mode(0o700); + if let Err(e) = fs::set_permissions(&cache_dir, perms) { + crate::logging::warn!( + "Could not tighten upgrade cache dir permissions to 0700 ({e}); \ + ML-DSA re-verification still protects cached archives" + ); + } + } + } + Ok(cache_dir) } From 20201325f78401fcf668da5d3fe58422101bf3ad Mon Sep 17 00:00:00 2001 From: grumbach Date: Wed, 20 May 2026 17:00:07 +0900 Subject: [PATCH 2/6] address review: harden meta read, narrow copy TOCTOU, fix stale wording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback on the upgrade binary cache: - `meta.json` was read with an unbounded `fs::read_to_string`. An attacker with write access to the shared cache directory could plant the metadata sidecar as a symlink to `/dev/zero` or as a huge file and stall the read into a hang/OOM before the archive/sig hardening ran. 
The metadata path now goes through the same open-once-and-validate gate as the archive: regular-file check on the opened handle, capped at `MAX_META_BYTES` (4 KiB). - Archive + signature staging previously did `symlink_metadata` (path) followed by `fs::copy` (path), leaving a small TOCTOU window where an attacker could race-swap the path to a symlink/FIFO/device or an oversized file between the check and the copy. Both files are now opened once via `open_regular_capped`, validated on the resulting `File` handle (size + file-type), and copied into the private staging dir from the open handle (wrapped in `Read::take(len)` as belt-and-braces against a post-open extension). All subsequent operations on those files use the staged private bytes, never the shared path. - Comment fix: the prior comment claimed `sha256_file` loads the archive into memory in full. It actually streams in 8 KiB chunks; the memory-pressure concern is `signature::verify_from_file*` (FIPS-204 requires the message as a slice). Wording updated. - Stale error message "Failed to serialize binary cache meta" updated to "Failed to serialize cached archive metadata" — the cache now stores archive metadata, not extracted-binary metadata. Two new tests: test_oversized_meta_is_rejected test_meta_symlink_to_special_file_is_rejected (Unix-only) 488 lib tests pass; cfd clean. 
--- src/upgrade/binary_cache.rs | 232 ++++++++++++++++++++++++++---------- 1 file changed, 169 insertions(+), 63 deletions(-) diff --git a/src/upgrade/binary_cache.rs b/src/upgrade/binary_cache.rs index b708a1c5..3eef3895 100644 --- a/src/upgrade/binary_cache.rs +++ b/src/upgrade/binary_cache.rs @@ -51,10 +51,18 @@ use fs2::FileExt; use saorsa_pqc::api::sig::MlDsaPublicKey; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::fs::{self, File}; -use std::io::{Read, Write}; +use std::fs::{self, File, OpenOptions}; +use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; +/// Maximum size accepted for the `.meta.json` sidecar. +/// +/// A well-formed `CachedArchiveMeta` serialises to roughly 120 bytes; the +/// 4 KiB cap is comfortably above any legitimate payload and tight enough +/// that an attacker who plants a metadata file the size of `/dev/zero` +/// cannot stall the metadata read into a hang or OOM. +const MAX_META_BYTES: u64 = 4 * 1024; + /// On-disk cache for downloaded, signature-verified upgrade archives. #[derive(Clone)] pub struct BinaryCache { @@ -157,13 +165,40 @@ impl BinaryCache { /// The caller MUST extract the binary from the returned (private) archive /// path, so the executed bytes always derive from signature-verified /// input that no other principal could have modified post-verification. + // The verifier-side cache-hit gate is read top-to-bottom by anyone + // auditing the security model. Splitting it into smaller helpers just + // to placate clippy's line limit would scatter the threat model across + // call sites without improving safety. 
+ #[allow(clippy::too_many_lines)] #[must_use] pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option<PathBuf> { let cached_archive = self.cached_archive_path(version); let cached_sig = self.cached_signature_path(version); let meta_path = self.meta_path(version); - let meta_data = fs::read_to_string(&meta_path).ok()?; + // Read the metadata sidecar with a small, opened-handle size cap so + // an attacker with cache-dir write cannot plant `meta.json` as a + // symlink to `/dev/zero` (or any large/special file) and force a + // hang/OOM here before the archive/sig hardening runs. + let meta_data = { + let (mut meta_file, meta_len) = match open_regular_capped(&meta_path, MAX_META_BYTES) { + Ok(pair) => pair, + Err(e) => { + debug!("Rejecting cache metadata for {version}: {e}"); + return None; + } + }; + // `meta_len` is capped at MAX_META_BYTES (4 KiB), so this + // conversion can never fail in practice; `unwrap_or(usize::MAX)` + // makes that explicit for clippy on 32-bit targets. + let cap = usize::try_from(meta_len).unwrap_or(usize::MAX); + let mut buf = String::with_capacity(cap); + if let Err(e) = meta_file.read_to_string(&mut buf) { + debug!("Failed to read cache metadata for {version}: {e}"); + return None; + } + buf + }; let meta: CachedArchiveMeta = serde_json::from_str(&meta_data).ok()?; if meta.version != version { @@ -171,88 +206,77 @@ impl BinaryCache { return None; } - // Size policy gate — runs BEFORE we copy or read the cached files. + // Open archive + signature ONCE each with size and file-type + // validation on the opened handles. Subsequent reads / hash / + // signature verification all go through the FDs opened here — there + // is no second path-based stat or open after this point, so an + // attacker who races a swap on the cache-dir paths (symlink, FIFO, + // device, oversized file) after these validations cannot redirect + // what gets staged into the private dir. 
// - // `signature::verify_from_file*` and `sha256_file` both load the - // archive into memory in full. An attacker with cache-dir write - // access could otherwise drop a multi-GB `.archive` and force disk - // exhaustion in the staging dir or an OOM during re-verification - // before the entry is rejected. The download path already enforces - // `MAX_ARCHIVE_SIZE_BYTES`; cache hits must honour the same bound, - // plus the fixed `SIGNATURE_SIZE`. - // Use `symlink_metadata` (does NOT follow symlinks) and require a - // regular file. Otherwise a cache-dir writer could plant - // `ant-node-X.archive -> /dev/zero` (or a FIFO/device) whose `.len()` - // stats as 0 — passing a `fs::metadata` size check while - // `fs::copy` then reads indefinitely from the underlying special - // file, exhausting disk in the private staging dir. - let archive_meta = match fs::symlink_metadata(&cached_archive) { - Ok(m) => m, + // Memory pressure note: `signature::verify_from_file*` reads the + // archive into memory in full (it is the FIPS-204 verifier's + // contract — message must be provided as a slice). `sha256_file` + // streams in 8 KiB chunks and is not an OOM vector. The + // `MAX_ARCHIVE_SIZE_BYTES` cap bounds the in-memory load and the + // staging-dir disk footprint together. 
+ let (mut archive_file, archive_len) = match open_regular_capped( + &cached_archive, + crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64, + ) { + Ok(pair) => pair, Err(e) => { - debug!("Cannot stat cached archive for {version}: {e}"); + warn!("Rejecting cached archive for {version}: {e}"); return None; } }; - if !archive_meta.file_type().is_file() { - warn!( - "Cached archive for {version} is not a regular file \ - (symlink/special); discarding cache entry" - ); - return None; - } - if archive_meta.len() > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 { - warn!( - "Cached archive for {version} exceeds MAX_ARCHIVE_SIZE_BYTES \ - ({} bytes); discarding cache entry", - archive_meta.len() - ); - return None; - } - let sig_meta = match fs::symlink_metadata(&cached_sig) { - Ok(m) => m, - Err(e) => { - debug!("Cannot stat cached signature for {version}: {e}"); - return None; - } - }; - if !sig_meta.file_type().is_file() { - warn!( - "Cached signature for {version} is not a regular file \ - (symlink/special); discarding cache entry" - ); - return None; - } - if sig_meta.len() != signature::SIGNATURE_SIZE as u64 { + let (mut sig_file, sig_len) = + match open_regular_capped(&cached_sig, signature::SIGNATURE_SIZE as u64) { + Ok(pair) => pair, + Err(e) => { + warn!("Rejecting cached signature for {version}: {e}"); + return None; + } + }; + if sig_len != signature::SIGNATURE_SIZE as u64 { + // open_regular_capped enforces ≤ max; we additionally require + // EXACTLY SIGNATURE_SIZE (a shorter sig is not valid ML-DSA-65). warn!( - "Cached signature for {version} has wrong size ({} bytes, \ - expected {}); discarding cache entry", - sig_meta.len(), + "Cached signature for {version} has wrong size ({sig_len} bytes, \ + expected {})", signature::SIGNATURE_SIZE ); return None; } - // Copy archive + signature into the caller-private directory. 
- // Everything below operates only on these private copies, which the - // attacker cannot reach — eliminating any verify/extract TOCTOU on - // the shared cache files. + // Stream the validated archive + signature into the caller-private + // directory FROM THE ALREADY-OPEN HANDLES (not from the path), so + // the bytes the verifier reads are the exact bytes the open-handle + // metadata checks were performed against. `take()` is belt-and- + // braces against an attacker who extends the file after open. let private_archive = private_dir.join(format!("cached-{version}.archive")); let private_sig = private_dir.join(format!("cached-{version}.sig")); - // Cleanup helper defined BEFORE any copy so even a partially-created - // destination from a failed copy is removed on every rejection path. let cleanup = |reason: &str| { debug!("Cleaning staged cache copy for {version}: {reason}"); let _ = fs::remove_file(&private_archive); let _ = fs::remove_file(&private_sig); }; - if let Err(e) = fs::copy(&cached_archive, &private_archive) { + if let Err(e) = (|| -> io::Result<()> { + let mut dest = File::create(&private_archive)?; + io::copy(&mut (&mut archive_file).take(archive_len), &mut dest)?; + Ok(()) + })() { debug!("Could not stage cached archive for {version}: {e}"); cleanup("archive copy failed"); return None; } - if let Err(e) = fs::copy(&cached_sig, &private_sig) { + if let Err(e) = (|| -> io::Result<()> { + let mut dest = File::create(&private_sig)?; + io::copy(&mut (&mut sig_file).take(sig_len), &mut dest)?; + Ok(()) + })() { debug!("Could not stage cached signature for {version}: {e}"); cleanup("signature copy failed"); return None; @@ -390,8 +414,9 @@ impl BinaryCache { cached_at_epoch_secs: now, }; - let meta_json = serde_json::to_string(&meta) - .map_err(|e| Error::Upgrade(format!("Failed to serialize binary cache meta: {e}")))?; + let meta_json = serde_json::to_string(&meta).map_err(|e| { + Error::Upgrade(format!("Failed to serialize cached archive metadata: 
{e}")) + })?; // Metadata written last so a reader never sees a complete meta file // pointing at an incomplete archive/signature pair. @@ -456,6 +481,39 @@ pub struct DownloadLockGuard { _file: File, } +/// Open `path` as a regular file with size at most `max_len`, validating +/// the metadata on the **opened handle** so a race between any prior stat +/// and the read cannot substitute a special file (FIFO/device/socket) or +/// an oversized payload. A symlink whose target is a regular file is +/// accepted (it's just an indirect path to a regular file — the attacker +/// who placed the link already needed write access to the cache dir, the +/// same access level as directly editing the regular file); a symlink +/// whose target is a special file is rejected by the `is_file()` check on +/// the opened handle. +/// +/// Returns `(File, len)` on success; the returned `File` is positioned at +/// offset 0 and may be `io::copy`'d into a destination — callers should +/// wrap with `Read::take(max_len)` so an attacker who extends the file +/// after the metadata read cannot stream beyond the cap. +fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> { + let file = OpenOptions::new().read(true).open(path)?; + let meta = file.metadata()?; + if !meta.file_type().is_file() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "not a regular file (FIFO/device/socket/dir)", + )); + } + let len = meta.len(); + if len > max_len { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("file exceeds size cap ({len} > {max_len})"), + )); + } + Ok((file, len)) +} + /// Compute the hex-encoded SHA-256 digest of a file. fn sha256_file(path: &Path) -> Result { let mut file = File::open(path)?; @@ -752,4 +810,52 @@ mod tests { // Nothing should have been staged. 
assert!(!pd.path().join("cached-4.0.0.archive").exists()); } + + /// `.meta.json` is read through the same size/file-type gate as the + /// archive and signature: planting a multi-MB metadata file (or a + /// metadata symlink to a special file) is rejected pre-parse without + /// risking a hang or large allocation. + #[test] + fn test_oversized_meta_is_rejected() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Establish a valid entry so archive/sig are well-formed. + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("5.0.0", &archive, &sig).unwrap(); + + // Overwrite meta with a file well above MAX_META_BYTES of garbage. + let meta_path = cache.meta_path("5.0.0"); + let huge = vec![b'a'; usize::try_from(MAX_META_BYTES).unwrap_or(usize::MAX) + 1024]; + fs::write(&meta_path, &huge).unwrap(); + + assert!( + cache.get_verified_archive("5.0.0", pd.path()).is_none(), + "oversized metadata file must be rejected before parsing" + ); + } + + /// `.meta.json` planted as a symlink to a special file (e.g. + /// `/dev/zero`) is rejected by the open-handle file-type check, + /// without hanging or OOM'ing on the read. 
+ #[cfg(unix)] + #[test] + fn test_meta_symlink_to_special_file_is_rejected() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("5.1.0", &archive, &sig).unwrap(); + + let meta_path = cache.meta_path("5.1.0"); + fs::remove_file(&meta_path).unwrap(); + std::os::unix::fs::symlink("/dev/zero", &meta_path).unwrap(); + + assert!( + cache.get_verified_archive("5.1.0", pd.path()).is_none(), + "metadata symlink to a special file must be rejected" + ); + } } From 0d53f7212e6d1f25d5ab0a4ce221cd9902e1452c Mon Sep 17 00:00:00 2001 From: grumbach Date: Fri, 22 May 2026 15:07:07 +0900 Subject: [PATCH 3/6] fix(upgrade): reject FIFO/pipe planted at cache entry path Close a local DoS on auto-upgrade: a cache-dir attacker could plant a FIFO at ant-node-<version>.archive (or .sig / .meta.json) and open() for reading would block indefinitely waiting for a writer, hanging the upgrade. open_regular_capped previously only checked file type AFTER the blocking open. Two-layer defence in open_regular_capped: - Pre-check via fs::metadata (follows symlinks), reject non-regular files before open(). A symlink-to-regular is still accepted as before; a symlink-to-FIFO/device/socket is rejected. - On Unix, also open with O_NONBLOCK so a race between the pre-check and open() cannot reopen the FIFO window. Reads on regular files ignore O_NONBLOCK, so this is a no-op for the happy path. Platform-specific constant (0o4000 Linux, 0x0004 macOS/BSD); fallback to no flag on unknown unix-likes. The existing post-open is_file() check on the file handle remains the TOCTOU-safe final gate. New regression test test_fifo_cached_archive_does_not_hang plants a real FIFO via mkfifo and asserts return in well under 2s. 14/14 binary_cache tests pass; cfd clean. 
--- src/upgrade/binary_cache.rs | 122 +++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/src/upgrade/binary_cache.rs b/src/upgrade/binary_cache.rs index 3eef3895..cbe7c52a 100644 --- a/src/upgrade/binary_cache.rs +++ b/src/upgrade/binary_cache.rs @@ -491,12 +491,78 @@ pub struct DownloadLockGuard { /// whose target is a special file is rejected by the `is_file()` check on /// the opened handle. /// +/// On Unix, `open()` of a FIFO/named-pipe for reading blocks until a +/// writer connects, so a cache-dir attacker could otherwise hang the +/// upgrade indefinitely by planting a FIFO at the cache entry's path. We +/// (a) reject non-regular files via a `fs::metadata()` pre-check (follows +/// symlinks, so a symlink-to-regular is still accepted), and (b) on Unix +/// also open with `O_NONBLOCK` as a belt-and-braces defence in case the +/// pre-check races a swap. The post-open `is_file()` on the opened handle +/// remains the TOCTOU-safe gate. +/// /// Returns `(File, len)` on success; the returned `File` is positioned at /// offset 0 and may be `io::copy`'d into a destination — callers should /// wrap with `Read::take(max_len)` so an attacker who extends the file /// after the metadata read cannot stream beyond the cap. fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> { - let file = OpenOptions::new().read(true).open(path)?; + // Pre-check: refuse to even open a non-regular file. This is the + // first line of defence against an attacker who planted a FIFO at + // `path` — opening a FIFO for reading on Unix blocks until a writer + // connects, hanging the upgrade indefinitely. `fs::metadata` follows + // symlinks, so a symlink whose target is a regular file is accepted + // here and a symlink whose target is a FIFO/device/socket is rejected. 
+ let pre_meta = fs::metadata(path)?; + if !pre_meta.file_type().is_file() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "not a regular file (FIFO/device/socket/dir)", + )); + } + + // Belt-and-braces against a pre-check vs open() race: on Unix also + // open with O_NONBLOCK, so even if an attacker swaps the regular file + // for a FIFO between the metadata read and open(), the open() returns + // immediately instead of blocking on a writer. Reads on a regular file + // ignore O_NONBLOCK, so this is a no-op for the happy path. The + // post-open is_file() check below still catches the swap. + let file = { + let mut opts = OpenOptions::new(); + opts.read(true); + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + // O_NONBLOCK is platform-specific: 0o4000 on Linux, 0x0004 on + // macOS/*BSD. Reads on a regular file ignore O_NONBLOCK on all + // these platforms, so this is a no-op for the happy path. + #[cfg(target_os = "linux")] + const O_NONBLOCK: i32 = 0o4000; + #[cfg(any( + target_os = "macos", + target_os = "ios", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly", + ))] + const O_NONBLOCK: i32 = 0x0004; + // Fallback for other unix-likes: skip the flag rather than + // guess wrong. The pre-check + post-open is_file() still gate. + #[cfg(not(any( + target_os = "linux", + target_os = "macos", + target_os = "ios", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly", + )))] + const O_NONBLOCK: i32 = 0; + if O_NONBLOCK != 0 { + opts.custom_flags(O_NONBLOCK); + } + } + opts.open(path)? + }; let meta = file.metadata()?; if !meta.file_type().is_file() { return Err(io::Error::new( @@ -836,6 +902,60 @@ mod tests { ); } + /// A cache-dir attacker who replaces the cached archive with a FIFO + /// must not be able to hang `get_verified_archive` waiting for a + /// writer to connect. 
The pre-check + O_NONBLOCK belt-and-braces + /// returns immediately with an error, the cache hit is abandoned, and + /// the caller falls back to a fresh verified download. + #[cfg(unix)] + #[test] + fn test_fifo_cached_archive_does_not_hang() { + use std::time::{Duration, Instant}; + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a legit signed entry so meta/version/sig-size are good, + // then replace the cached archive with a FIFO. Without the + // pre-check + O_NONBLOCK, opening the FIFO for reading would + // block until a writer connected. + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("6.0.0", &archive, &sig).unwrap(); + let cached_archive = cache.cached_archive_path("6.0.0"); + fs::remove_file(&cached_archive).unwrap(); + let cstr = std::ffi::CString::new(cached_archive.as_os_str().as_encoded_bytes()).unwrap(); + // mkfifo via libc-equivalent: use the nix-free path through + // `std::process::Command` to avoid pulling a libc dep just for + // the test. `mkfifo` is in coreutils on Linux and bundled on + // macOS — both CI targets. + let mkfifo_ok = std::process::Command::new("mkfifo") + .arg(cstr.to_str().unwrap()) + .status() + .ok() + .is_some_and(|s| s.success()); + if !mkfifo_ok { + // If mkfifo isn't available skip rather than fail the suite. + eprintln!("mkfifo unavailable, skipping FIFO test"); + return; + } + + let start = Instant::now(); + let got = cache.get_verified_archive("6.0.0", pd.path()); + let elapsed = start.elapsed(); + + assert!( + got.is_none(), + "a FIFO planted at the cached archive path must be rejected" + ); + assert!( + elapsed < Duration::from_secs(2), + "open of FIFO returned in {elapsed:?}, expected ≪ 2s — \ + pre-check or O_NONBLOCK is not catching this" + ); + // Nothing should have been staged. 
+ assert!(!pd.path().join("cached-6.0.0.archive").exists()); + } + /// `.meta.json` planted as a symlink to a special file (e.g. /// `/dev/zero`) is rejected by the open-handle file-type check, /// without hanging or OOM'ing on the read. From aeb754bfbf80afe397cd757dfc6e369e6d168ca6 Mon Sep 17 00:00:00 2001 From: grumbach Date: Fri, 22 May 2026 15:14:04 +0900 Subject: [PATCH 4/6] refactor(upgrade): address review on FIFO DoS fix Round 2 from adversarial review: - Replace hand-coded O_NONBLOCK constants with libc::O_NONBLOCK. The previous 0o4000/0x0004 per-OS values were correct on x86_64/aarch64/arm but wrong on Linux/MIPS (0o200) and Linux/SPARC (0x4000), where 0o4000 maps to O_NOATIME. Using the libc constant always picks the right value for the target arch. Add libc as a Unix-only direct dependency (was already transitive). - Test test_fifo_cached_archive_does_not_hang: replace the mkfifo shell-out with libc::mkfifo so a CI image that drops coreutils cannot silently skip this test. Bump the budget from 2s to 5s to absorb GitHub Actions macOS runner cold-start variance, since the failure mode "O_NONBLOCK wrong on this arch" and "CI runner slow" look identical from the assertion. - Document the load-bearing invariant on get_verified_archive's private_dir: callers MUST supply a process-private 0o700 dir (apply.rs already does via tempfile + permissions). Without that the reopens-by-path in sha256_file/verify_archive would reopen a TOCTOU window. - Add a cross-reference comment explaining the intentional asymmetry between store_archive (uses symlink_metadata, rejects symlinks) and open_regular_capped (uses fs::metadata, accepts symlink-to-regular) so a later editor doesn't unify them in the wrong direction. 14/14 binary_cache tests pass, 489/489 lib tests pass, cfd clean. 
--- Cargo.toml | 3 ++ src/upgrade/binary_cache.rs | 86 ++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ab3f24ac..002c8358 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,6 +106,9 @@ page_size = "0.6" # Protocol serialization postcard = { version = "1.1.3", features = ["use-std"] } +[target.'cfg(unix)'.dependencies] +libc = "0.2" + [target.'cfg(windows)'.dependencies] self-replace = "1" diff --git a/src/upgrade/binary_cache.rs b/src/upgrade/binary_cache.rs index cbe7c52a..af505fdd 100644 --- a/src/upgrade/binary_cache.rs +++ b/src/upgrade/binary_cache.rs @@ -165,6 +165,14 @@ impl BinaryCache { /// The caller MUST extract the binary from the returned (private) archive /// path, so the executed bytes always derive from signature-verified /// input that no other principal could have modified post-verification. + /// + /// `private_dir` is a load-bearing security invariant: it MUST be a + /// process-private, mode-`0o700` directory that no other principal + /// can write to. The caller in `apply.rs` creates it via + /// `tempfile::Builder::permissions(0o700).tempdir_in(binary_dir)` — + /// any future caller MUST uphold the same invariant, otherwise the + /// reopens by path in `sha256_file` and `verify_archive` would re- + /// introduce a TOCTOU window. // The verifier-side cache-hit gate is read top-to-bottom by anyone // auditing the security model. Splitting it into smaller helpers just // to placate clippy's line limit would scatter the threat model across @@ -347,6 +355,17 @@ impl BinaryCache { // oversize archive, or a misshapen signature — mirroring the // `get_verified_archive` cache-hit policy. `symlink_metadata` // refuses to chase a symlink the caller may have planted. 
+ // + // Note the intentional asymmetry with `open_regular_capped` + // (which uses `fs::metadata` and DOES follow symlinks): on the + // store path the source file is supplied by the caller (typically + // a path under our control after download), so a symlink there is + // surprising and worth rejecting. On the read path the cache dir + // is shared and an attacker may have planted a symlink — but the + // attacker already has write access, so chasing a symlink-to- + // regular is no worse than them editing the regular file + // directly, while still letting the post-open `is_file()` reject + // symlink-to-special. let archive_meta = fs::symlink_metadata(archive_path)?; if !archive_meta.file_type().is_file() { return Err(Error::Upgrade(format!( @@ -531,35 +550,13 @@ fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> { #[cfg(unix)] { use std::os::unix::fs::OpenOptionsExt; - // O_NONBLOCK is platform-specific: 0o4000 on Linux, 0x0004 on - // macOS/*BSD. Reads on a regular file ignore O_NONBLOCK on all - // these platforms, so this is a no-op for the happy path. - #[cfg(target_os = "linux")] - const O_NONBLOCK: i32 = 0o4000; - #[cfg(any( - target_os = "macos", - target_os = "ios", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "dragonfly", - ))] - const O_NONBLOCK: i32 = 0x0004; - // Fallback for other unix-likes: skip the flag rather than - // guess wrong. The pre-check + post-open is_file() still gate. - #[cfg(not(any( - target_os = "linux", - target_os = "macos", - target_os = "ios", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "dragonfly", - )))] - const O_NONBLOCK: i32 = 0; - if O_NONBLOCK != 0 { - opts.custom_flags(O_NONBLOCK); - } + // `O_NONBLOCK` is per-arch on Linux (0o4000 on x86/arm/aarch64 + // /riscv, 0o200 on mips, 0x4000 on sparc, etc.). 
Use `libc` + // so we always pick the right constant for the target arch + // instead of silently setting a different flag. Reads on a + // regular file ignore `O_NONBLOCK` on all our supported + // platforms, so this is a no-op for the happy path. + opts.custom_flags(libc::O_NONBLOCK); } opts.open(path)? }; @@ -923,22 +920,19 @@ mod tests { cache.store_archive("6.0.0", &archive, &sig).unwrap(); let cached_archive = cache.cached_archive_path("6.0.0"); fs::remove_file(&cached_archive).unwrap(); + + // Use libc::mkfifo directly so a CI image that drops coreutils + // can't silently skip this test (an earlier shell-out version + // would hide a packaging regression). The unsafe block is scoped + // to the single FFI call — `mkfifo(2)` takes a NUL-terminated + // path, returns 0 on success and -1 on error with errno set. let cstr = std::ffi::CString::new(cached_archive.as_os_str().as_encoded_bytes()).unwrap(); - // mkfifo via libc-equivalent: use the nix-free path through - // `std::process::Command` to avoid pulling a libc dep just for - // the test. `mkfifo` is in coreutils on Linux and bundled on - // macOS — both CI targets. - let mkfifo_ok = std::process::Command::new("mkfifo") - .arg(cstr.to_str().unwrap()) - .status() - .ok() - .is_some_and(|s| s.success()); - if !mkfifo_ok { - // If mkfifo isn't available skip rather than fail the suite. - eprintln!("mkfifo unavailable, skipping FIFO test"); - return; - } + #[allow(unsafe_code)] + let rc = unsafe { libc::mkfifo(cstr.as_ptr(), 0o600) }; + assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error()); + // Measure only the cache-hit path so cold-process startup or + // unrelated test parallelism don't blow the budget. 
let start = Instant::now(); let got = cache.get_verified_archive("6.0.0", pd.path()); let elapsed = start.elapsed(); @@ -947,9 +941,11 @@ mod tests { got.is_none(), "a FIFO planted at the cached archive path must be rejected" ); + // 5s gives generous headroom on a contended CI macOS runner + // while still catching a real "open is blocking on the FIFO". assert!( - elapsed < Duration::from_secs(2), - "open of FIFO returned in {elapsed:?}, expected ≪ 2s — \ + elapsed < Duration::from_secs(5), + "open of FIFO returned in {elapsed:?}, expected ≪ 5s — \ pre-check or O_NONBLOCK is not catching this" ); // Nothing should have been staged. From ae63d6de50fd15feb625e966b98a6799eb0f6d71 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Tue, 19 May 2026 17:59:11 +0100 Subject: [PATCH 5/6] ci: build Linux releases against musl Switch both Linux release targets from glibc to musl so the published binaries run on any Linux distribution, including Alpine and other musl-based systems. Asset filenames are unchanged (ant-node-cli-linux-{arm64,x64}.tar.gz) so existing auto-upgraders on deployed nodes continue to find them. x86_64-unknown-linux-musl now uses `cross` for the musl toolchain (matching aarch64). musl-static binaries have no dynamic linker dependency and execute on glibc hosts as well as musl hosts. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/release.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c3e8996..b49063a5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -82,13 +82,16 @@ jobs: fail-fast: false matrix: include: - # Use ubuntu-22.04 for GLIBC 2.35 compatibility with server deployments - - target: x86_64-unknown-linux-gnu + # Linux builds use musl for portability across glibc and musl distros + # (e.g. Alpine). Built via `cross` so the musl toolchain is provided + # by the cross-rs container image. 
+ - target: x86_64-unknown-linux-musl os: ubuntu-22.04 binary: ant-node archive: tar.gz + cross: true friendly_name: linux-x64 - - target: aarch64-unknown-linux-gnu + - target: aarch64-unknown-linux-musl os: ubuntu-22.04 binary: ant-node archive: tar.gz From 857969f0d960e44df737ebed8c549475175b2c8b Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Tue, 19 May 2026 18:04:17 +0100 Subject: [PATCH 6/6] feat: use mimalloc as global allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit musl's default malloc is notably slower than glibc's under concurrent allocation churn — the steady-state shape of a DHT-bridged P2P node. Switching the global allocator to mimalloc neutralises that regression for the musl Linux builds, and tends to outperform glibc's allocator as well, so all builds benefit. Applied to both ant-node and ant-devnet binaries. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 20 ++++++++++++++++++++ Cargo.toml | 6 ++++++ src/bin/ant-devnet/main.rs | 3 +++ src/bin/ant-node/main.rs | 3 +++ 4 files changed, 32 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 3cf0f2a3..c16e1331 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -825,7 +825,9 @@ dependencies = [ "futures", "heed", "hex", + "libc", "lru", + "mimalloc", "objc2", "objc2-foundation", "page_size", @@ -3466,6 +3468,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "libmimalloc-sys" +version = "0.1.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d1eacfa31c33ec25e873c136ba5669f00f9866d0688bea7be4d3f7e43067df6" +dependencies = [ + "cc", +] + [[package]] name = "libredox" version = "0.1.16" @@ -3584,6 +3595,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mimalloc" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b3627c4272df786b9260cabaa46aec1d59c93ede723d4c3ef646c503816b0640" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 002c8358..9551fc6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,12 @@ name = "ant-devnet" path = "src/bin/ant-devnet/main.rs" [dependencies] +# Global allocator. musl's default malloc is significantly slower than +# glibc's under concurrent allocation churn, which matches the node's +# steady-state workload. mimalloc neutralises that regression for the +# musl Linux builds (and tends to beat glibc's allocator too). +mimalloc = "0.1" + # Wire protocol — the single version-pin shared with ant-client. # Bumping ant-protocol's `evmlib`/`saorsa-core`/`saorsa-pqc` pins ripples # through here automatically; we keep a direct saorsa-core dep for diff --git a/src/bin/ant-devnet/main.rs b/src/bin/ant-devnet/main.rs index 1117f7de..44d85b7b 100644 --- a/src/bin/ant-devnet/main.rs +++ b/src/bin/ant-devnet/main.rs @@ -2,6 +2,9 @@ #![cfg_attr(not(feature = "logging"), allow(unused_variables))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + mod cli; use ant_node::devnet::{Devnet, DevnetConfig, DevnetEvmInfo, DevnetManifest}; diff --git a/src/bin/ant-node/main.rs b/src/bin/ant-node/main.rs index 3af62dc9..6849103d 100644 --- a/src/bin/ant-node/main.rs +++ b/src/bin/ant-node/main.rs @@ -2,6 +2,9 @@ #![cfg_attr(not(feature = "logging"), allow(unused_variables))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + mod cli; mod platform;