From 42e55dc416266c199a0390d21e779609bd68f5c0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 16:29:22 +0530 Subject: [PATCH 01/40] snapshot abstraction --- crates/commitlog/src/lib.rs | 60 ++++++--- crates/commitlog/src/repo/mod.rs | 11 ++ crates/core/src/db/persistence.rs | 8 +- crates/core/src/db/relational_db.rs | 66 ++++++--- crates/core/src/db/snapshot.rs | 18 +-- crates/core/src/db/update.rs | 2 +- .../src/locking_tx_datastore/datastore.rs | 40 ++++-- crates/durability/src/imp/local.rs | 83 ++++++++++-- crates/snapshot/src/lib.rs | 126 +++++++++++++++++- 9 files changed, 340 insertions(+), 74 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 3922f002a84..d80c1fb00b7 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -151,15 +151,22 @@ impl Options { } } -/// The canonical commitlog, backed by on-disk log files. +/// The canonical commitlog API over a repository backend `R`. +/// +/// The default backend is the on-disk filesystem repository +/// [`repo::Fs`], but tests may supply another [`Repo`] +/// implementation. /// /// Records in the log are of type `T`, which canonically is instantiated to /// [`payload::Txdata`]. -pub struct Commitlog { - inner: RwLock>, +pub struct Commitlog +where + R: Repo, +{ + inner: RwLock>, } -impl Commitlog { +impl Commitlog { /// Open the log at root directory `root` with [`Options`]. /// /// The root directory must already exist. @@ -178,7 +185,26 @@ impl Commitlog { root.display() ); } - let inner = commitlog::Generic::open(repo::Fs::new(root, on_new_segment)?, opts)?; + Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) + } + + /// Determine the size on disk of this commitlog. + pub fn size_on_disk(&self) -> io::Result { + let inner = self.inner.read().unwrap(); + inner.repo.size_on_disk() + } +} + +impl Commitlog +where + R: Repo, +{ + /// Open the log in `repo` with [`Options`]. 
+ /// + /// This is useful for tests which provide a repository + /// implementation other than [`repo::Fs`]. + pub fn open_with_repo(repo: R, opts: Options) -> io::Result { + let inner = commitlog::Generic::open(repo, opts)?; Ok(Self { inner: RwLock::new(inner), @@ -307,7 +333,7 @@ impl Commitlog { /// This means that, when this iterator yields an `Err` value, the consumer /// may want to check if the iterator is exhausted (by calling `next()`) /// before treating the `Err` value as an application error. - pub fn commits(&self) -> impl Iterator> + use { + pub fn commits(&self) -> impl Iterator> + use { self.commits_from(0) } @@ -320,7 +346,10 @@ impl Commitlog { /// Note that the first [`StoredCommit`] yielded is the first commit /// containing the given transaction offset, i.e. its `min_tx_offset` may be /// smaller than `offset`. - pub fn commits_from(&self, offset: u64) -> impl Iterator> + use { + pub fn commits_from( + &self, + offset: u64, + ) -> impl Iterator> + use { self.inner.read().unwrap().commits_from(offset) } @@ -374,15 +403,12 @@ impl Commitlog { inner: RwLock::new(inner), }) } - - /// Determine the size on disk of this commitlog. - pub fn size_on_disk(&self) -> io::Result { - let inner = self.inner.read().unwrap(); - inner.repo.size_on_disk() - } } -impl Commitlog { +impl Commitlog +where + R: Repo, +{ /// Write `transactions` to the log. 
/// /// This will store all `transactions` as a single [Commit] @@ -452,10 +478,11 @@ impl Commitlog { pub fn transactions<'a, D>( &self, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.transactions_from(0, de) @@ -471,10 +498,11 @@ impl Commitlog { &self, offset: u64, de: &'a D, - ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T> + ) -> impl Iterator, D::Error>> + 'a + use<'a, D, T, R> where D: Decoder, D::Error: From, + R: 'a, T: 'a, { self.inner.read().unwrap().transactions_from(offset, de) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 2b54216bad3..51df7accb81 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -144,6 +144,17 @@ pub trait Repo: Clone + fmt::Display { } } +/// Marker for repos that do not require an external lock file. +/// +/// Durability implementations can use this to expose repo-backed opening +/// only for storage backends where skipping the filesystem `db.lock` cannot +/// violate single-writer safety. 
+pub trait RepoWithoutLockFile: Repo {} + +impl RepoWithoutLockFile for &T {} + +impl RepoWithoutLockFile for Memory {} + impl Repo for &T { type SegmentWriter = T::SegmentWriter; type SegmentReader = T::SegmentReader; diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index e837506da38..5b0daa5145c 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::SnapshotRepository; +use spacetimedb_snapshot::DynSnapshotRepo; use crate::{messages::control_db::Database, util::asyncify}; @@ -61,9 +61,9 @@ impl Persistence { } } - /// If snapshots are enabled, get the [SnapshotRepository] they are stored in. - pub fn snapshot_repo(&self) -> Option<&SnapshotRepository> { - self.snapshots.as_ref().map(|worker| worker.repo()) + /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. + pub fn snapshot_repo(&self) -> Option> { + self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } /// Get the [TxOffset] reported as durable by the [Durability] impl. 
diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index d8cd4884bcc..6df38806dca 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -51,7 +51,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -235,7 +235,7 @@ impl RelationalDB { /// /// - `snapshot_repo` /// - /// The [`SnapshotRepository`] which stores snapshots of this database. + /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must /// store views of the same sequence of TXes as the `history`. 
@@ -278,9 +278,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); + let snapshot_repo = persistence.as_ref().and_then(|p| p.snapshot_repo()); let inner = Self::restore_from_snapshot_or_bootstrap( database_identity, - persistence.as_ref().and_then(|p| p.snapshot_repo()), + snapshot_repo.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -292,7 +293,7 @@ impl RelationalDB { .snapshot_repo() .map(|repo| repo.database_identity() == database_identity) .unwrap_or(true), - "snapshot repository does not match database identity", + "snapshot repo does not match database identity", ); persistence.set_snapshot_state(inner.committed_state.clone()); } @@ -471,7 +472,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_repo: Option<&SnapshotRepository>, + snapshot_repo: Option<&DynSnapshotRepo>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -479,7 +480,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &SnapshotRepository, + snapshot_repo: &DynSnapshotRepo, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -592,11 +593,12 @@ impl RelationalDB { // Invalidate the snapshot if the error is permanent. // Newly created snapshots should not depend on it. if !is_transient_error(&e) { - let path = snapshot_repo.snapshot_dir_path(snapshot_offset); - log::info!("invalidating bad snapshot at {}", path.display()); - path.rename_invalid().map_err(|e| RestoreSnapshotError::Invalidate { - offset: snapshot_offset, - source: Box::new(e.into()), + log::info!("invalidating bad snapshot at {snapshot_offset}"); + snapshot_repo.invalidate_snapshot(snapshot_offset).map_err(|e| { + RestoreSnapshotError::Invalidate { + offset: snapshot_offset, + source: Box::new(e), + } })?; } // Try the next older one if the error was transient. 
@@ -612,7 +614,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot on disk"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. due to archiving), there is no way to restore @@ -769,6 +771,19 @@ impl RelationalDB { r } + #[cfg(any(feature = "test", test))] + #[tracing::instrument(level = "trace", skip_all)] + pub fn try_begin_mut_tx(&self, isolation_level: IsolationLevel, workload: Workload) -> Option { + log::trace!("TRY BEGIN MUT TX"); + let r = self.inner.try_begin_mut_tx(isolation_level, workload); + if r.is_some() { + log::trace!("ACQUIRED MUT TX"); + } else { + log::trace!("MUT TX CONTENDED"); + } + r + } + #[tracing::instrument(level = "trace", skip_all)] pub fn begin_tx(&self, workload: Workload) -> Tx { log::trace!("BEGIN TX"); @@ -1007,7 +1022,7 @@ impl RelationalDB { Ok(self.inner.alter_table_row_type_mut_tx(tx, table_id, column_schemas)?) } - pub(crate) fn add_columns_to_table( + pub(crate) fn add_columns_to_table_mut_tx( &self, tx: &mut MutTx, table_id: TableId, @@ -1019,6 +1034,17 @@ impl RelationalDB { .add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?) } + #[cfg(any(feature = "test", test))] + pub fn add_columns_to_table( + &self, + tx: &mut MutTx, + table_id: TableId, + column_schemas: Vec, + default_values: Vec, + ) -> Result { + self.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values) + } + /// Reports the `TxMetrics`s passed. /// /// Should only be called after the tx lock has been fully released. 
@@ -1777,7 +1803,6 @@ pub mod tests_utils { use spacetimedb_fs_utils::compression::CompressType; use spacetimedb_lib::{bsatn::to_vec, ser::Serialize}; use spacetimedb_paths::server::ReplicaDir; - use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::FromPathUnchecked; use tempfile::TempDir; @@ -2091,7 +2116,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { + pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } @@ -3661,7 +3686,7 @@ mod tests { let repo = open_snapshot_repo(dir, Identity::ZERO, 0)?; RelationalDB::restore_from_snapshot_or_bootstrap( Identity::ZERO, - Some(&repo), + Some(repo.as_ref()), Some(last_compress), 0, PagePool::new_for_test(), @@ -3689,8 +3714,13 @@ mod tests { ); let last = repo.latest_snapshot()?; - let stdb = - RelationalDB::restore_from_snapshot_or_bootstrap(identity, Some(&repo), last, 0, PagePool::new_for_test())?; + let stdb = RelationalDB::restore_from_snapshot_or_bootstrap( + identity, + Some(repo.as_ref()), + last, + 0, + PagePool::new_for_test(), + )?; let out = TempDir::with_prefix("snapshot_test")?; let dir = SnapshotsPath::from_path_unchecked(out.path()); diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index c47e1d33d2d..26e3d8373cf 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, SnapshotRepository}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: 
watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repository: Arc, } impl SnapshotWorker { @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { + pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { let database = snapshot_repository.database_identity(); let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -105,9 +105,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the [SnapshotRepository] this worker is operating on. - pub fn repo(&self) -> &SnapshotRepository { - &self.snapshot_repository + /// Get the snapshot repo this worker is operating on. + pub fn snapshot_repo(&self) -> Arc { + self.snapshot_repository.clone() } /// Request a snapshot to be taken. 
@@ -166,7 +166,7 @@ enum Request { struct SnapshotWorkerActor { snapshot_requests: mpsc::UnboundedReceiver, - snapshot_repo: Arc, + snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, compression: Option, @@ -225,7 +225,7 @@ impl SnapshotWorkerActor { let maybe_snapshot = asyncify(move || { let _timer = inner_timer.start_timer(); - Locking::take_snapshot_internal(&state, &snapshot_repo) + Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) }) .await .with_context(|| format!("error capturing snapshot of database {}", database_identity))?; @@ -307,7 +307,7 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, } diff --git a/crates/core/src/db/update.rs b/crates/core/src/db/update.rs index 6c7c3bd9fc8..f9ca4c110d9 100644 --- a/crates/core/src/db/update.rs +++ b/crates/core/src/db/update.rs @@ -317,7 +317,7 @@ fn auto_migrate_database( .iter() .filter_map(|col_def| col_def.default_value.clone()) .collect(); - stdb.add_columns_to_table(tx, table_id, column_schemas, default_values)?; + stdb.add_columns_to_table_mut_tx(tx, table_id, column_schemas, default_values)?; } spacetimedb_schema::auto_migrate::AutoMigrateStep::DisconnectAllUsers => { log!(logger, "Disconnecting all users"); diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index edcce91ce5e..e9d67103b16 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -30,7 +30,6 @@ use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::{db::auth::StAccess, metrics::ExecutionMetrics}; use spacetimedb_lib::{ConnectionId, Identity}; -use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_primitives::{ColId, ColList, ConstraintId, IndexId, SequenceId, TableId, ViewId}; use 
spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::{AlgebraicValue, ProductValue}; @@ -39,7 +38,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotRepository, UnflushedSnapshot}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -223,11 +222,11 @@ impl Locking { /// (i.e. no transactions have been committed yet) /// and therefore no snapshot was created /// - /// - or `Some` path to the newly created snapshot directory + /// - or `Some` transaction offset for the newly created snapshot /// - /// Returns an error if [`SnapshotRepository::create_snapshot`] returns an + /// Returns an error if [`DynSnapshotRepo::create_snapshot`] returns an /// error. - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result> { + pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result> { Self::take_snapshot_internal(&self.committed_state, repo)? 
.map(|(_offset, snap)| snap.sync_all()) .transpose() @@ -241,8 +240,8 @@ impl Locking { pub fn take_snapshot_internal( committed_state: &RwLock, - repo: &SnapshotRepository, - ) -> Result> { + repo: &DynSnapshotRepo, + ) -> Result> { let mut committed_state = committed_state.write(); let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { return Ok(None); @@ -254,8 +253,8 @@ impl Locking { tx_offset, ); - let (tables, blob_store) = committed_state.persistent_tables_and_blob_store(); - let unflushed_snapshot = repo.create_snapshot(tables, blob_store, tx_offset)?; + let (mut tables, blob_store) = committed_state.persistent_tables_and_blob_store(); + let unflushed_snapshot = repo.create_snapshot(&mut tables, blob_store, tx_offset)?; Ok(Some((tx_offset, unflushed_snapshot))) } @@ -924,6 +923,29 @@ impl MutTx for Locking { } impl Locking { + #[cfg(any(feature = "test", test))] + pub fn try_begin_mut_tx(&self, _isolation_level: IsolationLevel, workload: Workload) -> Option { + let metrics = ExecutionMetrics::default(); + let ctx = ExecutionContext::with_workload(self.database_identity, workload); + + let timer = Instant::now(); + let committed_state_write_lock = self.committed_state.try_write_arc()?; + let sequence_state_lock = self.sequence_state.try_lock_arc()?; + let lock_wait_time = timer.elapsed(); + + Some(MutTxId { + committed_state_write_lock, + sequence_state_lock, + tx_state: TxState::default(), + lock_wait_time, + read_sets: <_>::default(), + timer, + ctx, + metrics, + _not_send: std::marker::PhantomData, + }) + } + pub fn rollback_mut_tx_downgrade(&self, tx: MutTxId, workload: Workload) -> (TxMetrics, TxId) { tx.rollback_downgrade(workload) } diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 3bf1921e8a8..5cc03099ab6 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,7 +11,12 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, 
trace, warn}; use scopeguard::ScopeGuard; -use spacetimedb_commitlog::{error, payload::Txdata, Commit, Commitlog, Decoder, Encode, Transaction}; +use spacetimedb_commitlog::{ + error, + payload::Txdata, + repo::{Fs, Repo, RepoWithoutLockFile}, + Commit, Commitlog, Decoder, Encode, Transaction, +}; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; use thiserror::Error; @@ -83,9 +88,12 @@ pub enum OpenError { /// /// Note, however, that instantiating `T` to a different type may require to /// change the log format version! -pub struct Local { +pub struct Local +where + R: Repo, +{ /// The [`Commitlog`] this [`Durability`] and [`History`] impl wraps. - clog: Arc>>, + clog: Arc, R>>, /// The durable transaction offset, as reported by the background /// [`FlushAndSyncTask`]. durable_offset: watch::Receiver>, @@ -106,7 +114,7 @@ pub struct Local { actor: Mutex>>, } -impl Local { +impl Local { /// Create a [`Local`] instance at the `replica_dir`. /// /// `replica_dir` must already exist. @@ -132,6 +140,21 @@ impl Local { opts.commitlog, on_new_segment, )?); + Self::open_inner(clog, rt, opts, Some(lock)) + } +} + +impl Local +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ + fn open_inner( + clog: Arc, R>>, + rt: tokio::runtime::Handle, + opts: Options, + lock: Option, + ) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); @@ -161,12 +184,29 @@ impl Local { } /// Obtain a read-only copy of the durable state that implements [History]. - pub fn as_history(&self) -> impl History> + use { + pub fn as_history(&self) -> impl History> + use { self.clog.clone() } } -impl Local { +impl Local +where + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, +{ + /// Create a [`Local`] instance backed by the provided commitlog repo. 
+ pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + info!("open local durability"); + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts, None) + } +} + +impl Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ /// Inspect how many transactions added via [`Self::append_tx`] are pending /// to be applied to the underlying [`Commitlog`]. pub fn queue_depth(&self) -> u64 { @@ -174,7 +214,7 @@ impl Local { } /// Obtain an iterator over the [`Commit`]s in the underlying log. - pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { + pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { self.clog.commits_from(offset).map_ok(Commit::from) } @@ -187,15 +227,20 @@ impl Local { pub fn compress_segments(&self, offsets: &[TxOffset]) -> io::Result<()> { self.clog.compress_segments(offsets) } +} +impl Local { /// Get the size on disk of the underlying [`Commitlog`]. 
pub fn size_on_disk(&self) -> io::Result { self.clog.size_on_disk() } } -struct Actor { - clog: Arc>>, +struct Actor +where + R: Repo, +{ + clog: Arc, R>>, durable_offset: watch::Sender>, queue_depth: Arc, @@ -203,10 +248,14 @@ struct Actor { batch_capacity: NonZeroUsize, #[allow(unused)] - lock: LockedFile, + lock: Option, } -impl Actor { +impl Actor +where + T: Encode + Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ #[instrument(name = "durability::local::actor", skip_all)] async fn run(self, transactions_rx: async_channel::Receiver>>) { info!("starting durability actor"); @@ -287,7 +336,11 @@ impl Actor { } } -impl Durability for Local { +impl Durability for Local +where + T: Send + Sync + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn append_tx(&self, tx: PreparedTx) { @@ -332,7 +385,11 @@ impl Durability for Local { } } -impl History for Commitlog> { +impl History for Commitlog, R> +where + T: Encode + 'static, + R: Repo + Send + Sync + 'static, +{ type TxData = Txdata; fn fold_transactions_from(&self, offset: TxOffset, decoder: D) -> Result<(), D::Error> diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 66c25ed824a..66bc3815af8 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -46,7 +46,7 @@ use spacetimedb_table::{ }; use std::fs::{self, File}; use std::io; -use std::ops::RangeBounds; +use std::ops::{Range, RangeBounds}; use std::path::Path; use std::time::{Duration, Instant}; use std::{ @@ -206,6 +206,11 @@ pub struct UnflushedSnapshot { } impl UnflushedSnapshot { + /// Return the transaction offset this pending snapshot will finalize at. + pub fn tx_offset(&self) -> TxOffset { + self.inner.as_ref().unwrap().snapshot.tx_offset + } + /// Sync all objects in the snapshot and write out the snapshot file. /// /// Returns the [SnapshotDirPath] on success. 
@@ -261,6 +266,28 @@ impl UnflushedSnapshotInner { } } +pub trait PendingSnapshot: Send { + /// Sync all snapshot state and return the finalized transaction offset. + fn sync_all(self: Box) -> Result; +} + +pub type BoxedPendingSnapshot = Box; +pub type DynSnapshotRepo = dyn SnapshotRepo; + +impl PendingSnapshot for BoxedPendingSnapshot { + fn sync_all(self: Box) -> Result { + (*self).sync_all() + } +} + +impl PendingSnapshot for UnflushedSnapshot { + fn sync_all(self: Box) -> Result { + let tx_offset = self.tx_offset(); + UnflushedSnapshot::sync_all(*self)?; + Ok(tx_offset) + } +} + #[derive(Clone, Serialize, Deserialize)] pub struct Snapshot { /// A magic number: must be equal to [`MAGIC`]. @@ -1139,13 +1166,19 @@ impl SnapshotRepository { .collect::>(); for newer_snapshot in newer_snapshots { - let path = self.snapshot_dir_path(newer_snapshot); - log::info!("Renaming snapshot newer than {upper_bound} from {path:?} to {path:?}"); - path.rename_invalid()?; + self.invalidate_snapshot(newer_snapshot)?; } Ok(()) } + /// Mark a single snapshot invalid so it will not be considered for future + /// restores. + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + let path = self.snapshot_dir_path(tx_offset); + log::info!("Renaming snapshot {tx_offset} from {path:?} to invalid"); + path.rename_invalid().map_err(Into::into) + } + /// Compress the `current` snapshot, unless it is already compressed. /// /// If a `parent` snapshot is given, its object repo will be used to @@ -1329,6 +1362,91 @@ impl SnapshotRepository { } } +/// Snapshot storage backend. +pub trait SnapshotRepo: Send + Sync { + type Pending: PendingSnapshot; + + /// Return the database identity associated with this snapshot backend. + fn database_identity(&self) -> Identity; + + /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. 
+ fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result; + + /// Reconstruct the snapshot at `tx_offset` using the supplied page pool. + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result; + + /// Return the latest snapshot at or before `upper_bound`, if one exists. + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError>; + + /// Return the latest snapshot in this backend, if one exists. + fn latest_snapshot(&self) -> Result, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + /// Attempt to compress all snapshots that fall into `range`, and record + /// the outcome in `stats`. + /// + /// The snapshots in `range` are traversed in ascending order. + /// If an error occurs, processing stops and the error is returned. + /// + /// See [CompressionStats] for how to interpret the results. + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError>; + + /// Invalidate every snapshot newer than `upper_bound`. + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; + + /// Invalidate the snapshot at `tx_offset`. 
+ fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; +} + +impl SnapshotRepo for SnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn database_identity(&self) -> Identity { + SnapshotRepository::database_identity(self) + } + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + Ok(Box::new(SnapshotRepository::create_snapshot( + self, tables, blobs, tx_offset, + )?)) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + SnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + SnapshotRepository::latest_snapshot(self) + } + + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError> { + SnapshotRepository::compress_snapshots(self, stats, range) + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + pub struct ReconstructedSnapshot { /// The identity of the snapshotted database. 
pub database_identity: Identity, From f508a0462f806452daa7cf913ca34aefc684a332 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 17:05:14 +0530 Subject: [PATCH 02/40] lint --- crates/commitlog/src/repo/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 51df7accb81..358936c3c2a 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -153,6 +153,7 @@ pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +#[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} impl Repo for &T { From 5356b8186b3102efb41ab401e9f5de805f4bf391 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 17:13:02 +0530 Subject: [PATCH 03/40] Add runtime crate and RuntimeDispatch integration --- Cargo.lock | 37 +- Cargo.toml | 4 +- crates/core/Cargo.toml | 2 + crates/core/src/database_logger.rs | 6 +- crates/core/src/db/durability.rs | 15 +- crates/core/src/db/persistence.rs | 29 +- crates/core/src/db/relational_db.rs | 48 +- crates/core/src/db/snapshot.rs | 97 ++- crates/core/src/lib.rs | 1 + crates/core/src/runtime.rs | 3 + .../subscription/module_subscription_actor.rs | 2 +- crates/durability/Cargo.toml | 1 + crates/durability/src/imp/local.rs | 78 +- crates/durability/src/imp/mod.rs | 5 + crates/durability/tests/io/fallocate.rs | 2 +- crates/runtime/Cargo.toml | 24 + crates/runtime/LICENSE | 731 ++++++++++++++++++ crates/runtime/README.md | 182 +++++ crates/runtime/src/lib.rs | 122 +++ crates/runtime/src/sim/executor.rs | 589 ++++++++++++++ crates/runtime/src/sim/mod.rs | 23 + crates/runtime/src/sim/rng.rs | 367 +++++++++ crates/runtime/src/sim/system_thread.rs | 64 ++ crates/runtime/src/sim/time.rs | 343 ++++++++ crates/standalone/Cargo.toml | 2 +- crates/standalone/src/subcommands/start.rs | 97 ++- 26 files changed, 2721 insertions(+), 153 deletions(-) create mode 100644 crates/core/src/runtime.rs create mode 
100644 crates/runtime/Cargo.toml create mode 100644 crates/runtime/LICENSE create mode 100644 crates/runtime/README.md create mode 100644 crates/runtime/src/lib.rs create mode 100644 crates/runtime/src/sim/executor.rs create mode 100644 crates/runtime/src/sim/mod.rs create mode 100644 crates/runtime/src/sim/rng.rs create mode 100644 crates/runtime/src/sim/system_thread.rs create mode 100644 crates/runtime/src/sim/time.rs diff --git a/Cargo.lock b/Cargo.lock index 24761b41064..a0193647eae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,7 +34,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "once_cell", "version_check", ] @@ -276,6 +276,12 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.89" @@ -2551,9 +2557,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "a06fddc2749e0528d2813f95e050e87e52c8cbbae56223b9babf73b3e53b0cc6" dependencies = [ "cfg-if", "js-sys", @@ -6113,7 +6119,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", ] [[package]] @@ -6187,7 +6193,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.13", "libredox", 
"thiserror 1.0.69", ] @@ -6416,7 +6422,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.13", "libc", "untrusted", "windows-sys 0.52.0", @@ -7649,7 +7655,7 @@ checksum = "db18cb19c7499ba4a65b1504442179a7e4aba487dc35978d90966c5ca02ee16b" dependencies = [ "bytemuck", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "log", "rand 0.8.5", "scoped-tls", @@ -7668,7 +7674,7 @@ dependencies = [ "bytemuck", "bytes", "derive_more 0.99.20", - "getrandom 0.2.16", + "getrandom 0.2.13", "http 1.3.1", "insta", "log", @@ -8093,6 +8099,7 @@ dependencies = [ "spacetimedb-physical-plan", "spacetimedb-primitives 2.2.0", "spacetimedb-query", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-snapshot", @@ -8190,6 +8197,7 @@ dependencies = [ "spacetimedb-commitlog", "spacetimedb-fs-utils", "spacetimedb-paths", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "tempfile", "thiserror 1.0.69", @@ -8458,6 +8466,19 @@ dependencies = [ "spacetimedb-lib 2.2.0", ] +[[package]] +name = "spacetimedb-runtime" +version = "2.2.0" +dependencies = [ + "anyhow", + "async-task", + "futures", + "futures-util", + "libc", + "tokio", + "tracing", +] + [[package]] name = "spacetimedb-sats" version = "1.9.0" diff --git a/Cargo.toml b/Cargo.toml index 75deef78a3b..f0678e29cc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ members = [ "crates/physical-plan", "crates/primitives", "crates/query", + "crates/runtime", "crates/sats", "crates/schema", "crates/smoketests", @@ -138,6 +139,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.2.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.2.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.2.0" } spacetimedb-query = { path = "crates/query", version = "=2.2.0" } +spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0", 
default-features = false } spacetimedb-sats = { path = "crates/sats", version = "=2.2.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.2.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.2.0" } @@ -388,7 +390,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(simulation)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index acdc578080d..2947eccac9d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -28,6 +28,7 @@ spacetimedb-primitives.workspace = true spacetimedb-paths.workspace = true spacetimedb-physical-plan.workspace = true spacetimedb-query.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats = { workspace = true, features = ["serde"] } spacetimedb-schema.workspace = true spacetimedb-table.workspace = true @@ -133,6 +134,7 @@ tikv-jemalloc-ctl = {workspace = true} [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true, features = ["sched"] } + [features] # Print a warning when doing an unindexed `iter_by_col_range` on a large table. 
unindexed_iter_by_col_range_warn = [] diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 0e202229dea..f194cb60a48 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader}; +use tokio::io::{AsyncRead, BufReader, ReadBuf}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index c17a10e9f63..3a466d53eb6 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -9,9 +9,8 @@ use spacetimedb_datastore::{execution_context::ReducerContext, traits::TxData}; use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use tokio::{runtime, time::timeout}; -use crate::db::persistence::Durability; +use crate::{db::persistence::Durability, runtime::RuntimeDispatch}; pub(super) fn request_durability( durability: &Durability, @@ -32,11 +31,12 @@ pub(super) fn request_durability( })); } -pub(super) fn 
spawn_close(durability: Arc, runtime: &runtime::Handle, database_identity: Identity) { - let rt = runtime.clone(); - rt.spawn(async move { - let label = format!("[{database_identity}]"); - match timeout(Duration::from_secs(10), durability.close()).await { +pub(super) fn spawn_close(durability: Arc, runtime: &RuntimeDispatch, database_identity: Identity) { + let label = format!("[{database_identity}]"); + let runtime = runtime.clone(); + runtime.clone().spawn(async move { + log::info!("starting spawn close"); + match runtime.timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { error!("{label} timeout waiting for durability shutdown"); } @@ -44,6 +44,7 @@ pub(super) fn spawn_close(durability: Arc, runtime: &runtime::Handle info!("{label} durability shut down at tx offset: {offset:?}"); } } + log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 5b0daa5145c..83d58befb06 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,7 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, util::asyncify}; +use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; use super::{ relational_db::{self, Txdata}, @@ -41,8 +41,8 @@ pub struct Persistence { /// persistent (as opposed to in-memory) databases. This is enforced by /// this type. pub snapshots: Option, - /// The tokio runtime onto which durability-related tasks shall be spawned. - pub runtime: tokio::runtime::Handle, + /// Runtime onto which durability-related tasks shall be spawned. 
+ pub runtime: RuntimeDispatch, } impl Persistence { @@ -52,6 +52,15 @@ impl Persistence { disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, runtime: tokio::runtime::Handle, + ) -> Self { + Self::new_with_runtime(durability, disk_size, snapshots, RuntimeDispatch::tokio(runtime)) + } + + pub fn new_with_runtime( + durability: impl spacetimedb_durability::Durability + 'static, + disk_size: impl Fn() -> io::Result + Send + Sync + 'static, + snapshots: Option, + runtime: RuntimeDispatch, ) -> Self { Self { durability: Arc::new(durability), @@ -61,7 +70,7 @@ impl Persistence { } } - /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. + /// If snapshots are enabled, get the snapshot repository they are stored in. pub fn snapshot_repo(&self) -> Option> { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } @@ -91,7 +100,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -148,7 +157,13 @@ impl PersistenceProvider for LocalPersistenceProvider { let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled))?; + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Enabled, + RuntimeDispatch::tokio_current(), + ) + })?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( @@ -162,7 +177,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: tokio::runtime::Handle::current(), + runtime: RuntimeDispatch::tokio_current(), }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 6df38806dca..57c7cde59cc 100644 --- a/crates/core/src/db/relational_db.rs +++ 
b/crates/core/src/db/relational_db.rs @@ -1,6 +1,7 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; +use crate::runtime::RuntimeDispatch; use crate::subscription::ExecutionCounters; use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -41,6 +42,8 @@ use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; +#[cfg(test)] +use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -51,7 +54,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepo, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -99,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -133,10 +136,13 @@ impl std::fmt::Debug for RelationalDB { impl Drop for RelationalDB { fn drop(&mut self) { + log::info!("starting drop"); // Attempt to flush the outstanding transactions. 
if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); } + + log::info!("drop done"); } } @@ -233,11 +239,12 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - `snapshot_repo` + /// - snapshots /// - /// The [`SnapshotRepo`] which stores snapshots of this database. + /// Optional snapshot persistence and background snapshot execution, + /// carried through [`Persistence`]. /// This is only meaningful if `history` and `durability` are also supplied. - /// If restoring from an existing database, the `snapshot_repo` must + /// If restoring from an existing database, the snapshot repository must /// store views of the same sequence of TXes as the `history`. /// /// - `metrics_recorder_queue` @@ -480,7 +487,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &DynSnapshotRepo, + snapshot_repo: &(impl SnapshotRepo + ?Sized), snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -614,7 +621,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in store"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. 
due to archiving), there is no way to restore @@ -1671,7 +1678,7 @@ pub async fn local_durability( replica_dir: ReplicaDir, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let rt = tokio::runtime::Handle::current(); + let runtime = RuntimeDispatch::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -1683,7 +1690,7 @@ pub async fn local_durability( let local = asyncify(move || { durability::Local::open( replica_dir.clone(), - rt, + runtime, <_>::default(), // Give the durability a handle to request a new snapshot run, // which it will send down whenever we rotate commitlog segments. @@ -1803,6 +1810,7 @@ pub mod tests_utils { use spacetimedb_fs_utils::compression::CompressType; use spacetimedb_lib::{bsatn::to_vec, ser::Serialize}; use spacetimedb_paths::server::ReplicaDir; + use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::FromPathUnchecked; use tempfile::TempDir; @@ -1950,7 +1958,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) }) .transpose()?; @@ -1961,7 +1975,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: RuntimeDispatch::tokio(rt), }; let (db, _) = RelationalDB::open( @@ -2073,7 +2087,13 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0) - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Disabled)) + .map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + RuntimeDispatch::tokio(rt.clone()), + ) + }) 
}) .transpose()?; let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; @@ -2082,7 +2102,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: rt, + runtime: RuntimeDispatch::tokio(rt), }; let db = Self::open_db(history, Some(persistence), None, 0)?; @@ -2116,7 +2136,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &DynSnapshotRepo) -> Result, DBError> { + pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 26e3d8373cf..dda981a89bd 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,10 +14,10 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; use tokio::sync::watch; -use crate::{util::asyncify, worker_metrics::WORKER_METRICS}; +use crate::{runtime::RuntimeDispatch, worker_metrics::WORKER_METRICS}; pub type SnapshotDatabaseState = Arc>; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repo: Arc, } impl SnapshotWorker { @@ -69,29 +69,26 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repo: Arc, runtime: RuntimeDispatch) -> Self { + let database = snapshot_repo.database_identity(); + let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), - compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), - metrics: CompressionMetrics::new(database), - stats: <_>::default(), - }), + runtime: runtime.clone(), + compression: None, }; - tokio::spawn(actor.run()); + runtime.spawn(actor.run()); Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository, + snapshot_repo, } } @@ -105,9 +102,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the snapshot repo this worker is operating on. + /// Get the snapshot repository this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repository.clone() + self.snapshot_repo.clone() } /// Request a snapshot to be taken. 
@@ -141,6 +138,40 @@ impl SnapshotWorker { } } +impl SnapshotWorker { + pub fn new_with_repository( + snapshot_repository: Arc, + compression: Compression, + runtime: RuntimeDispatch, + ) -> Self { + let database = snapshot_repository.database_identity(); + let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + let (snapshot_created, _) = watch::channel(latest_snapshot); + let (request_tx, request_rx) = mpsc::unbounded(); + + let actor = SnapshotWorkerActor { + snapshot_requests: request_rx, + snapshot_repo: snapshot_repository.clone(), + snapshot_created: snapshot_created.clone(), + metrics: SnapshotMetrics::new(database), + runtime: runtime.clone(), + compression: compression.is_enabled().then(|| Compressor { + snapshot_repo: snapshot_repository.clone(), + metrics: CompressionMetrics::new(database), + stats: <_>::default(), + runtime: runtime.clone(), + }), + }; + runtime.spawn(actor.run()); + + Self { + snapshot_created, + request_snapshot: request_tx, + snapshot_repo: snapshot_repository, + } + } +} + struct SnapshotMetrics { snapshot_timing_total: Histogram, snapshot_timing_inner: Histogram, @@ -169,6 +200,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, + runtime: RuntimeDispatch, compression: Option, } @@ -220,21 +252,24 @@ impl SnapshotWorkerActor { let inner_timer = self.metrics.snapshot_timing_inner.clone(); let snapshot_repo = self.snapshot_repo.clone(); + let runtime = self.runtime.clone(); let database_identity = self.snapshot_repo.database_identity(); - let maybe_snapshot = asyncify(move || { - let _timer = inner_timer.start_timer(); - Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) - }) - .await - .with_context(|| format!("error capturing snapshot of database {}", database_identity))?; - let (snapshot_offset, unflushed_snapshot) = maybe_snapshot.with_context(|| { - format!( - "refusing to take snapshot of database {} at TX offset -1", - 
database_identity - ) - })?; + let maybe_snapshot = runtime + .spawn_blocking(move || { + let _timer = inner_timer.start_timer(); + Locking::take_snapshot_internal(&state, snapshot_repo.as_ref()) + }) + .await + .with_context(|| format!("error capturing snapshot of database {}", database_identity))? + .with_context(|| { + format!( + "refusing to take snapshot of database {} at TX offset -1", + database_identity + ) + })?; + let (snapshot_offset, unflushed_snapshot) = maybe_snapshot; self.metrics .snapshot_timing_fsync .observe_closure_duration(|| unflushed_snapshot.sync_all())?; @@ -307,9 +342,10 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, + runtime: RuntimeDispatch, } impl Compressor { @@ -341,7 +377,8 @@ impl Compressor { let range = start..latest_snapshot; let mut stats = self.stats.take().unwrap_or_default(); - let (mut stats, res) = asyncify({ + let runtime = self.runtime.clone(); + let (mut stats, res) = runtime.spawn_blocking({ let range = range.clone(); move || { let _timer = inner_timer.start_timer(); diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 26b35230b1f..4a7246bcbd7 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,6 +18,7 @@ pub mod estimation; pub mod host; pub mod module_host_context; pub mod replica_context; +pub mod runtime; pub mod startup; pub mod subscription; pub mod util; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs new file mode 100644 index 00000000000..53baad4d73a --- /dev/null +++ b/crates/core/src/runtime.rs @@ -0,0 +1,3 @@ +//! Runtime boundary re-exported for core call sites. 
+ +pub use spacetimedb_runtime::{current_handle_or_new_runtime, Handle, Runtime, RuntimeDispatch, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 83760252a5e..f82d36286d4 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2061,7 +2061,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: rt, + runtime: crate::runtime::RuntimeDispatch::tokio(rt), }), None, 0, diff --git a/crates/durability/Cargo.toml b/crates/durability/Cargo.toml index 0ea8022fcbe..4eaa3870001 100644 --- a/crates/durability/Cargo.toml +++ b/crates/durability/Cargo.toml @@ -21,6 +21,7 @@ scopeguard.workspace = true spacetimedb-commitlog.workspace = true spacetimedb-fs-utils.workspace = true spacetimedb-paths.workspace = true +spacetimedb-runtime = { workspace = true, features = ["tokio"] } spacetimedb-sats.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 5cc03099ab6..51d89e2e848 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -10,7 +10,6 @@ use std::{ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; -use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, @@ -19,11 +18,9 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; +use spacetimedb_runtime::RuntimeDispatch; use thiserror::Error; -use tokio::{ - sync::watch, - task::{spawn_blocking, JoinHandle}, -}; +use tokio::sync::{oneshot, watch}; use tracing::{instrument, Span}; use crate::{Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; @@ -109,9 +106,9 @@ where /// This 
is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// [JoinHandle] for the actor task. Contains `None` if already cancelled - /// (via [Durability::close]). - actor: Mutex>>, + /// Completion notification for the background actor. Contains `None` once + /// consumed by [`Durability::close`]. + actor_done: Mutex>>, } impl Local { @@ -119,13 +116,13 @@ impl Local { /// /// `replica_dir` must already exist. /// - /// Background tasks are spawned onto the provided tokio runtime. + /// Background tasks are spawned onto the provided runtime. /// /// We will send a message down the `on_new_segment` channel whenever we begin a new commitlog segment. /// This is used to capture a snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, on_new_segment: Option>, ) -> Result { @@ -140,7 +137,7 @@ impl Local { opts.commitlog, on_new_segment, )?); - Self::open_inner(clog, rt, opts, Some(lock)) + Self::open_inner(clog, runtime, opts, Some(lock)) } } @@ -151,7 +148,7 @@ where { fn open_inner( clog: Arc, R>>, - rt: tokio::runtime::Handle, + runtime: RuntimeDispatch, opts: Options, lock: Option, ) -> Result { @@ -159,19 +156,17 @@ where let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - - let actor = rt.spawn( + let (actor_done_tx, actor_done_rx) = oneshot::channel(); + runtime.spawn( Actor { clog: clog.clone(), - durable_offset: durable_tx, queue_depth: queue_depth.clone(), - batch_capacity: opts.batch_capacity, - - lock, + runtime: runtime.clone(), + _lock: lock, } - .run(txdata_rx), + .run(txdata_rx, actor_done_tx), ); Ok(Self { @@ -179,7 +174,7 @@ where durable_offset: durable_rx, queue, queue_depth, - actor: Mutex::new(Some(actor)), + actor_done: Mutex::new(Some(actor_done_rx)), }) } @@ -195,10 
+190,10 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + pub fn open_with_repo(repo: R, runtime: RuntimeDispatch, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts, None) + Self::open_inner(clog, runtime, opts, None) } } @@ -246,9 +241,9 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, + runtime: RuntimeDispatch, - #[allow(unused)] - lock: Option, + _lock: Option, } impl Actor @@ -257,7 +252,7 @@ where R: Repo + Send + Sync + 'static, { #[instrument(name = "durability::local::actor", skip_all)] - async fn run(self, transactions_rx: async_channel::Receiver>>) { + async fn run(self, transactions_rx: async_channel::Receiver>>, done: oneshot::Sender<()>) { info!("starting durability actor"); let mut tx_buf = Vec::with_capacity(self.batch_capacity.get()); @@ -281,14 +276,14 @@ where let clog = self.clog.clone(); let ready_len = tx_buf.len(); self.queue_depth.fetch_sub(ready_len as u64, Relaxed); - tx_buf = spawn_blocking(move || -> io::Result>>> { + let runtime = self.runtime.clone(); + tx_buf = runtime.spawn_blocking(move || -> io::Result>>> { for tx in tx_buf.drain(..) 
{ clog.commit([tx.into_transaction()])?; } Ok(tx_buf) }) .await - .expect("commitlog write panicked") .expect("commitlog write failed"); if self.flush_and_sync().await.is_err() { sync_on_exit = false; @@ -305,6 +300,7 @@ where } info!("exiting durability actor"); + let _ = done.send(()); } #[instrument(skip_all)] @@ -318,12 +314,13 @@ where let clog = self.clog.clone(); let span = Span::current(); - spawn_blocking(move || { + let runtime = self.runtime.clone(); + runtime + .spawn_blocking(move || { let _span = span.enter(); clog.flush_and_sync() }) .await - .expect("commitlog flush-and-sync blocking task panicked") .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) .inspect(|maybe_offset| { if let Some(new_offset) = maybe_offset { @@ -356,29 +353,14 @@ where info!("close local durability"); let durable_offset = self.durable_tx_offset(); - let maybe_actor = self.actor.lock().unwrap().take(); - // Abort actor if shutdown future is dropped. - let abort = scopeguard::guard( - maybe_actor.as_ref().map(|join_handle| join_handle.abort_handle()), - |maybe_abort_handle| { - if let Some(abort_handle) = maybe_abort_handle { - warn!("close future dropped, aborting durability actor"); - abort_handle.abort(); - } - }, - ); + let maybe_actor_done = self.actor_done.lock().unwrap().take(); self.queue.close(); async move { - if let Some(actor) = maybe_actor - && let Err(e) = actor.await + if let Some(actor_done) = maybe_actor_done + && actor_done.await.is_err() { - // Will print "durability actor: task was cancelled" - // or "durability actor: task panicked [...]" - warn!("durability actor: {e}"); + warn!("durability actor completion signal dropped"); } - // Don't abort if the actor completed. 
- let _ = ScopeGuard::into_inner(abort); - durable_offset.last_seen() } .boxed() diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 3e00ae21ee1..77f0998e6f8 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -56,4 +56,9 @@ mod testing { future::ready(*self.durable_offset.borrow()).boxed() } } + + #[cfg(test)] + mod tests { + use super::*; + } } diff --git a/crates/durability/tests/io/fallocate.rs b/crates/durability/tests/io/fallocate.rs index 64e50faf4cc..be5ee61bc0b 100644 --- a/crates/durability/tests/io/fallocate.rs +++ b/crates/durability/tests/io/fallocate.rs @@ -161,7 +161,7 @@ async fn local_durability( ) -> Result, spacetimedb_durability::local::OpenError> { spacetimedb_durability::Local::open( dir, - tokio::runtime::Handle::current(), + spacetimedb_runtime::RuntimeDispatch::tokio_current(), spacetimedb_durability::local::Options { commitlog: spacetimedb_commitlog::Options { max_segment_size, diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml new file mode 100644 index 00000000000..6f62e0e6b08 --- /dev/null +++ b/crates/runtime/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "spacetimedb-runtime" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Runtime and deterministic simulation utilities for SpacetimeDB" +rust-version.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +futures.workspace = true +futures-util.workspace = true +tokio = { workspace = true, optional = true } +async-task = { version = "4.4", optional = true } +libc = { version = "0.2", optional = true } +tracing = { workspace = true, optional = true } + +[features] +default = ["tokio"] +tokio = ["dep:tokio"] +simulation = ["dep:async-task", "dep:libc", "dep:tracing"] diff --git a/crates/runtime/LICENSE b/crates/runtime/LICENSE new file mode 100644 index 00000000000..daef5135277 --- /dev/null +++ b/crates/runtime/LICENSE @@ 
-0,0 +1,731 @@ +SPACETIMEDB BUSINESS SOURCE LICENSE AGREEMENT + +Business Source License 1.1 + +Parameters + +Licensor: Clockwork Laboratories, Inc. +Licensed Work: SpacetimeDB 2.2.0 + The Licensed Work is + (c) 2023 Clockwork Laboratories, Inc. + +Additional Use Grant: You may make use of the Licensed Work provided your + application or service uses the Licensed Work with no + more than one SpacetimeDB instance in production and + provided that you do not use the Licensed Work for a + Database Service. + + A “Database Service” is a commercial offering that + allows third parties (other than your employees and + contractors) to access the functionality of the + Licensed Work by creating tables whose schemas are + controlled by such third parties. + +Change Date: 2031-04-29 + +Change License: GNU Affero General Public License v3.0 with a linking + exception + +For information about alternative licensing arrangements for the Software, +please visit: https://spacetimedb.com + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +“Business Source License” is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. 
+ +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. 
+ +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. + +----------------------------------------------------------------------------- + +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License, version 3, as published +by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with this program; if not, see <https://www.gnu.org/licenses/>.
+ +Additional permission under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission to convey the resulting work. + +Additional permission under GNU AGPL version 3 section 13 + +If you modify this Program, or any covered work, by linking or combining it +with SpacetimeDB (or a modified version of that library), containing parts +covered by the terms of the AGPL v3.0, the licensors of this Program grant +you additional permission that, notwithstanding any other provision of this +License, you need not prominently offer all users interacting with your +modified version remotely through a computer network an opportunity to +receive the Corresponding Source of your version from a network server at no +charge, if your version supports such interaction. This permission does not +waive or modify any other obligations or terms of the AGPL v3.0, except for +the specific requirement set forth in section 13. + +A copy of the AGPL v3.0 license is reproduced below. + + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + +Copyright © 2007 Free Software Foundation, Inc. +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed to take +away your freedom to share and change the works. By contrast, our General +Public Licenses are intended to guarantee your freedom to share and change +all versions of a program--to make sure it remains free software for all its +users. 
+ +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for them if you wish), that +you receive source code or can get it if you want it, that you can change the +software or use pieces of it in new free programs, and that you know you can +do these things. + +Developers that use our General Public Licenses protect your rights with two +steps: (1) assert copyright on the software, and (2) offer you this License +which gives you legal permission to copy, distribute and/or modify the +software. + +A secondary benefit of defending all users' freedom is that improvements made +in alternate versions of the program, if they receive widespread use, become +available for other developers to incorporate. Many developers of free +software are heartened and encouraged by the resulting cooperation. However, +in the case of software used on network servers, this result may fail to come +about. The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its source +code to the public. + +The GNU Affero General Public License is designed specifically to ensure +that, in such cases, the modified source code becomes available to the +community. It requires the operator of a network server to provide the source +code of the modified version running there to the users of that server. +Therefore, public use of a modified version, on a publicly accessible server, +gives the public access to the source code of the modified version. + +An older license, called the Affero General Public License and published by +Affero, was designed to accomplish similar goals. This is a different +license, not a version of the Affero GPL, but Affero has released a new +version of the Affero GPL which permits relicensing under this license. 
+ +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS +0. Definitions. +"This License" refers to version 3 of the GNU Affero General Public License. + +"Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this License. +Each licensee is addressed as "you". "Licensees" and "recipients" may be +individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work in a +fashion requiring copyright permission, other than the making of an exact +copy. The resulting work is called a "modified version" of the earlier work +or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based on the +Program. + +To "propagate" a work means to do anything with it that, without permission, +would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a +private copy. Propagation includes copying, distribution (with or without +modification), making available to the public, and in some countries other +activities as well. + +To "convey" a work means any kind of propagation that enables other parties +to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. + +An interactive user interface displays "Appropriate Legal Notices" to the +extent that it includes a convenient and prominently visible feature that (1) +displays an appropriate copyright notice, and (2) tells the user that there +is no warranty for the work (except to the extent that warranties are +provided), that licensees may convey the work under this License, and how to +view a copy of this License. 
If the interface presents a list of user +commands or options, such as a menu, a prominent item in the list meets this +criterion. + +1. Source Code. +The "source code" for a work means the preferred form of the work for making +modifications to it. "Object code" means any non-source form of a work. + +A "Standard Interface" means an interface that either is an official standard +defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used +among developers working in that language. + +The "System Libraries" of an executable work include anything, other than the +work as a whole, that (a) is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and (b) serves only +to enable use of the work with that Major Component, or to implement a +Standard Interface for which an implementation is available to the public in +source code form. A "Major Component", in this context, means a major +essential component (kernel, window system, and so on) of the specific +operating system (if any) on which the executable work runs, or a compiler +used to produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the +object code and to modify the work, including scripts to control those +activities. However, it does not include the work's System Libraries, or +general-purpose tools or generally available free programs which are used +unmodified in performing those activities but which are not part of the work. 
+For example, Corresponding Source includes interface definition files +associated with source files for the work, and the source code for shared +libraries and dynamically linked subprograms that the work is specifically +designed to require, such as by intimate data communication or control flow +between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +2. Basic Permissions. +All rights granted under this License are granted for the term of copyright +on the Program, and are irrevocable provided the stated conditions are met. +This License explicitly affirms your unlimited permission to run the +unmodified Program. The output from running a covered work is covered by this +License only if the output, given its content, constitutes a covered work. +This License acknowledges your rights of fair use or other equivalent, as +provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey +covered works to others for the sole purpose of having them make +modifications exclusively for you, or provide you with facilities for running +those works, provided that you comply with the terms of this License in +conveying all material for which you do not control copyright. Those thus +making or running the covered works for you must do so exclusively on your +behalf, under your direction and control, on terms that prohibit them from +making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes it +unnecessary. + +3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
+No covered work shall be deemed part of an effective technological measure +under any applicable law fulfilling obligations under article 11 of the WIPO +copyright treaty adopted on 20 December 1996, or similar laws prohibiting or +restricting circumvention of such measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention is +effected by exercising rights under this License with respect to the covered +work, and you disclaim any intention to limit operation or modification of +the work as a means of enforcing, against the work's users, your or third +parties' legal rights to forbid circumvention of technological measures. + +4. Conveying Verbatim Copies. +You may convey verbatim copies of the Program's source code as you receive +it, in any medium, provided that you conspicuously and appropriately publish +on each copy an appropriate copyright notice; keep intact all notices stating +that this License and any non-permissive terms added in accord with section 7 +apply to the code; keep intact all notices of the absence of any warranty; +and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you +may offer support or warranty protection for a fee. + +5. Conveying Modified Source Versions. +You may convey a work based on the Program, or the modifications to produce +it from the Program, in the form of source code under the terms of section 4, +provided that you also meet all of these conditions: + +a) The work must carry prominent notices stating that you modified it, and +giving a relevant date. +b) The work must carry prominent notices stating that it is released under +this License and any conditions added under section 7. This requirement +modifies the requirement in section 4 to "keep intact all notices". 
+c) You must license the entire work, as a whole, under this License to anyone +who comes into possession of a copy. This License will therefore apply, along +with any applicable section 7 additional terms, to the whole of the work, and +all its parts, regardless of how they are packaged. This License gives no +permission to license the work in any other way, but it does not invalidate +such permission if you have separately received it. +d) If the work has interactive user interfaces, each must display Appropriate +Legal Notices; however, if the Program has interactive interfaces that do not +display Appropriate Legal Notices, your work need not make them do so. +A compilation of a covered work with other separate and independent works, +which are not by their nature extensions of the covered work, and which are +not combined with it such as to form a larger program, in or on a volume of a +storage or distribution medium, is called an "aggregate" if the compilation +and its resulting copyright are not used to limit the access or legal rights +of the compilation's users beyond what the individual works permit. Inclusion +of a covered work in an aggregate does not cause this License to apply to the +other parts of the aggregate. + +6. Conveying Non-Source Forms. +You may convey a covered work in object code form under the terms of sections +4 and 5, provided that you also convey the machine-readable Corresponding +Source under the terms of this License, in one of these ways: + +a) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by the Corresponding Source fixed +on a durable physical medium customarily used for software interchange. 
+b) Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by a written offer, valid for at +least three years and valid for as long as you offer spare parts or customer +support for that product model, to give anyone who possesses the object code +either (1) a copy of the Corresponding Source for all the software in the +product that is covered by this License, on a durable physical medium +customarily used for software interchange, for a price no more than your +reasonable cost of physically performing this conveying of source, or (2) +access to copy the Corresponding Source from a network server at no charge. +c) Convey individual copies of the object code with a copy of the written +offer to provide the Corresponding Source. This alternative is allowed only +occasionally and noncommercially, and only if you received the object code +with such an offer, in accord with subsection 6b. +d) Convey the object code by offering access from a designated place (gratis +or for a charge), and offer equivalent access to the Corresponding Source in +the same way through the same place at no further charge. You need not +require recipients to copy the Corresponding Source along with the object +code. If the place to copy the object code is a network server, the +Corresponding Source may be on a different server (operated by you or a third +party) that supports equivalent copying facilities, provided you maintain +clear directions next to the object code saying where to find the +Corresponding Source. Regardless of what server hosts the Corresponding +Source, you remain obligated to ensure that it is available for as long as +needed to satisfy these requirements. +e) Convey the object code using peer-to-peer transmission, provided you +inform other peers where the object code and Corresponding Source of the work +are being offered to the general public at no charge under subsection 6d. 
+A separable portion of the object code, whose source code is excluded from +the Corresponding Source as a System Library, need not be included in +conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any tangible +personal property which is normally used for personal, family, or household +purposes, or (2) anything designed or sold for incorporation into a dwelling. +In determining whether a product is a consumer product, doubtful cases shall +be resolved in favor of coverage. For a particular product received by a +particular user, "normally used" refers to a typical or common use of that +class of product, regardless of the status of the particular user or of the +way in which the particular user actually uses, or expects or is expected to +use, the product. A product is a consumer product regardless of whether the +product has substantial commercial, industrial or non-consumer uses, unless +such uses represent the only significant mode of use of the product. + +"Installation Information" for a User Product means any methods, procedures, +authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified +version of its Corresponding Source. The information must suffice to ensure +that the continued functioning of the modified object code is in no case +prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as part of +a transaction in which the right of possession and use of the User Product is +transferred to the recipient in perpetuity or for a fixed term (regardless of +how the transaction is characterized), the Corresponding Source conveyed +under this section must be accompanied by the Installation Information. 
But +this requirement does not apply if neither you nor any third party retains +the ability to install modified object code on the User Product (for example, +the work has been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates for +a work that has been modified or installed by the recipient, or for the User +Product in which it has been modified or installed. Access to a network may +be denied when the modification itself materially and adversely affects the +operation of the network or violates the rules and protocols for +communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in +accord with this section must be in a format that is publicly documented (and +with an implementation available to the public in source code form), and must +require no special password or key for unpacking, reading or copying. + +7. Additional Terms. +"Additional permissions" are terms that supplement the terms of this License +by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as +though they were included in this License, to the extent that they are valid +under applicable law. If additional permissions apply only to part of the +Program, that part may be used separately under those permissions, but the +entire Program remains governed by this License without regard to the +additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when +you modify the work.) You may place additional permissions on material, added +by you to a covered work, for which you have or can give appropriate +copyright permission. 
+ +Notwithstanding any other provision of this License, for material you add to +a covered work, you may (if authorized by the copyright holders of that +material) supplement the terms of this License with terms: + +a) Disclaiming warranty or limiting liability differently from the terms of +sections 15 and 16 of this License; or +b) Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices displayed +by works containing it; or +c) Prohibiting misrepresentation of the origin of that material, or requiring +that modified versions of such material be marked in reasonable ways as +different from the original version; or +d) Limiting the use for publicity purposes of names of licensors or authors +of the material; or +e) Declining to grant rights under trademark law for use of some trade names, +trademarks, or service marks; or +f) Requiring indemnification of licensors and authors of that material by +anyone who conveys the material (or modified versions of it) with contractual +assumptions of liability to the recipient, for any liability that these +contractual assumptions directly impose on those licensors and authors. +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is governed +by this License along with a term that is a further restriction, you may +remove that term. If a license document contains a further restriction but +permits relicensing or conveying under this License, you may add to a covered +work material governed by the terms of that license document, provided that +the further restriction does not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you must +place, in the relevant source files, a statement of the additional terms that +apply to those files, or a notice indicating where to find the applicable +terms. + +Additional terms, permissive or non-permissive, may be stated in the form of +a separately written license, or stated as exceptions; the above requirements +apply either way. + +8. Termination. +You may not propagate or modify a covered work except as expressly provided +under this License. Any attempt otherwise to propagate or modify it is void, +and will automatically terminate your rights under this License (including +any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated (a) provisionally, unless and until +the copyright holder explicitly and finally terminates your license, and (b) +permanently, if the copyright holder fails to notify you of the violation by +some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated +permanently if the copyright holder notifies you of the violation by some +reasonable means, this is the first time you have received notice of +violation of this License (for any work) from that copyright holder, and you +cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses +of parties who have received copies or rights from you under this License. If +your rights have been terminated and not permanently reinstated, you do not +qualify to receive new licenses for the same material under section 10. + +9. Acceptance Not Required for Having Copies. +You are not required to accept this License in order to receive or run a copy +of the Program. 
Ancillary propagation of a covered work occurring solely as a +consequence of using peer-to-peer transmission to receive a copy likewise +does not require acceptance. However, nothing other than this License grants +you permission to propagate or modify any covered work. These actions +infringe copyright if you do not accept this License. Therefore, by modifying +or propagating a covered work, you indicate your acceptance of this License +to do so. + +10. Automatic Licensing of Downstream Recipients. +Each time you convey a covered work, the recipient automatically receives a +license from the original licensors, to run, modify and propagate that work, +subject to this License. You are not responsible for enforcing compliance by +third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered work +results from an entity transaction, each party to that transaction who +receives a copy of the work also receives whatever licenses to the work the +party's predecessor in interest had or could give under the previous +paragraph, plus a right to possession of the Corresponding Source of the work +from the predecessor in interest, if the predecessor has it or can get it +with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights +granted or affirmed under this License. For example, you may not impose a +license fee, royalty, or other charge for exercise of rights granted under +this License, and you may not initiate litigation (including a cross-claim or +counterclaim in a lawsuit) alleging that any patent claim is infringed by +making, using, selling, offering for sale, or importing the Program or any +portion of it. + +11. Patents. 
+A "contributor" is a copyright holder who authorizes use under this License +of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter +acquired, that would be infringed by some manner, permitted by this License, +of making, using, or selling its contributor version, but do not include +claims that would be infringed only as a consequence of further modification +of the contributor version. For purposes of this definition, "control" +includes the right to grant patent sublicenses in a manner consistent with +the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent +license under the contributor's essential patent claims, to make, use, sell, +offer for sale, import and otherwise run, modify and propagate the contents +of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent (such +as an express permission to practice a patent or covenant not to sue for +patent infringement). To "grant" such a patent license to a party means to +make such an agreement or commitment not to enforce a patent against the +party. 
+ +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of +charge and under the terms of this License, through a publicly available +network server or other readily accessible means, then you must either (1) +cause the Corresponding Source to be so available, or (2) arrange to deprive +yourself of the benefit of the patent license for this particular work, or +(3) arrange, in a manner consistent with the requirements of this License, to +extend the patent license to downstream recipients. "Knowingly relying" means +you have actual knowledge that, but for the patent license, your conveying +the covered work in a country, or your recipient's use of the covered work in +a country, would infringe one or more identifiable patents in that country +that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, +you convey, or propagate by procuring conveyance of, a covered work, and +grant a patent license to some of the parties receiving the covered work +authorizing them to use, propagate, modify or convey a specific copy of the +covered work, then the patent license you grant is automatically extended to +all recipients of the covered work and works based on it. + +A patent license is "discriminatory" if it does not include within the scope +of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under +this License. 
You may not convey a covered work if you are a party to an +arrangement with a third party that is in the business of distributing +software, under which you make payment to the third party based on the extent +of your activity of conveying the work, and under which the third party +grants, to any of the parties who would receive the covered work from you, a +discriminatory patent license (a) in connection with copies of the covered +work conveyed by you (or copies made from those copies), or (b) primarily for +and in connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any +implied license or other defenses to infringement that may otherwise be +available to you under applicable patent law. + +12. No Surrender of Others' Freedom. +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not excuse +you from the conditions of this License. If you cannot convey a covered work +so as to satisfy simultaneously your obligations under this License and any +other pertinent obligations, then as a consequence you may not convey it at +all. For example, if you agree to terms that obligate you to collect a +royalty for further conveying from those to whom you convey the Program, the +only way you could satisfy both those terms and this License would be to +refrain entirely from conveying the Program. + +13. Remote Network Interaction; Use with the GNU General Public License. 
+Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users interacting +with it remotely through a computer network (if your version supports such +interaction) an opportunity to receive the Corresponding Source of your +version by providing access to the Corresponding Source from a network server +at no charge, through some standard or customary means of facilitating +copying of software. This Corresponding Source shall include the +Corresponding Source for any work covered by version 3 of the GNU General +Public License that is incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have permission to +link or combine any covered work with a work licensed under version 3 of the +GNU General Public License into a single combined work, and to convey the +resulting work. The terms of this License will continue to apply to the part +which is the covered work, but the work with which it is combined will remain +governed by version 3 of the GNU General Public License. + +14. Revised Versions of this License. +The Free Software Foundation may publish revised and/or new versions of the +GNU Affero General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU Affero General Public +License "or any later version" applies to it, you have the option of +following the terms and conditions either of that numbered version or of any +later version published by the Free Software Foundation. If the Program does +not specify a version number of the GNU Affero General Public License, you +may choose any version ever published by the Free Software Foundation. 
+ +If the Program specifies that a proxy can decide which future versions of the +GNU Affero General Public License can be used, that proxy's public statement +of acceptance of a version permanently authorizes you to choose that version +for the Program. + +Later license versions may give you additional or different permissions. +However, no additional obligations are imposed on any author or copyright +holder as a result of your choosing to follow a later version. + +15. Disclaimer of Warranty. +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE +LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, +EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE +ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. +SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY +SERVICING, REPAIR OR CORRECTION. + +16. Limitation of Liability. +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE +OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR +DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR +A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH +HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +17. Interpretation of Sections 15 and 16. 
+If the disclaimer of warranty and limitation of liability provided above +cannot be given local legal effect according to their terms, reviewing courts +shall apply local law that most closely approximates an absolute waiver of +all civil liability in connection with the Program, unless a warranty or +assumption of liability accompanies a copy of the Program in return for a +fee. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively state the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + +SpacetimeDB: A database which replaces your server. +Copyright (C) 2023 Clockwork Laboratories, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +Also add information on how to contact you by electronic and paper mail. + +If your software can interact with users remotely through a computer network, +you should also make sure that it provides a way for users to get its source.
+For example, if your program is a web application, its interface could +display a "Source" link that leads users to an archive of the code. There are +many ways you could offer source, and different solutions will be better for +different programs; see section 13 for the specific requirements. + +You should also get your employer (if you work as a programmer) or school, if +any, to sign a "copyright disclaimer" for the program, if necessary. For more +information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/crates/runtime/README.md b/crates/runtime/README.md new file mode 100644 index 00000000000..f26134ba7bd --- /dev/null +++ b/crates/runtime/README.md @@ -0,0 +1,182 @@ +# spacetimedb-runtime + +`spacetimedb-runtime` is the small runtime abstraction layer shared by core +code and DST. It exists for one reason: code such as durability and +snapshotting needs to spawn work, run blocking sections, and wait with +timeouts, but we want that same code to run on either: + +- real Tokio in production, or +- the deterministic DST simulator in tests. + +The crate keeps that boundary narrow. Most callers should depend on +`RuntimeDispatch` instead of reaching directly for Tokio or simulator internals. + +## Top-level API + +The top-level module in [src/lib.rs](./src/lib.rs) exposes: + +- `RuntimeDispatch` + A small tagged runtime handle with two backends: + - `Tokio(tokio::runtime::Handle)` when the `tokio` feature is enabled + - `Simulation(sim::Handle)` when the `simulation` feature is enabled +- `spawn(...)` + Fire-and-forget task spawning. +- `spawn_blocking(...)` + Run blocking work on the runtime-appropriate backend. + On Tokio this uses `tokio::task::spawn_blocking`. + In simulation this is still scheduled through the simulator so ordering stays + deterministic. +- `timeout(...)` + Runtime-relative timeout handling. + On Tokio this uses `tokio::time::timeout`. + In simulation this uses virtual time from `sim::time`.
+- `current_handle_or_new_runtime()` + Tokio convenience for production code that may or may not already be inside a + Tokio runtime. + +The design goal is intentionally modest: this crate is not a general async +framework. It is a compatibility layer for the small set of runtime operations +SpacetimeDB core code actually needs. + +## Features + +The crate has two independent backends: + +- `tokio` + Enables production runtime support and is part of the default feature set. +- `simulation` + Enables the deterministic local simulation runtime used by DST. + +Code can compile with one or both features enabled. `RuntimeDispatch` exposes +only the backends that were actually compiled in. + +## Simulation Modules + +The simulation backend lives under [src/sim](./src/sim). + +### `sim::mod` + +[src/sim/mod.rs](./src/sim/mod.rs) is the façade for the deterministic runtime. +It re-exports the main executor types and keeps the public surface small: + +- `Runtime` + Owns the simulator executor. +- `Handle` + Cloneable access to that executor from spawned tasks. +- `NodeId` + Logical node identifier used to group and pause/resume work. +- `JoinHandle` + Awaitable handle for spawned simulated tasks. +- `yield_now` + Cooperative yield point inside the simulator. +- `time` + Virtual time utilities. +- `Rng` and `DecisionSource` + Deterministic randomness primitives. + +It also exposes small helpers such as `advance_time(...)` and +`decision_source(...)`. + +### `sim::executor` + +[src/sim/executor.rs](./src/sim/executor.rs) is the heart of the simulator. 
+ +It provides a single-threaded async executor adapted from madsim's task loop: + +- tasks are stored as `async_task` runnables +- ready work is chosen by a deterministic RNG instead of an OS/runtime scheduler +- node state can be paused and resumed +- a thread-local handle context makes the current simulation runtime accessible + from inside spawned work +- determinism can be checked by replaying the same future twice and comparing + the sequence of scheduler decisions + +Important behavior: + +- `Runtime::block_on(...)` drives the whole simulation +- `Handle::spawn_on(...)` schedules work onto a logical node +- absence of runnable work and absence of future timer wakeups is treated as a + hang, which is exactly what DST wants + +This module is the reason `RuntimeDispatch::Simulation` can behave like a real +runtime without giving up reproducibility. + +### `sim::time` + +[src/sim/time.rs](./src/sim/time.rs) implements virtual time. + +It provides: + +- `now()` + Current simulated time. +- `sleep(duration)` + A future that completes when simulated time reaches the deadline. +- `timeout(duration, future)` + Race a future against simulated time. +- `advance(duration)` + Move time forward explicitly. + +Internally it maintains: + +- a current `Duration` +- timer registrations keyed by deadline +- wakeups for due timers + +The executor uses this module to move time only when necessary, which keeps +tests deterministic and avoids tying correctness to wall-clock behavior. + +### `sim::rng` + +[src/sim/rng.rs](./src/sim/rng.rs) provides deterministic randomness. + +There are two layers: + +- `Rng` + Stateful deterministic RNG used by the executor and runtime internals. +- `DecisionSource` + Small lock-free source for probabilistic choices in test/workload code. 
+ +This module also does two extra jobs: + +- records and checks determinism checkpoints so repeated seeded runs can prove + they took the same execution path +- hooks libc randomness calls such as `getrandom` so code running inside the + simulator sees deterministic randomness instead of ambient system entropy + +That second point matters because reproducibility falls apart quickly if a +dependency reads randomness outside the simulator's control. + +### `sim::system_thread` + +[src/sim/system_thread.rs](./src/sim/system_thread.rs) prevents accidental OS +thread creation while running under simulation. + +On Unix it intercepts `pthread_attr_init` and fails fast if code tries to spawn +real system threads from inside the simulator. That protects determinism and +enforces the intended execution model: simulated tasks should run on the +simulator, not escape onto real threads. + +## How This Crate Is Intended To Be Used + +For core code: + +- accept or store `RuntimeDispatch` +- use `spawn`, `spawn_blocking`, and `timeout` +- avoid embedding raw Tokio assumptions into shared logic + +For production-only code: + +- use `RuntimeDispatch::tokio_current()` or `RuntimeDispatch::tokio(handle)` + +For DST: + +- create `sim::Runtime` +- run the test harness with `Runtime::block_on(...)` +- pass `RuntimeDispatch::simulation_current()` into the code under test + +## Current Scope + +This crate is intentionally narrow. It is not trying to replace Tokio, and it +is not a generic distributed simulator. It currently provides exactly the +runtime seams needed by SpacetimeDB components that must run both in production +and under deterministic simulation. diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs new file mode 100644 index 00000000000..7633ef08e40 --- /dev/null +++ b/crates/runtime/src/lib.rs @@ -0,0 +1,122 @@ +//! Runtime and deterministic simulation utilities shared by core and DST. 
+
+use std::{fmt, future::Future, time::Duration};
+
+#[cfg(feature = "simulation")]
+pub mod sim;
+
+/// Tokio runtime handle type carried by [`RuntimeDispatch::Tokio`].
+#[cfg(feature = "tokio")]
+pub type Handle = tokio::runtime::Handle;
+/// Tokio runtime type returned by [`current_handle_or_new_runtime`].
+#[cfg(feature = "tokio")]
+pub type Runtime = tokio::runtime::Runtime;
+
+/// Handle to whichever runtime backend was compiled in.
+///
+/// Shared code uses this to spawn tasks, run blocking sections and apply
+/// timeouts without naming Tokio or the simulator directly. Only the variants
+/// whose cargo feature is enabled exist.
+#[derive(Clone)]
+pub enum RuntimeDispatch {
+    #[cfg(feature = "tokio")]
+    Tokio(Handle),
+    #[cfg(feature = "simulation")]
+    Simulation(sim::Handle),
+}
+
+/// Error returned by [`RuntimeDispatch::timeout`] when the deadline elapses
+/// before the wrapped future completes.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct RuntimeTimeout;
+
+impl fmt::Display for RuntimeTimeout {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("runtime operation timed out")
+    }
+}
+
+impl std::error::Error for RuntimeTimeout {}
+
+impl RuntimeDispatch {
+    /// Wrap an existing Tokio runtime handle.
+    #[cfg(feature = "tokio")]
+    pub fn tokio(handle: Handle) -> Self {
+        Self::Tokio(handle)
+    }
+
+    /// Wrap the handle of the Tokio runtime the caller is running in.
+    ///
+    /// # Panics
+    ///
+    /// `Handle::current()` panics when no Tokio runtime is active.
+    #[cfg(feature = "tokio")]
+    pub fn tokio_current() -> Self {
+        Self::tokio(Handle::current())
+    }
+
+    /// Wrap an existing simulation handle.
+    #[cfg(feature = "simulation")]
+    pub fn simulation(handle: sim::Handle) -> Self {
+        Self::Simulation(handle)
+    }
+
+    /// Wrap the simulation handle installed on the current thread.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no simulation runtime is active on this thread.
+    #[cfg(feature = "simulation")]
+    pub fn simulation_current() -> Self {
+        Self::simulation(sim::Handle::current().expect("simulation runtime is not active on this thread"))
+    }
+
+    /// Spawn `future` as a fire-and-forget task.
+    ///
+    /// On Tokio the task is spawned on the stored handle; in simulation it is
+    /// spawned on [`sim::NodeId::MAIN`] and detached.
+    pub fn spawn(&self, future: impl Future<Output = ()> + Send + 'static) {
+        #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+        let _ = future;
+        match self {
+            #[cfg(feature = "tokio")]
+            Self::Tokio(handle) => {
+                handle.spawn(future);
+            }
+            #[cfg(feature = "simulation")]
+            Self::Simulation(handle) => {
+                handle.spawn_on(sim::NodeId::MAIN, future).detach();
+            }
+            #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+            _ => unreachable!("runtime dispatch has no enabled backend"),
+        }
+    }
+
+    /// Run the blocking closure `f` and return its result.
+    ///
+    /// On Tokio the closure runs on the blocking pool of the stored handle. In
+    /// simulation it is wrapped in an async task on [`sim::NodeId::MAIN`] so it
+    /// is still scheduled by the simulator.
+    ///
+    /// # Panics
+    ///
+    /// A panic inside `f` is re-raised in the caller. Any other Tokio
+    /// `JoinError` also panics.
+    pub async fn spawn_blocking<F, R>(&self, f: F) -> R
+    where
+        F: FnOnce() -> R + Send + 'static,
+        R: Send + 'static,
+    {
+        #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+        let _ = &f;
+        match self {
+            // Use the stored handle, as `spawn` does, instead of the ambient
+            // runtime: `tokio::task::spawn_blocking` panics outside a Tokio
+            // context and would ignore the runtime this dispatch was built with.
+            #[cfg(feature = "tokio")]
+            Self::Tokio(handle) => handle
+                .spawn_blocking(f)
+                .await
+                .unwrap_or_else(|e| match e.try_into_panic() {
+                    Ok(panic_payload) => std::panic::resume_unwind(panic_payload),
+                    Err(e) => panic!("Unexpected JoinError: {e}"),
+                }),
+            #[cfg(feature = "simulation")]
+            Self::Simulation(handle) => handle.spawn_on(sim::NodeId::MAIN, async move { f() }).await,
+            #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+            _ => unreachable!("runtime dispatch has no enabled backend"),
+        }
+    }
+
+    /// Await `future`, giving up after `timeout_after`.
+    ///
+    /// Tokio uses `tokio::time::timeout`; simulation uses `sim::time::timeout`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`RuntimeTimeout`] if the backend reports that the deadline
+    /// elapsed first.
+    pub async fn timeout<T>(
+        &self,
+        timeout_after: Duration,
+        future: impl Future<Output = T>,
+    ) -> Result<T, RuntimeTimeout> {
+        #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+        let _ = (timeout_after, future);
+        match self {
+            #[cfg(feature = "tokio")]
+            Self::Tokio(_) => tokio::time::timeout(timeout_after, future)
+                .await
+                .map_err(|_| RuntimeTimeout),
+            #[cfg(feature = "simulation")]
+            Self::Simulation(_) => sim::time::timeout(timeout_after, future)
+                .await
+                .map_err(|_| RuntimeTimeout),
+            #[cfg(not(any(feature = "tokio", feature = "simulation")))]
+            _ => unreachable!("runtime dispatch has no enabled backend"),
+        }
+    }
+}
+
+/// Return the current Tokio handle, creating a new runtime if none is active.
+///
+/// When a runtime had to be created it is returned as `Some(runtime)`; the
+/// caller must keep it alive for as long as the handle is in use.
+///
+/// # Errors
+///
+/// Fails if a new Tokio runtime cannot be created.
+#[cfg(feature = "tokio")]
+pub fn current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option<Runtime>)> {
+    if let Ok(handle) = Handle::try_current() {
+        return Ok((handle, None));
+    }
+
+    let runtime = Runtime::new()?;
+    Ok((runtime.handle().clone(), Some(runtime)))
+}
diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs
new file mode 100644
index 00000000000..765b70f631b
--- /dev/null
+++ b/crates/runtime/src/sim/executor.rs
@@ -0,0 +1,589 @@
+//! Minimal asynchronous executor adapted from madsim's `sim/task` loop.
+
+use std::{
+    cell::RefCell,
+    collections::BTreeMap,
+    fmt,
+    future::Future,
+    panic::AssertUnwindSafe,
+    pin::Pin,
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        Arc, Mutex,
+    },
+    task::{Context, Poll},
+    thread::{self, Thread},
+    time::Duration,
+};
+
+use futures_util::FutureExt;
+
+use crate::sim::{
+    rng::{enter_rng_context, DeterminismLog},
+    system_thread::enter_simulation_thread,
+    time::{enter_time_context, TimeHandle},
+    Rng,
+};
+
+/// Schedulable unit of work; its metadata is the [`NodeId`] it was spawned on.
+type Runnable = async_task::Runnable<NodeId>;
+
+/// A unique identifier for a simulated node.
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct NodeId(u64);
+
+impl NodeId {
+    /// The node registered when the executor is created; `block_on` futures run on it.
+    pub const MAIN: Self = Self(0);
+}
+
+impl fmt::Display for NodeId {
+    /// Formats the node as its raw numeric id.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+/// A small single-threaded runtime for DST's top-level future.
+///
+/// Futures are scheduled as runnables, the ready queue is sampled by a
+/// deterministic RNG, and pending execution without future events is
+/// considered a test hang.
+pub struct Runtime {
+    executor: Arc<Executor>,
+}
+
+impl Runtime {
+    /// Create a runtime whose scheduling decisions are derived from `seed`.
+    pub fn new(seed: u64) -> anyhow::Result<Self> {
+        Ok(Self {
+            executor: Arc::new(Executor::new(seed)),
+        })
+    }
+
+    /// Drive `future` to completion on the calling thread.
+    ///
+    /// While running, [`Handle::current`] returns this runtime's handle.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no task is runnable and no timer is pending (a hang).
+    pub fn block_on<F: Future>(&mut self, future: F) -> F::Output {
+        let _handle_context = enter_handle_context(self.handle());
+        self.executor.block_on(future)
+    }
+
+    /// Simulated time elapsed so far.
+    pub fn elapsed(&self) -> Duration {
+        self.executor.elapsed()
+    }
+
+    /// A cloneable handle to this runtime's executor.
+    pub fn handle(&self) -> Handle {
+        Handle {
+            executor: Arc::clone(&self.executor),
+        }
+    }
+
+    /// Register a new node and return its id.
+    pub fn create_node(&self) -> NodeId {
+        self.handle().create_node()
+    }
+
+    /// Stop running tasks of `node`; they are parked until [`Runtime::resume`].
+    pub fn pause(&self, node: NodeId) {
+        self.handle().pause(node);
+    }
+
+    /// Resume `node` and reschedule the tasks parked while it was paused.
+    pub fn resume(&self, node: NodeId) {
+        self.handle().resume(node);
+    }
+
+    /// Spawn `future` on `node`; panics if `node` is unknown.
+    pub fn spawn_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + Send + 'static,
+        F::Output: Send + 'static,
+    {
+        self.handle().spawn_on(node, future)
+    }
+
+    /// Run a future twice with the same seed and fail if simulator choices diverge.
+    pub fn check_determinism<F>(seed: u64, make_future: fn() -> F) -> F::Output
+    where
+        F: Future + 'static,
+        F::Output: Send + 'static,
+    {
+        Self::check_determinism_with(seed, make_future)
+    }
+
+    /// Run a future twice with the same seed and fail if simulator choices diverge.
+    ///
+    /// Each run happens on its own OS thread with a fresh runtime. The first
+    /// run records RNG checkpoints; the second replays against them and its
+    /// output is returned.
+    pub fn check_determinism_with<M, F>(seed: u64, make_future: M) -> F::Output
+    where
+        M: Fn() -> F + Clone + Send + 'static,
+        F: Future + 'static,
+        F::Output: Send + 'static,
+    {
+        let first = make_future.clone();
+        let log = thread::spawn(move || {
+            let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime");
+            runtime.executor.enable_determinism_log();
+            runtime.block_on(first());
+            runtime
+                .executor
+                .take_determinism_log()
+                .expect("determinism log should be enabled")
+        })
+        .join()
+        .map_err(|payload| panic_with_seed(seed, payload))
+        .unwrap();
+
+        thread::spawn(move || {
+            let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime");
+            runtime.executor.enable_determinism_check(log);
+            let output = runtime.block_on(make_future());
+            runtime
+                .executor
+                .finish_determinism_check()
+                .unwrap_or_else(|err| panic!("{err}"));
+            output
+        })
+        .join()
+        .map_err(|payload| panic_with_seed(seed, payload))
+        .unwrap()
+    }
+}
+
+/// Cloneable access to the simulation executor.
+#[derive(Clone)]
+pub struct Handle {
+    executor: Arc<Executor>,
+}
+
+impl Handle {
+    /// The handle installed on this thread by [`Runtime::block_on`], if any.
+    pub fn current() -> Option<Self> {
+        current_handle()
+    }
+
+    /// Register a new node and return its id.
+    pub fn create_node(&self) -> NodeId {
+        self.executor.create_node()
+    }
+
+    /// Stop running tasks of `node`; they are parked until [`Handle::resume`].
+    pub fn pause(&self, node: NodeId) {
+        self.executor.pause(node);
+    }
+
+    /// Resume `node` and reschedule the tasks parked while it was paused.
+    pub fn resume(&self, node: NodeId) {
+        self.executor.resume(node);
+    }
+
+    /// Spawn a `Send` future on `node`; panics if `node` is unknown.
+    pub fn spawn_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + Send + 'static,
+        F::Output: Send + 'static,
+    {
+        self.executor.spawn_on(node, future)
+    }
+
+    /// Spawn a future that need not be `Send` on `node`; panics if `node` is unknown.
+    pub fn spawn_local_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + 'static,
+        F::Output: 'static,
+    {
+        self.executor.spawn_local_on(node, future)
+    }
+}
+
+thread_local! {
+    static CURRENT_HANDLE: RefCell<Option<Handle>> = RefCell::new(None);
+}
+
+/// Clone of the handle currently installed on this thread, if any.
+pub(crate) fn current_handle() -> Option<Handle> {
+    CURRENT_HANDLE.with(|handle| handle.borrow().clone())
+}
+
+/// Install `handle` as the thread's current handle until the guard is dropped.
+fn enter_handle_context(handle: Handle) -> HandleContextGuard {
+    let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle));
+    HandleContextGuard { previous }
+}
+
+/// Restores the previously installed handle on drop.
+struct HandleContextGuard {
+    previous: Option<Handle>,
+}
+
+impl Drop for HandleContextGuard {
+    fn drop(&mut self) {
+        CURRENT_HANDLE.with(|slot| {
+            *slot.borrow_mut() = self.previous.take();
+        });
+    }
+}
+
+/// A spawned simulated task.
+pub struct JoinHandle<T> {
+    task: async_task::Task<T, NodeId>,
+}
+
+impl<T> JoinHandle<T> {
+    /// Let the task keep running without awaiting its output.
+    pub fn detach(self) {
+        self.task.detach();
+    }
+}
+
+impl<T> Future for JoinHandle<T> {
+    type Output = T;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        Pin::new(&mut self.task).poll(cx)
+    }
+}
+
+/// Print the reproducing seed, then continue unwinding with `payload`.
+fn panic_with_seed(seed: u64, payload: Box<dyn std::any::Any + Send + 'static>) -> ! {
+    eprintln!("note: run with --seed {seed} to reproduce this error");
+    std::panic::resume_unwind(payload);
+}
+
+/// Shared executor state: run queue, per-node pause state, RNG and virtual time.
+struct Executor {
+    queue: Receiver,
+    sender: Sender,
+    nodes: Mutex<BTreeMap<NodeId, Arc<NodeState>>>,
+    next_node: std::sync::atomic::AtomicU64,
+    rng: Arc<Mutex<Rng>>,
+    time: TimeHandle,
+}
+
+impl Executor {
+    /// Create an executor with only [`NodeId::MAIN`] registered.
+    fn new(seed: u64) -> Self {
+        let queue = Queue::new();
+        let mut nodes = BTreeMap::new();
+        nodes.insert(NodeId::MAIN, Arc::new(NodeState::default()));
+        Self {
+            queue: queue.receiver(),
+            sender: queue.sender(),
+            nodes: Mutex::new(nodes),
+            next_node: std::sync::atomic::AtomicU64::new(1),
+            rng: Arc::new(Mutex::new(Rng::new(seed))),
+            time: TimeHandle::new(),
+        }
+    }
+
+    fn elapsed(&self) -> Duration {
+        self.time.now()
+    }
+
+    fn enable_determinism_log(&self) {
+        self.rng.lock().expect("sim rng poisoned").enable_determinism_log();
+    }
+
+    fn enable_determinism_check(&self, log: DeterminismLog) {
+        self.rng.lock().expect("sim rng poisoned").enable_determinism_check(log);
+    }
+
+    fn take_determinism_log(&self) -> Option<DeterminismLog> {
+        self.rng.lock().expect("sim rng poisoned").take_determinism_log()
+    }
+
+    fn finish_determinism_check(&self) -> Result<(), String> {
+        self.rng.lock().expect("sim rng poisoned").finish_determinism_check()
+    }
+
+    fn create_node(&self) -> NodeId {
+        let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed));
+        self.nodes
+            .lock()
+            .expect("nodes poisoned")
+            .insert(id, Arc::new(NodeState::default()));
+        id
+    }
+
+    fn pause(&self, node: NodeId) {
+        self.node_state(node).paused.store(true, Ordering::Relaxed);
+    }
+
+    fn resume(&self, node: NodeId) {
+        let state = self.node_state(node);
+        state.paused.store(false, Ordering::Relaxed);
+
+        // Put everything that was parked while paused back on the run queue.
+        let mut paused = state.paused_queue.lock().expect("paused queue poisoned");
+        for runnable in paused.drain(..) {
+            self.sender.send(runnable);
+        }
+    }
+
+    fn spawn_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + Send + 'static,
+        F::Output: Send + 'static,
+    {
+        // Called only for its panic on an unknown node.
+        self.node_state(node);
+
+        let sender = self.sender.clone();
+        let (runnable, task) = async_task::Builder::new()
+            .metadata(node)
+            .spawn(move |_| future, move |runnable| sender.send(runnable));
+        runnable.schedule();
+
+        JoinHandle { task }
+    }
+
+    fn spawn_local_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + 'static,
+        F::Output: 'static,
+    {
+        // Called only for its panic on an unknown node.
+        self.node_state(node);
+
+        let sender = self.sender.clone();
+        // NOTE(review): `spawn_unchecked` skips the `Send` check on `future`.
+        // This presumably relies on runnables only being run by `block_on` on
+        // the thread that spawned them - confirm, since `Handle` can be cloned
+        // to other threads.
+        let (runnable, task) = unsafe {
+            async_task::Builder::new()
+                .metadata(node)
+                .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable))
+        };
+        runnable.schedule();
+
+        JoinHandle { task }
+    }
+
+    /// Run `future` as a task on [`NodeId::MAIN`] until it finishes.
+    ///
+    /// Panics when nothing is runnable and no timer can be woken.
+    #[track_caller]
+    fn block_on<F: Future>(&self, future: F) -> F::Output {
+        let _system_thread_context = enter_simulation_thread();
+        let _rng_context = enter_rng_context(Arc::clone(&self.rng));
+        let _time_context = enter_time_context(self.time.clone());
+        let _waiter = WaiterGuard::new(&self.queue, thread::current());
+
+        let sender = self.sender.clone();
+        // NOTE(review): `future` is neither `Send` nor `'static` here. This
+        // presumably relies on the task being polled only on this thread and
+        // finished before returning - confirm the panic path, where a cancelled
+        // task may still sit in the run queue.
+        let (runnable, task) = unsafe {
+            async_task::Builder::new()
+                .metadata(NodeId::MAIN)
+                .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable))
+        };
+        runnable.schedule();
+
+        loop {
+            self.run_all_ready();
+            if task.is_finished() {
+                return task.now_or_never().expect("finished task should resolve");
+            }
+
+            // Nothing runnable: try to wake the next timer before declaring a hang.
+            if self.time.wake_next_timer() {
+                continue;
+            }
+
+            panic!("no runnable tasks; all simulated tasks are blocked");
+        }
+    }
+
+    /// Run queued tasks, picked at random by the RNG, until the queue is empty.
+    ///
+    /// Tasks of paused nodes are moved to that node's parked queue instead.
+    fn run_all_ready(&self) {
+        while let Some(runnable) = self.queue.try_recv_random(&self.rng) {
+            let node = *runnable.metadata();
+            let state = self.node_state(node);
+            if state.paused.load(Ordering::Relaxed) {
+                state.paused_queue.lock().expect("paused queue poisoned").push(runnable);
+                continue;
+            }
+            let result = std::panic::catch_unwind(AssertUnwindSafe(|| runnable.run()));
+            if let Err(payload) = result {
+                std::panic::resume_unwind(payload);
+            }
+        }
+    }
+
+    /// Look up the state of `node`, panicking if it was never created.
+    fn node_state(&self, node: NodeId) -> Arc<NodeState> {
+        self.nodes
+            .lock()
+            .expect("nodes poisoned")
+            .get(&node)
+            .cloned()
+            .unwrap_or_else(|| panic!("unknown simulated node {node}"))
+    }
+}
+
+/// Pause flag and parked runnables of one node.
+#[derive(Clone, Default)]
+struct NodeState {
+    paused: Arc<AtomicBool>,
+    paused_queue: Arc<Mutex<Vec<Runnable>>>,
+}
+
+/// Yield once: returns `Pending` on the first poll after waking itself.
+pub async fn yield_now() {
+    YieldNow { yielded: false }.await
+}
+
+struct YieldNow {
+    yielded: bool,
+}
+
+impl Future for YieldNow {
+    type Output = ();
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        if self.yielded {
+            Poll::Ready(())
+        } else {
+            self.yielded = true;
+            cx.waker().wake_by_ref();
+            Poll::Pending
+        }
+    }
+}
+
+/// Registers a thread as the queue's waiter and clears it again on drop.
+struct WaiterGuard<'a> {
+    receiver: &'a Receiver,
+}
+
+impl<'a> WaiterGuard<'a> {
+    fn new(receiver: &'a Receiver, thread: Thread) -> Self {
+        receiver.set_waiter(Some(thread));
+        Self { receiver }
+    }
+}
+
+impl Drop for WaiterGuard<'_> {
+    fn drop(&mut self) {
+        self.receiver.set_waiter(None);
+    }
+}
+
+/// Run queue shared between a [`Sender`] and a [`Receiver`].
+struct Queue {
+    inner: Arc<QueueInner>,
+}
+
+#[derive(Clone)]
+struct Sender {
+    inner: Arc<QueueInner>,
+}
+
+#[derive(Clone)]
+struct Receiver {
+    inner: Arc<QueueInner>,
+}
+
+struct QueueInner {
+    queue: Mutex<Vec<Runnable>>,
+    waiter: Mutex<Option<Thread>>,
+}
+
+impl Queue {
+    fn new() -> Self {
+        Self {
+            inner: Arc::new(QueueInner {
+                queue: Mutex::new(Vec::new()),
+                waiter: Mutex::new(None),
+            }),
+        }
+    }
+
+    fn sender(&self) -> Sender {
+        Sender {
+            inner: self.inner.clone(),
+        }
+    }
+
+    fn receiver(&self) -> Receiver {
+        Receiver {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl Sender {
+    /// Queue `runnable` and unpark the registered waiter thread, if any.
+    fn send(&self, runnable: Runnable) {
+        self.inner.queue.lock().expect("run queue poisoned").push(runnable);
+        if let Some(thread) = self.inner.waiter.lock().expect("waiter poisoned").as_ref() {
+            thread.unpark();
+        }
+    }
+}
+
+impl Receiver {
+    fn set_waiter(&self, thread: Option<Thread>) {
+        *self.inner.waiter.lock().expect("waiter poisoned") = thread;
+    }
+
+    /// Remove and return a queued runnable at an RNG-chosen index.
+    ///
+    /// The RNG is not consulted when the queue is empty.
+    fn try_recv_random(&self, rng: &Mutex<Rng>) -> Option<Runnable> {
+        let mut queue = self.inner.queue.lock().expect("run queue poisoned");
+        if queue.is_empty() {
+            return None;
+        }
+        let idx = rng.lock().expect("rng poisoned").index(queue.len());
+        Some(queue.swap_remove(idx))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::{
+        atomic::{AtomicBool, AtomicUsize, Ordering},
+        Arc,
+    };
+
+    use super::*;
+
+    #[test]
+    fn paused_node_does_not_run_until_resumed() {
+        let mut runtime = Runtime::new(1).unwrap();
+        let node = runtime.create_node();
+        runtime.pause(node);
+
+        let runs = Arc::new(AtomicUsize::new(0));
+        let task_runs = Arc::clone(&runs);
+        let task = runtime.spawn_on(node, async move {
+            task_runs.fetch_add(1, Ordering::SeqCst);
+            7
+        });
+
+        runtime.block_on(async {
+            yield_now().await;
+        });
+        assert_eq!(runs.load(Ordering::SeqCst), 0);
+
+        runtime.resume(node);
+        assert_eq!(runtime.block_on(task), 7);
+        assert_eq!(runs.load(Ordering::SeqCst), 1);
+    }
+
+    #[test]
+    fn handle_can_spawn_onto_node_from_simulated_task() {
+        let mut runtime = Runtime::new(2).unwrap();
+        let handle = runtime.handle();
+
+        let value = runtime.block_on(async move {
+            let node = handle.create_node();
+            handle.spawn_on(node, async { 11 }).await
+        });
+
+        assert_eq!(value, 11);
+    }
+
+    #[test]
+    fn current_handle_can_spawn_local_task_inside_runtime() {
+        assert!(Handle::current().is_none());
+
+        let mut runtime = Runtime::new(5).unwrap();
+        let value = runtime.block_on(async {
+            let handle = Handle::current().expect("sim handle should be present inside block_on");
+            let node = handle.create_node();
+            let captured = std::rc::Rc::new(17);
+            handle
+                .spawn_local_on(node, async move {
+                    yield_now().await;
+                    *captured
+                })
+                .await
+        });
+
+        assert_eq!(value, 17);
+        assert!(Handle::current().is_none());
+    }
+
+    #[test]
+    fn check_determinism_runs_future_twice() {
+        static CALLS: AtomicUsize = AtomicUsize::new(0);
+        CALLS.store(0, Ordering::SeqCst);
+
+        let value = Runtime::check_determinism(3, || async {
+            CALLS.fetch_add(1, Ordering::SeqCst);
+            yield_now().await;
+            13
+        });
+
+        assert_eq!(value, 13);
+        assert_eq!(CALLS.load(Ordering::SeqCst), 2);
+    }
+
+    #[test]
+    #[should_panic(expected = "non-determinism detected")]
+    fn check_determinism_rejects_different_scheduler_sequence() {
+        static FIRST_RUN: AtomicBool = AtomicBool::new(true);
+        FIRST_RUN.store(true, Ordering::SeqCst);
+
+        Runtime::check_determinism(4, || async {
+            if FIRST_RUN.swap(false, Ordering::SeqCst) {
+                yield_now().await;
+            }
+        });
+    }
+}
diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs
new file mode 100644
index 00000000000..467903cf2b4
--- /dev/null
+++ b/crates/runtime/src/sim/mod.rs
@@ -0,0 +1,23 @@
+//! Local deterministic simulation runtime.
+//!
+//! This module is deliberately small, but its executor shape follows madsim's:
+//! futures are scheduled as runnable tasks and the ready queue is sampled by a
+//! deterministic RNG instead of being driven by a package-level async runtime.
+
+mod executor;
+mod rng;
+mod system_thread;
+pub mod time;
+
+use std::time::Duration;
+
+pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime};
+pub use rng::{DecisionSource, Rng};
+
+/// Move simulated time forward by `duration` (forwards to [`time::advance`]).
+pub fn advance_time(duration: Duration) {
+    time::advance(duration);
+}
+
+/// Create a [`DecisionSource`] seeded with `seed`.
+pub fn decision_source(seed: u64) -> DecisionSource {
+    DecisionSource::new(seed)
+}
diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs
new file mode 100644
index 00000000000..09afde03031
--- /dev/null
+++ b/crates/runtime/src/sim/rng.rs
@@ -0,0 +1,367 @@
+use std::{
+    cell::{Cell, RefCell},
+    ptr,
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc, Mutex, OnceLock,
+    },
+};
+
+/// Increment added to the generator state on every step.
+const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15;
+
+/// Seeded deterministic random number generator.
+///
+/// It can record a one-byte checkpoint per generated value (`log`) or verify
+/// generated values against a previously recorded log (`check`).
+#[derive(Clone, Debug)]
+pub struct Rng {
+    seed: u64,
+    state: u64,
+    log: Option<Vec<u8>>,
+    check: Option<(Vec<u8>, usize)>,
+}
+
+impl Rng {
+    /// Create a generator from `seed` and try to seed std's `RandomState`.
+    ///
+    /// Logs a warning when the std random state could not be seeded.
+    pub fn new(seed: u64) -> Self {
+        // NOTE(review): zero-length call with a null buffer, presumably to
+        // force the `getentropy` hook to be resolved before first use - confirm.
+        unsafe { getentropy(ptr::null_mut(), 0) };
+        if !init_std_random_state(seed) {
+            tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic");
+        }
+        Self {
+            seed,
+            state: splitmix64(seed),
+            log: None,
+            check: None,
+        }
+    }
+
+    /// Produce the next value and record/verify its determinism checkpoint.
+    pub fn next_u64(&mut self) -> u64 {
+        self.state = self.state.wrapping_add(GAMMA);
+        let value = splitmix64(self.state);
+        self.record_checkpoint(value);
+        value
+    }
+
+    /// Pick an index in `0..len`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is zero.
+    pub fn index(&mut self, len: usize) -> usize {
+        assert!(len > 0, "len must be non-zero");
+        // Reduce in 64 bits before narrowing so 32-bit targets pick the same
+        // index as 64-bit ones; the result is below `len`, so the cast is lossless.
+        (self.next_u64() % len as u64) as usize
+    }
+
+    /// Return `true` with the given `probability`, clamped to `[0, 1]`.
+    pub fn sample_probability(&mut self, probability: f64) -> bool {
+        probability_sample(self.next_u64(), probability)
+    }
+
+    /// Fill `dest` with generated bytes, eight per generated value.
+    pub(crate) fn fill_bytes(&mut self, dest: &mut [u8]) {
+        for chunk in dest.chunks_mut(std::mem::size_of::<u64>()) {
+            // Little-endian keeps the byte stream identical on every platform.
+            let bytes = self.next_u64().to_le_bytes();
+            chunk.copy_from_slice(&bytes[..chunk.len()]);
+        }
+    }
+
+    /// Start recording checkpoints; disables checking.
+    pub(crate) fn enable_determinism_log(&mut self) {
+        self.log = Some(Vec::new());
+        self.check = None;
+    }
+
+    /// Start verifying generated values against `log`; disables recording.
+    pub(crate) fn enable_determinism_check(&mut self, log: DeterminismLog) {
+        self.check = Some((log.0, 0));
+        self.log = None;
+    }
+
+    /// Take the recorded log, or else the log being checked against, if any.
+    pub(crate) fn take_determinism_log(&mut self) -> Option<DeterminismLog> {
+        self.log
+            .take()
+            .or_else(|| self.check.take().map(|(log, _)| log))
+            .map(DeterminismLog)
+    }
+
+    /// Fail if checking is enabled and not every expected checkpoint was consumed.
+    pub(crate) fn finish_determinism_check(&self) -> Result<(), String> {
+        if let Some((log, consumed)) = &self.check
+            && *consumed != log.len()
+        {
+            return Err(format!(
+                "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints",
+                self.seed,
+                log.len()
+            ));
+        }
+        Ok(())
+    }
+
+    /// Append the checkpoint of `value` to the log and/or compare it with the
+    /// next expected checkpoint, panicking on a mismatch.
+    fn record_checkpoint(&mut self, value: u64) {
+        if self.log.is_none() && self.check.is_none() {
+            return;
+        }
+
+        let checkpoint = checksum(value);
+        if let Some(log) = &mut self.log {
+            log.push(checkpoint);
+        }
+        if let Some((expected, consumed)) = &mut self.check {
+            if expected.get(*consumed) != Some(&checkpoint) {
+                panic!(
+                    "non-determinism detected for seed {} at checkpoint {consumed}",
+                    self.seed
+                );
+            }
+            *consumed += 1;
+        }
+    }
+}
+
+/// Sequence of one-byte checkpoints recorded by an [`Rng`].
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub(crate) struct DeterminismLog(Vec<u8>);
+
+/// Seeded source of probabilistic decisions backed by a single atomic counter.
+#[derive(Debug)]
+pub struct DecisionSource {
+    state: AtomicU64,
+}
+
+impl DecisionSource {
+    /// Create a source seeded with `seed`.
+    pub fn new(seed: u64) -> Self {
+        Self {
+            state: AtomicU64::new(splitmix64(seed)),
+        }
+    }
+
+    /// Return `true` with the given `probability`, clamped to `[0, 1]`.
+    pub fn sample_probability(&self, probability: f64) -> bool {
+        probability_sample(self.next_u64(), probability)
+    }
+
+    fn next_u64(&self) -> u64 {
+        let state = self.state.fetch_add(GAMMA, Ordering::Relaxed).wrapping_add(GAMMA);
+        splitmix64(state)
+    }
+}
+
+/// Map `value` to a boolean that is `true` with the given `probability`.
+///
+/// Probabilities at or below 0 always give `false`, at or above 1 always `true`.
+fn probability_sample(value: u64, probability: f64) -> bool {
+    if probability <= 0.0 {
+        return false;
+    }
+    if probability >= 1.0 {
+        return true;
+    }
+
+    // Use the top 53 bits to build an exactly representable f64 in [0, 1).
+    let unit = (value >> 11) as f64 * (1.0 / ((1u64 << 53) as f64));
+    unit < probability
+}
+
+/// Add [`GAMMA`] to `x` and scramble the result with xor-shift/multiply rounds.
+fn splitmix64(mut x: u64) -> u64 {
+    x = x.wrapping_add(GAMMA);
+    x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
+    x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
+    x ^ (x >> 31)
+}
+
+/// XOR of the eight bytes of `value` (independent of byte order).
+fn checksum(value: u64) -> u8 {
+    value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte)
+}
+
+thread_local! {
+    static CURRENT_RNG: RefCell<Option<Arc<Mutex<Rng>>>> = const { RefCell::new(None) };
+    static STD_RANDOM_SEED: Cell<Option<u64>> = const { Cell::new(None) };
+}
+
+/// Restores the previously installed thread-local RNG on drop.
+pub(crate) struct RngContextGuard {
+    previous: Option<Arc<Mutex<Rng>>>,
+}
+
+/// Install `rng` as this thread's current RNG until the guard is dropped.
+pub(crate) fn enter_rng_context(rng: Arc<Mutex<Rng>>) -> RngContextGuard {
+    let previous = CURRENT_RNG.with(|current| current.replace(Some(rng)));
+    RngContextGuard { previous }
+}
+
+impl Drop for RngContextGuard {
+    fn drop(&mut self) {
+        CURRENT_RNG.with(|current| {
+            current.replace(self.previous.take());
+        });
+    }
+}
+
+/// Offer `seed` to std's `RandomState` initialisation.
+///
+/// Returns `true` when the seed slot is empty after constructing a
+/// `RandomState`, i.e. something consumed it (presumably the randomness hook
+/// in this file - confirm).
+fn init_std_random_state(seed: u64) -> bool {
+    STD_RANDOM_SEED.with(|slot| slot.set(Some(seed)));
+    let _ = std::collections::hash_map::RandomState::new();
+    STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none()
+}
+
+/// Fill `buflen` bytes at `buf` with a byte stream derived only from `seed`.
+fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) {
+    if buflen == 0 {
+        return;
+    }
+    let mut state = splitmix64(seed);
+    // SAFETY (assumed): the caller passes a pointer valid for `buflen` writable
+    // bytes - TODO confirm at the call sites.
+    let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) };
+    for chunk in buf.chunks_mut(std::mem::size_of::<u64>()) {
+        state = state.wrapping_add(GAMMA);
+        // Little-endian keeps the byte stream identical on every platform.
+        let bytes = splitmix64(state).to_le_bytes();
+        chunk.copy_from_slice(&bytes[..chunk.len()]);
+    }
+}
+
+/// Fill `buflen` bytes at `buf` from this thread's current RNG.
+///
+/// Returns `false` without touching `buf` when no RNG is installed.
+fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool {
+    CURRENT_RNG.with(|current| {
+        let Some(rng) = current.borrow().clone() else {
+            return false;
+        };
+        if buflen == 0 {
+            return true;
+        }
+        // SAFETY (assumed): the caller passes a pointer valid for `buflen`
+        // writable bytes - TODO confirm at the call sites.
+        let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) };
+        rng.lock().expect("sim rng poisoned").fill_bytes(buf);
+        true
+    })
+}
+
+/// Obtain random bytes through the simulation RNG when running inside the DST executor.
+/// +/// This mirrors madsim's libc-level hook. It covers libc users and macOS +/// `CCRandomGenerateBytes`; crates that issue raw kernel syscalls can still +/// bypass it. +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + #[cfg(target_os = "macos")] + let _ = flags; + + if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { + fill_from_seed(buf, buflen, seed); + return buflen as isize; + } + if fill_from_current_rng(buf, buflen) { + return buflen as isize; + } + + #[cfg(target_os = "linux")] + { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + let original = GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); + std::mem::transmute(ptr) + }); + unsafe { original(buf, buflen, flags) } + } + + #[cfg(target_os = "macos")] + { + type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; + static GETENTROPY: OnceLock = OnceLock::new(); + let original = GETENTROPY.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getentropy"); + std::mem::transmute(ptr) + }); + match unsafe { original(buf, buflen) } { + -1 => -1, + 0 => buflen as isize, + _ => unreachable!("unexpected getentropy return value"), + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (buf, buflen, flags); + compile_error!("unsupported OS for DST getrandom override"); + } +} + +/// Fill a buffer with random bytes through the same hook used by libc. 
+#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +/// macOS uses CommonCrypto for process randomness in newer Rust toolchains. +#[cfg(target_os = "macos")] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { + match unsafe { getrandom(bytes, count, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use std::{collections::HashMap, sync::Arc}; + + use super::*; + + #[test] + fn rng_log_check_accepts_same_sequence() { + let mut first = Rng::new(10); + first.enable_determinism_log(); + let first_values = (0..8).map(|_| first.next_u64()).collect::>(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(10); + second.enable_determinism_check(log); + let second_values = (0..8).map(|_| second.next_u64()).collect::>(); + second.finish_determinism_check().unwrap(); + + assert_eq!(first_values, second_values); + } + + #[test] + fn decision_source_matches_rng_sequence() { + let source = DecisionSource::new(12); + let mut rng = Rng::new(12); + + for _ in 0..16 { + assert_eq!(source.next_u64(), rng.next_u64()); + } + } + + #[test] + #[should_panic(expected = "non-determinism detected")] + fn rng_log_check_rejects_different_sequence() { + let mut first = Rng::new(10); + first.enable_determinism_log(); + first.next_u64(); + let log = first.take_determinism_log().unwrap(); + + let mut second = Rng::new(11); + second.enable_determinism_check(log); + second.next_u64(); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = Arc::new(Mutex::new(Rng::new(20))); + let _guard = enter_rng_context(Arc::clone(&rng)); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let mut expected_rng = Rng::new(20); + let mut expected = 
[0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn std_hashmap_order_is_seeded_for_runtime_thread() { + fn order_for(seed: u64) -> Vec<(u64, u64)> { + std::thread::spawn(move || { + let _rng = Rng::new(seed); + (0..12) + .map(|idx| (idx, idx)) + .collect::>() + .into_iter() + .collect() + }) + .join() + .unwrap() + } + + assert_eq!(order_for(30), order_for(30)); + } +} diff --git a/crates/runtime/src/sim/system_thread.rs b/crates/runtime/src/sim/system_thread.rs new file mode 100644 index 00000000000..f395a25442a --- /dev/null +++ b/crates/runtime/src/sim/system_thread.rs @@ -0,0 +1,64 @@ +//! Guard against creating OS threads from inside the simulator. + +use std::{cell::Cell, sync::OnceLock}; + +thread_local! { + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +pub(crate) struct SimulationThreadGuard { + previous: bool, +} + +pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +/// Forbid creating system threads in simulation. 
+#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200).unwrap(); + runtime.block_on(async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } +} diff --git a/crates/runtime/src/sim/time.rs b/crates/runtime/src/sim/time.rs new file mode 100644 index 00000000000..2508b35b249 --- /dev/null +++ b/crates/runtime/src/sim/time.rs @@ -0,0 +1,343 @@ +//! Virtual time for the local simulation runtime. 
+ +use std::{ + cell::RefCell, + collections::BTreeMap, + fmt, + future::Future, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use futures::future::{select, Either}; + +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().expect("sim time poisoned").now + } + + pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + pub fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock().expect("sim time poisoned"); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock().expect("sim time poisoned"); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().expect("sim time poisoned").timers.remove(&id); + } + + fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock().expect("sim time poisoned"); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } +} + +impl Default for TimeHandle { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + fn take_due_wakers(&mut self) -> Vec { + let due 
= self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +thread_local! { + static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; +} + +pub struct TimeContextGuard { + previous: Option, +} + +pub fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { + let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); + TimeContextGuard { previous } +} + +pub fn try_current_handle() -> Option { + CURRENT_TIME.with(|current| current.borrow().clone()) +} + +pub fn now() -> Duration { + try_current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn advance(duration: Duration) { + if let Some(handle) = try_current_handle() { + handle.advance(duration); + } +} + +pub fn sleep(duration: Duration) -> Sleep { + Sleep { + duration, + state: SleepState::Unregistered, + } +} + +pub async fn timeout(duration: Duration, future: impl Future) -> Result { + futures::pin_mut!(future); + let sleep = sleep(duration); + futures::pin_mut!(sleep); + + match select(future, sleep).await { + Either::Left((output, _)) => Ok(output), + Either::Right(((), _)) => Err(TimeoutElapsed { duration }), + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TimeoutElapsed { + duration: Duration, +} + +impl TimeoutElapsed { + pub fn duration(self) -> Duration { + self.duration + } +} + +impl fmt::Display for TimeoutElapsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "simulated timeout elapsed after {:?}", self.duration) + } +} + +impl std::error::Error for TimeoutElapsed {} + +impl Drop for TimeContextGuard { + fn drop(&mut self) { + CURRENT_TIME.with(|current| { + 
current.replace(self.previous.take()); + }); + } +} + +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +enum SleepState { + Unregistered, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if matches!(self.state, SleepState::Unregistered) { + let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); + let deadline = handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. 
} = &self.state { + handle.cancel_timer(*id); + } + } +} + +fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{Arc, Mutex}, + time::Duration, + }; + + use crate::sim; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(101).unwrap(); + + runtime.block_on(async { + assert_eq!(super::now(), Duration::ZERO); + super::sleep(Duration::from_millis(5)).await; + assert_eq!(super::now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(102).unwrap(); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(10)).await; + slow_order.lock().expect("order poisoned").push(10); + }); + + let fast_order = Arc::clone(&order); + let fast = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(3)).await; + fast_order.lock().expect("order poisoned").push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(103).unwrap(); + + runtime.block_on(async { + super::advance(Duration::from_millis(7)); + assert_eq!(super::now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(104).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(10), async { + super::sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), 
Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(105).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(4), async { + super::sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 180b3a60b4c..3bc7335625a 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -54,7 +54,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio.workspace = true +tokio = { workspace = true, features = ["full"] } tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index b407372aa34..ad1e02e788b 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,12 +1,18 @@ +#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; +#[cfg(not(simulation))] use spacetimedb_pg::pg_server; +#[cfg(not(simulation))] use std::io::{self, Write}; +#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; +#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; use clap::{Arg, ArgMatches}; @@ -15,11 +21,14 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; +#[cfg(not(simulation))] use 
spacetimedb_client_api::routes::database::DatabaseRoutes; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; +#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -111,6 +120,7 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); + #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -197,13 +207,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); + #[cfg(simulation)] + { + let _ = (pg_port, ctx, listen_addr); + anyhow::bail!("standalone start server mode is not supported under simulation"); + } + + #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + #[cfg(not(simulation))] + { + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + } + #[cfg(not(simulation))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); + #[cfg(not(simulation))] let service = router(&ctx, db_routes, 
IdentityRoutes::default(), extra).with_state(ctx.clone()); + #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -249,40 +272,44 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" - ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + #[cfg(not(simulation))] + { + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! 
{ - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + ))?; + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks + } } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + #[cfg(not(simulation))] Ok(()) } @@ -301,6 +328,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. 
This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. +#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -335,11 +363,13 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] +#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } +#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -353,6 +383,7 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } +#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -423,6 +454,7 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. +#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -437,6 +469,7 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
+#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); From c83ed2e99035e8d5eed1459d422cda047b18de59 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 20:41:16 +0530 Subject: [PATCH 04/40] LockedFsRepo --- crates/commitlog/src/lib.rs | 30 +++++++-- crates/commitlog/src/repo/mod.rs | 10 +++ crates/durability/src/imp/local.rs | 105 +++++++++++++++++++++++------ 3 files changed, 117 insertions(+), 28 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index d80c1fb00b7..4b5727bc64c 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -6,7 +6,7 @@ use std::{ }; use log::trace; -use repo::{fs::OnNewSegmentFn, Repo}; +use repo::{fs::OnNewSegmentFn, Repo, RepoWithSizeOnDisk}; use spacetimedb_paths::server::CommitLogDir; pub mod commit; @@ -188,11 +188,6 @@ impl Commitlog { Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) } - /// Determine the size on disk of this commitlog. - pub fn size_on_disk(&self) -> io::Result { - let inner = self.inner.read().unwrap(); - inner.repo.size_on_disk() - } } impl Commitlog @@ -210,6 +205,29 @@ where inner: RwLock::new(inner), }) } +} + +impl Commitlog +where + R: RepoWithSizeOnDisk, +{ + /// Determine the size on disk of this commitlog. + pub fn size_on_disk(&self) -> io::Result { + let inner = self.inner.read().unwrap(); + inner.repo.size_on_disk() + } +} + +impl RepoWithSizeOnDisk for repo::Fs { + fn size_on_disk(&self) -> io::Result { + Self::size_on_disk(self) + } +} + +impl Commitlog +where + R: Repo, +{ /// Determine the maximum transaction offset considered durable. 
/// diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 358936c3c2a..0efa173f8f6 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -144,6 +144,11 @@ pub trait Repo: Clone + fmt::Display { } } +/// Capability trait for repos that can report storage usage. +pub trait RepoWithSizeOnDisk: Repo { + fn size_on_disk(&self) -> io::Result; +} + /// Marker for repos that do not require an external lock file. /// /// Durability implementations can use this to expose repo-backed opening @@ -152,6 +157,11 @@ pub trait Repo: Clone + fmt::Display { pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +impl RepoWithSizeOnDisk for &T { + fn size_on_disk(&self) -> io::Result { + T::size_on_disk(self) + } +} #[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 5cc03099ab6..ab5f44217b8 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -14,7 +14,7 @@ use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, - repo::{Fs, Repo, RepoWithoutLockFile}, + repo::{Fs, Repo, RepoWithSizeOnDisk, RepoWithoutLockFile}, Commit, Commitlog, Decoder, Encode, Transaction, }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; @@ -88,7 +88,7 @@ pub enum OpenError { /// /// Note, however, that instantiating `T` to a different type may require to /// change the log format version! -pub struct Local +pub struct Local where R: Repo, { @@ -114,7 +114,82 @@ where actor: Mutex>>, } -impl Local { +/// Commitlog repo backed by [`Fs`] and protected by a [`LockedFile`]. 
+#[derive(Clone, Debug)] +pub struct LockedFsRepo { + repo: Fs, + #[allow(unused)] + lock: Arc, +} + +impl LockedFsRepo { + pub fn open(replica_dir: ReplicaDir, on_new_segment: Option>) -> Result { + // We use the `db.lock` file for historical reasons and to keep + // compatibility with existing standalone layouts. + let lock = LockedFile::lock(replica_dir.0.join("db.lock")).map(Arc::new)?; + let repo = Fs::new(replica_dir.commit_log(), on_new_segment)?; + Ok(Self { repo, lock }) + } +} + +impl std::fmt::Display for LockedFsRepo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.repo.fmt(f) + } +} + +impl Repo for LockedFsRepo { + type SegmentWriter = ::SegmentWriter; + type SegmentReader = ::SegmentReader; + + fn create_segment(&self, offset: u64, header: spacetimedb_commitlog::segment::Header) -> io::Result { + self.repo.create_segment(offset, header) + } + + fn open_segment_reader(&self, offset: u64) -> io::Result { + self.repo.open_segment_reader(offset) + } + + fn open_segment_writer(&self, offset: u64) -> io::Result { + self.repo.open_segment_writer(offset) + } + + fn segment_file_path(&self, offset: u64) -> Option { + self.repo.segment_file_path(offset) + } + + fn remove_segment(&self, offset: u64) -> io::Result<()> { + self.repo.remove_segment(offset) + } + + fn compress_segment(&self, offset: u64) -> io::Result<()> { + self.repo.compress_segment(offset) + } + + fn existing_offsets(&self) -> io::Result> { + self.repo.existing_offsets() + } + + fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + self.repo.create_offset_index(offset, cap) + } + + fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { + self.repo.remove_offset_index(offset) + } + + fn get_offset_index(&self, offset: TxOffset) -> io::Result { + self.repo.get_offset_index(offset) + } +} + +impl RepoWithSizeOnDisk for LockedFsRepo { + fn size_on_disk(&self) -> io::Result { + self.repo.size_on_disk() + } +} + +impl Local { /// 
Create a [`Local`] instance at the `replica_dir`. /// /// `replica_dir` must already exist. @@ -130,17 +205,9 @@ impl Local { on_new_segment: Option>, ) -> Result { info!("open local durability"); - - // We could just place a lock on the commitlog directory, - // yet for backwards-compatibility, we keep using the `db.lock` file. - let lock = LockedFile::lock(replica_dir.0.join("db.lock"))?; - - let clog = Arc::new(Commitlog::open( - replica_dir.commit_log(), - opts.commitlog, - on_new_segment, - )?); - Self::open_inner(clog, rt, opts, Some(lock)) + let repo = LockedFsRepo::open(replica_dir, on_new_segment)?; + let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts) } } @@ -153,7 +220,6 @@ where clog: Arc, R>>, rt: tokio::runtime::Handle, opts: Options, - lock: Option, ) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); @@ -168,8 +234,6 @@ where queue_depth: queue_depth.clone(), batch_capacity: opts.batch_capacity, - - lock, } .run(txdata_rx), ); @@ -198,7 +262,7 @@ where pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts, None) + Self::open_inner(clog, rt, opts) } } @@ -229,7 +293,7 @@ where } } -impl Local { +impl Local { /// Get the size on disk of the underlying [`Commitlog`]. 
pub fn size_on_disk(&self) -> io::Result { self.clog.size_on_disk() @@ -246,9 +310,6 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - - #[allow(unused)] - lock: Option, } impl Actor From 813e418bc3929a48522cc4db18a0c65cdad0e86d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 21:13:36 +0530 Subject: [PATCH 05/40] comments --- crates/commitlog/src/lib.rs | 5 ++--- crates/commitlog/src/repo/mod.rs | 8 ++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 4b5727bc64c..9e640733613 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -187,7 +187,6 @@ impl Commitlog { } Self::open_with_repo(repo::Fs::new(root, on_new_segment)?, opts) } - } impl Commitlog @@ -228,7 +227,6 @@ impl Commitlog where R: Repo, { - /// Determine the maximum transaction offset considered durable. /// /// The offset is `None` if the log hasn't been flushed to disk yet. @@ -423,8 +421,9 @@ where } } -impl Commitlog +impl Commitlog where + T: Encode, R: Repo, { /// Write `transactions` to the log. diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 0efa173f8f6..4bbf72a97f8 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -156,8 +156,12 @@ pub trait RepoWithSizeOnDisk: Repo { /// violate single-writer safety. 
pub trait RepoWithoutLockFile: Repo {} -impl RepoWithoutLockFile for &T {} -impl RepoWithSizeOnDisk for &T { +impl RepoWithoutLockFile for &T where T: RepoWithoutLockFile {} + +impl RepoWithSizeOnDisk for &T +where + T: RepoWithSizeOnDisk, +{ fn size_on_disk(&self) -> io::Result { T::size_on_disk(self) } From 5946261a2617e7a95494d291f2d04333c2bb995e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 8 May 2026 21:25:17 +0530 Subject: [PATCH 06/40] cleanup --- crates/commitlog/src/repo/mod.rs | 13 ------------ crates/durability/src/imp/local.rs | 34 ++++++++++++++++-------------- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 4bbf72a97f8..5e1b313e766 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -149,15 +149,6 @@ pub trait RepoWithSizeOnDisk: Repo { fn size_on_disk(&self) -> io::Result; } -/// Marker for repos that do not require an external lock file. -/// -/// Durability implementations can use this to expose repo-backed opening -/// only for storage backends where skipping the filesystem `db.lock` cannot -/// violate single-writer safety. 
-pub trait RepoWithoutLockFile: Repo {} - -impl RepoWithoutLockFile for &T where T: RepoWithoutLockFile {} - impl RepoWithSizeOnDisk for &T where T: RepoWithSizeOnDisk, @@ -166,10 +157,6 @@ where T::size_on_disk(self) } } - -#[cfg(any(test, feature = "test"))] -impl RepoWithoutLockFile for Memory {} - impl Repo for &T { type SegmentWriter = T::SegmentWriter; type SegmentReader = T::SegmentReader; diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index ab5f44217b8..90a103ae91d 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,10 +11,12 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; use scopeguard::ScopeGuard; +#[cfg(any(test, feature = "test"))] +use spacetimedb_commitlog::repo::Memory; use spacetimedb_commitlog::{ error, payload::Txdata, - repo::{Fs, Repo, RepoWithSizeOnDisk, RepoWithoutLockFile}, + repo::{Fs, Repo, RepoWithSizeOnDisk}, Commit, Commitlog, Decoder, Encode, Transaction, }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; @@ -142,7 +144,11 @@ impl Repo for LockedFsRepo { type SegmentWriter = ::SegmentWriter; type SegmentReader = ::SegmentReader; - fn create_segment(&self, offset: u64, header: spacetimedb_commitlog::segment::Header) -> io::Result { + fn create_segment( + &self, + offset: u64, + header: spacetimedb_commitlog::segment::Header, + ) -> io::Result { self.repo.create_segment(offset, header) } @@ -170,7 +176,11 @@ impl Repo for LockedFsRepo { self.repo.existing_offsets() } - fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + fn create_offset_index( + &self, + offset: TxOffset, + cap: u64, + ) -> io::Result { self.repo.create_offset_index(offset, cap) } @@ -216,6 +226,11 @@ where T: Encode + Send + Sync + 'static, R: Repo + Send + Sync + 'static, { + pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { + info!("open local durability"); + 
let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); + Self::open_inner(clog, rt, opts) + } fn open_inner( clog: Arc, R>>, rt: tokio::runtime::Handle, @@ -253,19 +268,6 @@ where } } -impl Local -where - T: Encode + Send + Sync + 'static, - R: RepoWithoutLockFile + Send + Sync + 'static, -{ - /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: tokio::runtime::Handle, opts: Options) -> Result { - info!("open local durability"); - let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); - Self::open_inner(clog, rt, opts) - } -} - impl Local where T: Send + Sync + 'static, From 2104ced1fa78f72dfe7e660ad2cd6ac485a70186 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 15:38:28 +0530 Subject: [PATCH 07/40] lint Signed-off-by: Shubham Mishra --- crates/durability/src/imp/local.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 65f7499b79f..5b3124068f1 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -11,8 +11,6 @@ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; use scopeguard::ScopeGuard; -#[cfg(any(test, feature = "test"))] -use spacetimedb_commitlog::repo::Memory; use spacetimedb_commitlog::{ error, payload::Txdata, From fc2e146d6cd28e8424027f944e92017d53bd54e1 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 17:29:09 +0530 Subject: [PATCH 08/40] make sim module mostly non_std --- Cargo.lock | 2 +- crates/core/src/db/durability.rs | 4 +- crates/core/src/db/persistence.rs | 18 +- crates/core/src/db/relational_db.rs | 42 +- crates/core/src/db/snapshot.rs | 29 +- crates/core/src/runtime.rs | 3 +- .../subscription/module_subscription_actor.rs | 2 +- crates/durability/src/imp/local.rs | 53 +-- crates/durability/tests/io/fallocate.rs | 2 +- crates/runtime/Cargo.toml | 5 +- 
crates/runtime/README.md | 255 ++++------ crates/runtime/src/adapter/mod.rs | 5 + crates/runtime/src/adapter/sim_std.rs | 361 +++++++++++++++ crates/runtime/src/adapter/tokio.rs | 11 + crates/runtime/src/lib.rs | 40 +- crates/runtime/src/sim/buggify.rs | 51 ++ crates/runtime/src/sim/config.rs | 16 + crates/runtime/src/sim/executor.rs | 389 +++++++++------- crates/runtime/src/sim/mod.rs | 17 +- crates/runtime/src/sim/rng.rs | 437 +++++++----------- crates/runtime/src/sim/system_thread.rs | 64 --- crates/runtime/src/sim/time.rs | 343 -------------- crates/runtime/src/sim/time/mod.rs | 297 ++++++++++++ crates/runtime/src/sim/time/sleep.rs | 97 ++++ crates/runtime/tests/sim_e2e.rs | 108 +++++ 25 files changed, 1532 insertions(+), 1119 deletions(-) create mode 100644 crates/runtime/src/adapter/mod.rs create mode 100644 crates/runtime/src/adapter/sim_std.rs create mode 100644 crates/runtime/src/adapter/tokio.rs create mode 100644 crates/runtime/src/sim/buggify.rs create mode 100644 crates/runtime/src/sim/config.rs delete mode 100644 crates/runtime/src/sim/system_thread.rs delete mode 100644 crates/runtime/src/sim/time.rs create mode 100644 crates/runtime/src/sim/time/mod.rs create mode 100644 crates/runtime/src/sim/time/sleep.rs create mode 100644 crates/runtime/tests/sim_e2e.rs diff --git a/Cargo.lock b/Cargo.lock index a0193647eae..a3e768f96e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8470,11 +8470,11 @@ dependencies = [ name = "spacetimedb-runtime" version = "2.2.0" dependencies = [ - "anyhow", "async-task", "futures", "futures-util", "libc", + "spin", "tokio", "tracing", ] diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 3a466d53eb6..6d3b814a55f 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -10,7 +10,7 @@ use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use crate::{db::persistence::Durability, runtime::RuntimeDispatch}; +use 
crate::{db::persistence::Durability, runtime::Runtime}; pub(super) fn request_durability( durability: &Durability, @@ -31,7 +31,7 @@ pub(super) fn request_durability( })); } -pub(super) fn spawn_close(durability: Arc, runtime: &RuntimeDispatch, database_identity: Identity) { +pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, database_identity: Identity) { let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 83d58befb06..cd69b2d82ad 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,7 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, runtime::RuntimeDispatch, util::asyncify}; +use crate::{messages::control_db::Database, runtime::Runtime, util::asyncify}; use super::{ relational_db::{self, Txdata}, @@ -42,7 +42,7 @@ pub struct Persistence { /// this type. pub snapshots: Option, /// Runtime onto which durability-related tasks shall be spawned. 
- pub runtime: RuntimeDispatch, + pub runtime: Runtime, } impl Persistence { @@ -53,14 +53,14 @@ impl Persistence { snapshots: Option, runtime: tokio::runtime::Handle, ) -> Self { - Self::new_with_runtime(durability, disk_size, snapshots, RuntimeDispatch::tokio(runtime)) + Self::new_with_runtime(durability, disk_size, snapshots, Runtime::tokio(runtime)) } pub fn new_with_runtime( durability: impl spacetimedb_durability::Durability + 'static, disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, - runtime: RuntimeDispatch, + runtime: Runtime, ) -> Self { Self { durability: Arc::new(durability), @@ -100,7 +100,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -158,11 +158,7 @@ impl PersistenceProvider for LocalPersistenceProvider { asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - RuntimeDispatch::tokio_current(), - ) + SnapshotWorker::new_with_repository(repo, snapshot::Compression::Enabled, Runtime::tokio_current()) })?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; @@ -177,7 +173,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: RuntimeDispatch::tokio_current(), + runtime: Runtime::tokio_current(), }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 57c7cde59cc..b576b1e6c14 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -1,7 +1,7 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; -use crate::runtime::RuntimeDispatch; +use crate::runtime::Runtime; use crate::subscription::ExecutionCounters; 
use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -41,9 +41,9 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; #[cfg(test)] use spacetimedb_paths::server::SnapshotDirPath; +use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; @@ -102,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -1678,7 +1678,7 @@ pub async fn local_durability( replica_dir: ReplicaDir, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let runtime = RuntimeDispatch::tokio_current(); + let runtime = Runtime::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -1957,14 +1957,13 @@ pub mod tests_utils { ) -> Result<(RelationalDB, Arc>), DBError> { let snapshots = want_snapshot_repo .then(|| { - open_snapshot_repo(root.snapshots(), db_identity, replica_id) - .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Disabled, - RuntimeDispatch::tokio(rt.clone()), - ) - }) + open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + Runtime::tokio(rt.clone()), + ) + }) }) .transpose()?; @@ -1975,7 +1974,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: RuntimeDispatch::tokio(rt), + runtime: Runtime::tokio(rt), }; let (db, _) = RelationalDB::open( @@ -2086,14 +2085,13 @@ pub mod tests_utils { ) 
-> Result<(RelationalDB, Arc>), DBError> { let snapshots = want_snapshot_repo .then(|| { - open_snapshot_repo(root.snapshots(), Identity::ZERO, 0) - .map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Disabled, - RuntimeDispatch::tokio(rt.clone()), - ) - }) + open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { + SnapshotWorker::new_with_repository( + repo, + snapshot::Compression::Disabled, + Runtime::tokio(rt.clone()), + ) + }) }) .transpose()?; let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; @@ -2102,7 +2100,7 @@ pub mod tests_utils { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: RuntimeDispatch::tokio(rt), + runtime: Runtime::tokio(rt), }; let db = Self::open_db(history, Some(persistence), None, 0)?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index dda981a89bd..042b257b608 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,7 +17,7 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; use tokio::sync::watch; -use crate::{runtime::RuntimeDispatch, worker_metrics::WORKER_METRICS}; +use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; pub type SnapshotDatabaseState = Arc>; @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repo: Arc, runtime: RuntimeDispatch) -> Self { + pub fn new(snapshot_repo: Arc, runtime: Runtime) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -142,7 +142,7 @@ impl SnapshotWorker { pub fn new_with_repository( snapshot_repository: Arc, compression: Compression, - runtime: RuntimeDispatch, + runtime: Runtime, ) -> Self { let database = snapshot_repository.database_identity(); let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); @@ -200,7 +200,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: RuntimeDispatch, + runtime: Runtime, compression: Option, } @@ -345,7 +345,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: RuntimeDispatch, + runtime: Runtime, } impl Compressor { @@ -378,15 +378,16 @@ impl Compressor { let mut stats = self.stats.take().unwrap_or_default(); let runtime = self.runtime.clone(); - let (mut stats, res) = runtime.spawn_blocking({ - let range = range.clone(); - move || { - let _timer = inner_timer.start_timer(); - let res = snapshot_repo.compress_snapshots(&mut stats, range); - (stats, res) - } - }) - .await; + let (mut stats, res) = runtime + .spawn_blocking({ + let range = range.clone(); + move || { + let _timer = inner_timer.start_timer(); + let res = snapshot_repo.compress_snapshots(&mut stats, range); + (stats, res) + } + }) + .await; let elapsed = Duration::from_secs_f64(timer.stop_and_record()); self.metrics.report_and_reset(&mut stats); // Store stats for reuse. diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs index 53baad4d73a..4c55c71dccd 100644 --- a/crates/core/src/runtime.rs +++ b/crates/core/src/runtime.rs @@ -1,3 +1,4 @@ //! Runtime boundary re-exported for core call sites. 
-pub use spacetimedb_runtime::{current_handle_or_new_runtime, Handle, Runtime, RuntimeDispatch, RuntimeTimeout}; +pub use spacetimedb_runtime::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; +pub use spacetimedb_runtime::{Runtime, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index f82d36286d4..2332782a52d 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2061,7 +2061,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: crate::runtime::RuntimeDispatch::tokio(rt), + runtime: crate::runtime::Runtime::tokio(rt), }), None, 0, diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 51d89e2e848..f59df89c920 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -18,7 +18,7 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::RuntimeDispatch; +use spacetimedb_runtime::Runtime; use thiserror::Error; use tokio::sync::{oneshot, watch}; use tracing::{instrument, Span}; @@ -122,7 +122,7 @@ impl Local { /// This is used to capture a snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - runtime: RuntimeDispatch, + runtime: Runtime, opts: Options, on_new_segment: Option>, ) -> Result { @@ -148,7 +148,7 @@ where { fn open_inner( clog: Arc, R>>, - runtime: RuntimeDispatch, + runtime: Runtime, opts: Options, lock: Option, ) -> Result { @@ -190,7 +190,7 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. 
- pub fn open_with_repo(repo: R, runtime: RuntimeDispatch, opts: Options) -> Result { + pub fn open_with_repo(repo: R, runtime: Runtime, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); Self::open_inner(clog, runtime, opts, None) @@ -241,7 +241,7 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - runtime: RuntimeDispatch, + runtime: Runtime, _lock: Option, } @@ -277,14 +277,15 @@ where let ready_len = tx_buf.len(); self.queue_depth.fetch_sub(ready_len as u64, Relaxed); let runtime = self.runtime.clone(); - tx_buf = runtime.spawn_blocking(move || -> io::Result>>> { - for tx in tx_buf.drain(..) { - clog.commit([tx.into_transaction()])?; - } - Ok(tx_buf) - }) - .await - .expect("commitlog write failed"); + tx_buf = runtime + .spawn_blocking(move || -> io::Result>>> { + for tx in tx_buf.drain(..) { + clog.commit([tx.into_transaction()])?; + } + Ok(tx_buf) + }) + .await + .expect("commitlog write failed"); if self.flush_and_sync().await.is_err() { sync_on_exit = false; break; @@ -317,19 +318,19 @@ where let runtime = self.runtime.clone(); runtime .spawn_blocking(move || { - let _span = span.enter(); - clog.flush_and_sync() - }) - .await - .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) - .inspect(|maybe_offset| { - if let Some(new_offset) = maybe_offset { - trace!("synced to offset {new_offset}"); - self.durable_offset.send_modify(|val| { - val.replace(*new_offset); - }); - } - }) + let _span = span.enter(); + clog.flush_and_sync() + }) + .await + .inspect_err(|e| warn!("error flushing commitlog: {e:#}")) + .inspect(|maybe_offset| { + if let Some(new_offset) = maybe_offset { + trace!("synced to offset {new_offset}"); + self.durable_offset.send_modify(|val| { + val.replace(*new_offset); + }); + } + }) } } diff --git a/crates/durability/tests/io/fallocate.rs b/crates/durability/tests/io/fallocate.rs index be5ee61bc0b..2783b2178ec 100644 --- 
a/crates/durability/tests/io/fallocate.rs +++ b/crates/durability/tests/io/fallocate.rs @@ -161,7 +161,7 @@ async fn local_durability( ) -> Result, spacetimedb_durability::local::OpenError> { spacetimedb_durability::Local::open( dir, - spacetimedb_runtime::RuntimeDispatch::tokio_current(), + spacetimedb_runtime::Runtime::tokio_current(), spacetimedb_durability::local::Options { commitlog: spacetimedb_commitlog::Options { max_segment_size, diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 6f62e0e6b08..a86ee9d0fc4 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -10,15 +10,16 @@ rust-version.workspace = true workspace = true [dependencies] -anyhow.workspace = true futures.workspace = true futures-util.workspace = true tokio = { workspace = true, optional = true } async-task = { version = "4.4", optional = true } +spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } tracing = { workspace = true, optional = true } [features] default = ["tokio"] tokio = ["dep:tokio"] -simulation = ["dep:async-task", "dep:libc", "dep:tracing"] +simulation = ["dep:async-task", "dep:spin"] +simulation-std = ["simulation", "dep:libc", "dep:tracing"] diff --git a/crates/runtime/README.md b/crates/runtime/README.md index f26134ba7bd..576122be42e 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,182 +1,127 @@ # spacetimedb-runtime -`spacetimedb-runtime` is the small runtime abstraction layer shared by core -code and DST. It exists for one reason: code such as durability and -snapshotting needs to spawn work, run blocking sections, and wait with -timeouts, but we want that same code to run on either: - -- real Tokio in production, or -- the deterministic DST simulator in tests. - -The crate keeps that boundary narrow. Most callers should depend on -`RuntimeDispatch` instead of reaching directly for Tokio or simulator internals. 
- -## Top-level API - -The top-level module in [src/lib.rs](./src/lib.rs) exposes: - -- `RuntimeDispatch` - A small tagged runtime handle with two backends: - - `Tokio(tokio::runtime::Handle)` when the `tokio` feature is enabled - - `Simulation(sim::Handle)` when the `simulation` feature is enabled -- `spawn(...)` - Fire-and-forget task spawning. -- `spawn_blocking(...)` - Run blocking work on the runtime-appropriate backend. - On Tokio this uses `tokio::task::spawn_blocking`. - In simulation this is still scheduled through the simulator so ordering stays - deterministic. -- `timeout(...)` - Runtime-relative timeout handling. - On Tokio this uses `tokio::time::timeout`. - In simulation this uses virtual time from `sim::time`. -- `current_handle_or_new_runtime()` - Tokio convenience for production code that may or may not already be inside a - Tokio runtime. - -The design goal is intentionally modest: this crate is not a general async -framework. It is a compatibility layer for the small set of runtime operations -SpacetimeDB core code actually needs. - -## Features - -The crate has two independent backends: +`spacetimedb-runtime` is the runtime boundary shared by SpacetimeDB core code +and DST. The goal is not to emulate all of Tokio. We do not aim to support +`tokio::net`, `tokio::fs`, or arbitrary ecosystem compatibility here. The goal +is much narrower: provide the small amount of execution control that core +database code needs so that it can run under either a deterministic single- +threaded runtime or a hosted adapter. -- `tokio` - Enables production runtime support and is part of the default feature set. -- `simulation` - Enables the deterministic local simulation runtime used by DST. - -Code can compile with one or both features enabled. `RuntimeDispatch` exposes -only the backends that were actually compiled in. - -## Simulation Modules - -The simulation backend lives under [src/sim](./src/sim). 
- -### `sim::mod` - -[src/sim/mod.rs](./src/sim/mod.rs) is the façade for the deterministic runtime. -It re-exports the main executor types and keeps the public surface small: - -- `Runtime` - Owns the simulator executor. -- `Handle` - Cloneable access to that executor from spawned tasks. -- `NodeId` - Logical node identifier used to group and pause/resume work. -- `JoinHandle` - Awaitable handle for spawned simulated tasks. -- `yield_now` - Cooperative yield point inside the simulator. -- `time` - Virtual time utilities. -- `Rng` and `DecisionSource` - Deterministic randomness primitives. - -It also exposes small helpers such as `advance_time(...)` and -`decision_source(...)`. - -### `sim::executor` - -[src/sim/executor.rs](./src/sim/executor.rs) is the heart of the simulator. +The crate is intentionally hybrid. Some parts of the process are naturally +Tokio-owned today, especially networking, subscriptions, and other integration- +heavy infrastructure. DST and selected core/database paths need a different +model: single-threaded, deterministic scheduling, explicit time, and a runtime +that can move toward `no_std + alloc`. This crate exists to support both +execution domains without forcing the whole process onto one scheduler. -It provides a single-threaded async executor adapted from madsim's task loop: +## Architecture -- tasks are stored as `async_task` runnables -- ready work is chosen by a deterministic RNG instead of an OS/runtime scheduler -- node state can be paused and resumed -- a thread-local handle context makes the current simulation runtime accessible - from inside spawned work -- determinism can be checked by replaying the same future twice and comparing - the sequence of scheduler decisions +The top-level type in [src/lib.rs](./src/lib.rs) is `Runtime`. It is the small +facade that shared core code should depend on. `Runtime` is not the simulator +itself and it is not Tokio. 
It is a tagged handle with the backends that matter +to SpacetimeDB: -Important behavior: +- `Runtime::Tokio(TokioHandle)` when the `tokio` feature is enabled +- `Runtime::Simulation(sim::Handle)` when the `simulation` feature is enabled -- `Runtime::block_on(...)` drives the whole simulation -- `Handle::spawn_on(...)` schedules work onto a logical node -- absence of runnable work and absence of future timer wakeups is treated as a - hang, which is exactly what DST wants +Code such as durability and snapshotting should accept or store `Runtime` and +use only the narrow operations exposed there: `spawn`, `spawn_blocking`, and +`timeout`. That keeps shared logic independent of the hosted runtime choice. -This module is the reason `RuntimeDispatch::Simulation` can behave like a real -runtime without giving up reproducibility. +Under that facade, this crate has two layers. -### `sim::time` +The first layer is the simulation core under [src/sim](./src/sim). This is the +deterministic single-thread runtime used by DST. The long-term direction for +this layer is `no_std + alloc`, explicit handles, explicit time, and no +dependency on ambient host facilities. -[src/sim/time.rs](./src/sim/time.rs) implements virtual time. +The second layer is the hosted adapter layer under [src/adapter](./src/adapter). +Today that includes a Tokio adapter and std-hosted simulation conveniences. The +Tokio adapter exists because some production and testing paths still need a real +process runtime. The std-hosted simulation helpers exist because determinism +testing, thread-local convenience APIs, and Unix hooks are useful in hosted +environments even though they are not part of the portable simulation core. -It provides: +## Feature Model -- `now()` - Current simulated time. -- `sleep(duration)` - A future that completes when simulated time reaches the deadline. -- `timeout(duration, future)` - Race a future against simulated time. -- `advance(duration)` - Move time forward explicitly. 
+The crate is organized around features that reflect that layering. -Internally it maintains: - -- a current `Duration` -- timer registrations keyed by deadline -- wakeups for due timers - -The executor uses this module to move time only when necessary, which keeps -tests deterministic and avoids tying correctness to wall-clock behavior. - -### `sim::rng` - -[src/sim/rng.rs](./src/sim/rng.rs) provides deterministic randomness. - -There are two layers: - -- `Rng` - Stateful deterministic RNG used by the executor and runtime internals. -- `DecisionSource` - Small lock-free source for probabilistic choices in test/workload code. - -This module also does two extra jobs: - -- records and checks determinism checkpoints so repeated seeded runs can prove - they took the same execution path -- hooks libc randomness calls such as `getrandom` so code running inside the - simulator sees deterministic randomness instead of ambient system entropy +- `simulation` + Enables the deterministic simulation runtime core. This is the part that is + intended to move toward `no_std + alloc`. +- `simulation-std` + Enables std-hosted conveniences layered on top of `simulation`, such as + thread-local current-handle access, determinism replay helpers, and host OS + integration hooks used by DST in a normal process. +- `tokio` + Enables the Tokio-backed hosted adapter and remains part of the default + feature set for now. +- `std` + Enables hosted-only functionality shared by the adapter layer. -That second point matters because reproducibility falls apart quickly if a -dependency reads randomness outside the simulator's control. +This means “simulation” is not shorthand for “all simulation tooling.” It is +the portable runtime core. Hosted extras live behind `simulation-std`. -### `sim::system_thread` +## Simulation Core -[src/sim/system_thread.rs](./src/sim/system_thread.rs) prevents accidental OS -thread creation while running under simulation. 
+The simulation core lives under [src/sim](./src/sim). -On Unix it intercepts `pthread_attr_init` and fails fast if code tries to spawn -real system threads from inside the simulator. That protects determinism and -enforces the intended execution model: simulated tasks should run on the -simulator, not escape onto real threads. +[src/sim/executor.rs](./src/sim/executor.rs) contains the single-threaded +deterministic executor. It stores ready tasks as `async_task` runnables, uses a +deterministic RNG to choose the next runnable, supports pause/resume by logical +node, and treats “no runnable work and no future timer wakeups” as a hang. -## How This Crate Is Intended To Be Used +[src/sim/time.rs](./src/sim/time.rs) contains virtual time. It owns simulated +time state, timer registration, and timeout behavior. The key property is that +time moves only under runtime control, not wall clock control. -For core code: +[src/sim/rng.rs](./src/sim/rng.rs) contains deterministic randomness. The +runtime uses this for scheduler choices, and test/workload code can use +`DecisionSource` when it needs deterministic probabilistic decisions. -- accept or store `RuntimeDispatch` -- use `spawn`, `spawn_blocking`, and `timeout` -- avoid embedding raw Tokio assumptions into shared logic +The public simulation surface is intentionally explicit: `sim::Runtime`, +`sim::Handle`, `sim::NodeId`, `sim::JoinHandle`, `yield_now`, and the virtual +time and RNG utilities. The portable direction is to make explicit-handle APIs +the main interface, with host-style convenience APIs layered separately. -For production-only code: +## Adapter Layer -- use `RuntimeDispatch::tokio_current()` or `RuntimeDispatch::tokio(handle)` +The adapter layer lives under [src/adapter](./src/adapter). -For DST: +[src/adapter/tokio.rs](./src/adapter/tokio.rs) is the Tokio facade. 
It defines +the hosted Tokio types used by the top-level runtime facade and provides +`current_handle_or_new_runtime()` for production code that may or may not +already be inside a Tokio runtime. -- create `sim::Runtime` -- run the test harness with `Runtime::block_on(...)` -- pass `RuntimeDispatch::simulation_current()` into the code under test +Std-hosted simulation helpers stay outside the simulation core as well. These +helpers are valuable, but they are adapters around the core, not the core +itself. Examples include thread-local “current runtime” access, determinism +replay helpers, and Unix hooks that prevent simulation from silently escaping +onto real OS threads. ## Current Scope -This crate is intentionally narrow. It is not trying to replace Tokio, and it -is not a generic distributed simulator. It currently provides exactly the -runtime seams needed by SpacetimeDB components that must run both in production -and under deterministic simulation. +This crate is not trying to make the whole of core `no_std` immediately. For +now, crates such as `relational_db`, `snapshot`, `commitlog`, and `datastore` +may still use `tokio::sync` internally. That is acceptable in the short term, +because those synchronization primitives are runtime-agnostic enough for DST and +the current runtime boundary effort is focused on execution control, not total +removal of Tokio-adjacent types from core. + +The longer-term goal is to reduce those dependencies where it materially helps +portability or determinism, but that work is explicitly out of scope for the +first phase of this crate architecture. + +## Intended Usage + +Shared core/database code should depend on `Runtime`, not on raw Tokio handles +or simulator internals. DST should construct `sim::Runtime` directly and use it +to drive deterministic test execution. 
Hosted production/testing code that still +needs Tokio should use the Tokio adapter through `Runtime::tokio(...)`, +`Runtime::tokio_current()`, and `current_handle_or_new_runtime()`. + +The likely end state is still hybrid: core/database execution may eventually run +on the same deterministic single-thread runtime in both DST and selected +production paths, while networking, clients, subscriptions, and other hosted +subsystems continue to live on Tokio. That is a deliberate design choice, not a +temporary inconsistency. diff --git a/crates/runtime/src/adapter/mod.rs b/crates/runtime/src/adapter/mod.rs new file mode 100644 index 00000000000..a254877d883 --- /dev/null +++ b/crates/runtime/src/adapter/mod.rs @@ -0,0 +1,5 @@ +#[cfg(feature = "tokio")] +pub mod tokio; + +#[cfg(feature = "simulation-std")] +pub mod sim_std; diff --git a/crates/runtime/src/adapter/sim_std.rs b/crates/runtime/src/adapter/sim_std.rs new file mode 100644 index 00000000000..2eaa160adec --- /dev/null +++ b/crates/runtime/src/adapter/sim_std.rs @@ -0,0 +1,361 @@ +use alloc::boxed::Box; +use core::{ + cell::{Cell, RefCell}, + future::Future, + ptr, + time::Duration, +}; +use std::sync::OnceLock; + +use crate::sim; + +thread_local! 
{ + static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; + static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; + static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +pub(crate) struct HandleContextGuard { + previous: Option, +} + +pub(crate) struct RngContextGuard { + previous: Option, +} + +pub(crate) struct SimulationThreadGuard { + previous: bool, +} + +pub fn simulation_current() -> crate::Runtime { + crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) +} + +pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { + ensure_rng_hooks_linked(); + if !init_std_random_state(runtime.rng().seed()) { + tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); + } + let _handle_context = enter_handle_context(runtime.handle()); + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(runtime.rng()); + runtime.block_on(future) +} + +pub fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +pub fn advance_time(duration: Duration) { + current_handle() + .expect("simulation runtime is not active on this thread") + .advance(duration); +} + +pub fn now() -> Duration { + current_handle().map(|handle| handle.now()).unwrap_or_default() +} + +pub fn sleep(duration: Duration) -> sim::time::Sleep { + current_handle() + .expect("sim::time::sleep polled outside sim runtime") + .sleep(duration) +} + +pub async fn timeout(duration: Duration, future: impl Future) -> Result { + current_handle() + .expect("sim::time::timeout polled outside sim runtime") + .timeout(duration, future) + .await +} + +pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output +where + F: Future + 'static, + F::Output: Send + 'static, +{ + check_determinism_with(seed, make_future) +} + +pub fn check_determinism_with(seed: u64, 
make_future: M) -> F::Output +where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, +{ + let first = make_future.clone(); + let log = std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_log(); + block_on(&mut runtime, first()); + runtime + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_check(log); + let output = block_on(&mut runtime, make_future()); + runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() +} + +pub fn enable_buggify() { + current_handle() + .expect("simulation runtime is not active on this thread") + .enable_buggify(); +} + +pub fn disable_buggify() { + current_handle() + .expect("simulation runtime is not active on this thread") + .disable_buggify(); +} + +pub fn is_buggify_enabled() -> bool { + current_handle().is_some_and(|handle| handle.is_buggify_enabled()) +} + +pub fn buggify() -> bool { + current_handle() + .expect("simulation runtime is not active on this thread") + .buggify() +} + +pub fn buggify_with_prob(probability: f64) -> bool { + current_handle() + .expect("simulation runtime is not active on this thread") + .buggify_with_prob(probability) +} + +pub(crate) fn enter_handle_context(handle: sim::Handle) -> HandleContextGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + HandleContextGuard { previous } +} + +pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +pub(crate) fn enter_rng_context(rng: sim::GlobalRng) -> RngContextGuard { + let previous = 
CURRENT_RNG.with(|current| current.replace(Some(rng))); + RngContextGuard { previous } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +fn init_std_random_state(seed: u64) -> bool { + STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); + let _ = std::collections::hash_map::RandomState::new(); + STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() +} + +fn ensure_rng_hooks_linked() { + unsafe { + getentropy(ptr::null_mut(), 0); + } +} + +fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { + if buflen == 0 { + return; + } + let rng = sim::GlobalRng::new(seed); + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); +} + +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); + true + }) +} + +fn panic_with_seed(seed: u64, payload: Box) -> ! 
{ + eprintln!("note: run with --seed {seed} to reproduce this error"); + std::panic::resume_unwind(payload); +} + +impl Drop for HandleContextGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } +} + +impl Drop for RngContextGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + #[cfg(target_os = "macos")] + let _ = flags; + + if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { + fill_from_seed(buf, buflen, seed); + return buflen as isize; + } + if fill_from_current_rng(buf, buflen) { + return buflen as isize; + } + + #[cfg(target_os = "linux")] + { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + let original = GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); 
+ std::mem::transmute(ptr) + }); + unsafe { original(buf, buflen, flags) } + } + + #[cfg(target_os = "macos")] + { + type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; + static GETENTROPY: OnceLock = OnceLock::new(); + let original = GETENTROPY.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getentropy"); + std::mem::transmute(ptr) + }); + match unsafe { original(buf, buflen) } { + -1 => -1, + 0 => buflen as isize, + _ => unreachable!("unexpected getentropy return value"), + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (buf, buflen, flags); + compile_error!("unsupported OS for DST getrandom override"); + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(target_os = "macos")] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { + match unsafe { getrandom(bytes, count, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200); + runtime.block_on(async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = sim::GlobalRng::new(20); + let _guard = enter_rng_context(rng.clone()); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let expected_rng = sim::GlobalRng::new(20); + let mut expected = [0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn 
std_hashmap_order_is_seeded_for_runtime_thread() { + fn order_for(seed: u64) -> Vec<(u64, u64)> { + std::thread::spawn(move || { + let _ = init_std_random_state(seed); + (0..12) + .map(|idx| (idx, idx)) + .collect::>() + .into_iter() + .collect() + }) + .join() + .unwrap() + } + + assert_eq!(order_for(30), order_for(30)); + } +} diff --git a/crates/runtime/src/adapter/tokio.rs b/crates/runtime/src/adapter/tokio.rs new file mode 100644 index 00000000000..5d605bba39a --- /dev/null +++ b/crates/runtime/src/adapter/tokio.rs @@ -0,0 +1,11 @@ +pub type TokioHandle = tokio::runtime::Handle; +pub type TokioRuntime = tokio::runtime::Runtime; + +pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { + if let Ok(handle) = TokioHandle::try_current() { + return Ok((handle, None)); + } + + let runtime = TokioRuntime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 7633ef08e40..8777409c402 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,19 +1,22 @@ +#![cfg_attr(not(any(feature = "tokio", feature = "simulation-std")), no_std)] + //! Runtime and deterministic simulation utilities shared by core and DST. 
-use std::{fmt, future::Future, time::Duration}; +extern crate alloc; + +use core::{fmt, future::Future, time::Duration}; +pub mod adapter; #[cfg(feature = "simulation")] pub mod sim; #[cfg(feature = "tokio")] -pub type Handle = tokio::runtime::Handle; -#[cfg(feature = "tokio")] -pub type Runtime = tokio::runtime::Runtime; +pub use adapter::tokio::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; #[derive(Clone)] -pub enum RuntimeDispatch { +pub enum Runtime { #[cfg(feature = "tokio")] - Tokio(Handle), + Tokio(TokioHandle), #[cfg(feature = "simulation")] Simulation(sim::Handle), } @@ -27,17 +30,18 @@ impl fmt::Display for RuntimeTimeout { } } +#[cfg(any(feature = "tokio", feature = "simulation-std"))] impl std::error::Error for RuntimeTimeout {} -impl RuntimeDispatch { +impl Runtime { #[cfg(feature = "tokio")] - pub fn tokio(handle: Handle) -> Self { + pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } #[cfg(feature = "tokio")] pub fn tokio_current() -> Self { - Self::tokio(Handle::current()) + Self::tokio(TokioHandle::current()) } #[cfg(feature = "simulation")] @@ -45,9 +49,9 @@ impl RuntimeDispatch { Self::Simulation(handle) } - #[cfg(feature = "simulation")] + #[cfg(feature = "simulation-std")] pub fn simulation_current() -> Self { - Self::simulation(sim::Handle::current().expect("simulation runtime is not active on this thread")) + adapter::sim_std::simulation_current() } pub fn spawn(&self, future: impl Future + Send + 'static) { @@ -102,21 +106,9 @@ impl RuntimeDispatch { .await .map_err(|_| RuntimeTimeout), #[cfg(feature = "simulation")] - Self::Simulation(_) => sim::time::timeout(timeout_after, future) - .await - .map_err(|_| RuntimeTimeout), + Self::Simulation(handle) => handle.timeout(timeout_after, future).await.map_err(|_| RuntimeTimeout), #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } } } - -#[cfg(feature = "tokio")] -pub fn 
current_handle_or_new_runtime() -> anyhow::Result<(Handle, Option)> { - if let Ok(handle) = Handle::try_current() { - return Ok((handle, None)); - } - - let runtime = Runtime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} diff --git a/crates/runtime/src/sim/buggify.rs b/crates/runtime/src/sim/buggify.rs new file mode 100644 index 00000000000..07188c6c207 --- /dev/null +++ b/crates/runtime/src/sim/buggify.rs @@ -0,0 +1,51 @@ +use crate::sim::Runtime; + +/// Probabilistic fault-injection helpers for simulation code. +/// +/// Reference: . +/// +/// Buggify is tied to a specific simulation runtime. Callers toggle it on that +/// runtime, then ask whether a fault should be injected at a particular point. +pub fn enable(runtime: &Runtime) { + runtime.enable_buggify(); +} + +/// Disable probabilistic fault injection for the given simulation runtime. +pub fn disable(runtime: &Runtime) { + runtime.disable_buggify(); +} + +/// Returns whether buggify is enabled for the given simulation runtime. +pub fn is_enabled(runtime: &Runtime) -> bool { + runtime.is_buggify_enabled() +} + +/// Returns whether the runtime should inject a fault at this point using the +/// default deterministic probability. +pub fn should_inject_fault(runtime: &Runtime) -> bool { + runtime.buggify() +} + +/// Returns whether the runtime should inject a fault at this point using the +/// provided deterministic probability. 
+pub fn should_inject_fault_with_prob(runtime: &Runtime, probability: f64) -> bool { + runtime.buggify_with_prob(probability) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn runtime_owned_buggify_controls_fault_injection() { + let runtime = Runtime::new(7); + + assert!(!is_enabled(&runtime)); + enable(&runtime); + assert!(is_enabled(&runtime)); + assert!(should_inject_fault_with_prob(&runtime, 1.0)); + disable(&runtime); + assert!(!is_enabled(&runtime)); + assert!(!should_inject_fault_with_prob(&runtime, 1.0)); + } +} diff --git a/crates/runtime/src/sim/config.rs b/crates/runtime/src/sim/config.rs new file mode 100644 index 00000000000..92ab8d0fdbc --- /dev/null +++ b/crates/runtime/src/sim/config.rs @@ -0,0 +1,16 @@ +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeConfig { + pub seed: u64, +} + +impl RuntimeConfig { + pub const fn new(seed: u64) -> Self { + Self { seed } + } +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self::new(0) + } +} diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 765b70f631b..e0a28afc4ba 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -1,29 +1,19 @@ //! Minimal asynchronous executor adapted from madsim's `sim/task` loop. 
-use std::{ - cell::RefCell, - collections::BTreeMap, +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use core::{ fmt, future::Future, - panic::AssertUnwindSafe, pin::Pin, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, - }, + sync::atomic::{AtomicBool, AtomicU64, Ordering}, task::{Context, Poll}, - thread::{self, Thread}, time::Duration, }; use futures_util::FutureExt; +use spin::Mutex; -use crate::sim::{ - rng::{enter_rng_context, DeterminismLog}, - system_thread::enter_simulation_thread, - time::{enter_time_context, TimeHandle}, - Rng, -}; +use crate::sim::{time::TimeHandle, Rng, RuntimeConfig}; type Runnable = async_task::Runnable; @@ -32,6 +22,7 @@ type Runnable = async_task::Runnable; pub struct NodeId(u64); impl NodeId { + /// The default node for single-node simulation or top-level runtime work. pub const MAIN: Self = Self(0); } @@ -51,39 +42,60 @@ pub struct Runtime { } impl Runtime { - pub fn new(seed: u64) -> anyhow::Result { - Ok(Self { - executor: Arc::new(Executor::new(seed)), - }) + /// Create a simulation runtime seeded for deterministic scheduling and RNG. + pub fn new(seed: u64) -> Self { + Self::with_config(RuntimeConfig::new(seed)) + } + + /// Create a simulation runtime from an explicit runtime configuration. + pub fn with_config(config: RuntimeConfig) -> Self { + Self { + executor: Arc::new(Executor::new(config)), + } } + /// Drive a top-level future to completion on the simulation executor. + /// + /// While the future runs, spawned tasks share the same deterministic + /// scheduler, timer wheel, and runtime RNG. pub fn block_on(&mut self, future: F) -> F::Output { - let _handle_context = enter_handle_context(self.handle()); self.executor.block_on(future) } + /// Return the amount of virtual time elapsed in this runtime. pub fn elapsed(&self) -> Duration { self.executor.elapsed() } + /// Get a cloneable handle for spawning tasks and accessing runtime services. 
pub fn handle(&self) -> Handle { Handle { executor: Arc::clone(&self.executor), } } + /// Create a new simulated node. + /// + /// Nodes are a scheduling/pausing boundary rather than separate executors: + /// all nodes still run on the same single-threaded runtime. pub fn create_node(&self) -> NodeId { self.handle().create_node() } + /// Pause scheduling for a node. + /// + /// Tasks already queued for the node are retained and will run only after + /// the node is resumed. pub fn pause(&self, node: NodeId) { self.handle().pause(node); } + /// Resume scheduling for a previously paused node. pub fn resume(&self, node: NodeId) { self.handle().resume(node); } + /// Spawn a `Send` future onto a specific simulated node. pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -92,49 +104,53 @@ impl Runtime { self.handle().spawn_on(node, future) } - /// Run a future twice with the same seed and fail if simulator choices diverge. - pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output - where - F: Future + 'static, - F::Output: Send + 'static, - { - Self::check_determinism_with(seed, make_future) + pub fn enable_buggify(&self) { + self.executor.enable_buggify(); } - /// Run a future twice with the same seed and fail if simulator choices diverge. 
- pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output - where - M: Fn() -> F + Clone + Send + 'static, - F: Future + 'static, - F::Output: Send + 'static, - { - let first = make_future.clone(); - let log = thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); - runtime.executor.enable_determinism_log(); - runtime.block_on(first()); - runtime - .executor - .take_determinism_log() - .expect("determinism log should be enabled") - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap(); - - thread::spawn(move || { - let mut runtime = Runtime::new(seed).expect("failed to create simulation runtime"); - runtime.executor.enable_determinism_check(log); - let output = runtime.block_on(make_future()); - runtime - .executor - .finish_determinism_check() - .unwrap_or_else(|err| panic!("{err}")); - output - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap() + /// Disable probabilistic fault injection for this runtime. + pub fn disable_buggify(&self) { + self.executor.disable_buggify(); + } + + /// Return whether buggify is enabled for this runtime. + pub fn is_buggify_enabled(&self) -> bool { + self.executor.is_buggify_enabled() + } + + /// Sample the default runtime buggify probability. + pub fn buggify(&self) -> bool { + self.executor.buggify() + } + + /// Sample a caller-provided runtime buggify probability. 
+ pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.executor.buggify_with_prob(probability) + } + + #[allow(dead_code)] + pub(crate) fn enable_determinism_log(&self) { + self.executor.rng.enable_determinism_log(); + } + + #[allow(dead_code)] + pub(crate) fn enable_determinism_check(&self, log: crate::sim::DeterminismLog) { + self.executor.rng.enable_determinism_check(log); + } + + #[allow(dead_code)] + pub(crate) fn take_determinism_log(&self) -> Option { + self.executor.rng.take_determinism_log() + } + + #[allow(dead_code)] + pub(crate) fn finish_determinism_check(&self) -> Result<(), alloc::string::String> { + self.executor.rng.finish_determinism_check() + } + + #[allow(dead_code)] + pub(crate) fn rng(&self) -> Rng { + self.executor.rng.clone() } } @@ -145,22 +161,22 @@ pub struct Handle { } impl Handle { - pub fn current() -> Option { - current_handle() - } - + /// Create a new simulated node owned by this runtime. pub fn create_node(&self) -> NodeId { self.executor.create_node() } + /// Pause scheduling for a node. pub fn pause(&self, node: NodeId) { self.executor.pause(node); } + /// Resume scheduling for a node and requeue any buffered tasks for it. pub fn resume(&self, node: NodeId) { self.executor.resume(node); } + /// Spawn a `Send` future onto a specific simulated node. pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -169,6 +185,9 @@ impl Handle { self.executor.spawn_on(node, future) } + /// Spawn a non-`Send` future onto a specific simulated node. + /// + /// This is only valid because the simulation executor is single-threaded. pub fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + 'static, @@ -176,30 +195,53 @@ impl Handle { { self.executor.spawn_local_on(node, future) } -} -thread_local! { - static CURRENT_HANDLE: RefCell> = RefCell::new(None); -} + /// Return the current virtual time for this runtime. 
+ pub fn now(&self) -> Duration { + self.executor.time.now() + } -pub(crate) fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} + /// Move virtual time forward explicitly. + pub fn advance(&self, duration: Duration) { + self.executor.time.advance(duration); + } -fn enter_handle_context(handle: Handle) -> HandleContextGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - HandleContextGuard { previous } -} + /// Create a future that becomes ready after `duration` of virtual time. + pub fn sleep(&self, duration: Duration) -> crate::sim::time::Sleep { + self.executor.time.sleep(duration) + } -struct HandleContextGuard { - previous: Option, -} + /// Race a future against a virtual-time timeout. + pub async fn timeout( + &self, + duration: Duration, + future: impl Future, + ) -> Result { + self.executor.time.timeout(duration, future).await + } -impl Drop for HandleContextGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); + pub fn enable_buggify(&self) { + self.executor.enable_buggify(); + } + + /// Disable probabilistic fault injection for this runtime. + pub fn disable_buggify(&self) { + self.executor.disable_buggify(); + } + + /// Return whether buggify is enabled for this runtime. + pub fn is_buggify_enabled(&self) -> bool { + self.executor.is_buggify_enabled() + } + + /// Sample the default runtime buggify probability. + pub fn buggify(&self) -> bool { + self.executor.buggify() + } + + /// Sample a caller-provided runtime buggify probability. + pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.executor.buggify_with_prob(probability) } } @@ -209,6 +251,7 @@ pub struct JoinHandle { } impl JoinHandle { + /// Detach the task so it continues running without awaiting its output. 
pub fn detach(self) { self.task.detach(); } @@ -222,31 +265,32 @@ impl Future for JoinHandle { } } -fn panic_with_seed(seed: u64, payload: Box) -> ! { - eprintln!("note: run with --seed {seed} to reproduce this error"); - std::panic::resume_unwind(payload); -} - +/// Core single-threaded scheduler backing a simulation [`Runtime`]. +/// +/// The executor owns the runnable queue, per-node pause state, deterministic +/// RNG, and virtual time. Tasks are selected from the queue using the runtime +/// RNG so the schedule is reproducible for a given seed. struct Executor { queue: Receiver, sender: Sender, - nodes: Mutex>>, - next_node: std::sync::atomic::AtomicU64, - rng: Arc>, + nodes: spin::Mutex>>, + next_node: AtomicU64, + rng: Rng, time: TimeHandle, } impl Executor { - fn new(seed: u64) -> Self { + /// Construct a fresh executor with one default `MAIN` node. + fn new(config: RuntimeConfig) -> Self { let queue = Queue::new(); let mut nodes = BTreeMap::new(); nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); Self { queue: queue.receiver(), sender: queue.sender(), - nodes: Mutex::new(nodes), - next_node: std::sync::atomic::AtomicU64::new(1), - rng: Arc::new(Mutex::new(Rng::new(seed))), + nodes: spin::Mutex::new(nodes), + next_node: AtomicU64::new(1), + rng: Rng::new(config.seed), time: TimeHandle::new(), } } @@ -255,45 +299,49 @@ impl Executor { self.time.now() } - fn enable_determinism_log(&self) { - self.rng.lock().expect("sim rng poisoned").enable_determinism_log(); + fn enable_buggify(&self) { + self.rng.enable_buggify(); } - fn enable_determinism_check(&self, log: DeterminismLog) { - self.rng.lock().expect("sim rng poisoned").enable_determinism_check(log); + fn disable_buggify(&self) { + self.rng.disable_buggify(); } - fn take_determinism_log(&self) -> Option { - self.rng.lock().expect("sim rng poisoned").take_determinism_log() + fn is_buggify_enabled(&self) -> bool { + self.rng.is_buggify_enabled() } - fn finish_determinism_check(&self) -> Result<(), 
String> { - self.rng.lock().expect("sim rng poisoned").finish_determinism_check() + fn buggify(&self) -> bool { + self.rng.buggify() + } + + fn buggify_with_prob(&self, probability: f64) -> bool { + self.rng.buggify_with_prob(probability) } fn create_node(&self) -> NodeId { let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); - self.nodes - .lock() - .expect("nodes poisoned") - .insert(id, Arc::new(NodeState::default())); + self.nodes.lock().insert(id, Arc::new(NodeState::default())); id } + /// Mark a node as paused so newly selected runnables are buffered. fn pause(&self, node: NodeId) { self.node_state(node).paused.store(true, Ordering::Relaxed); } + /// Mark a node as runnable again and requeue any buffered tasks for it. fn resume(&self, node: NodeId) { let state = self.node_state(node); state.paused.store(false, Ordering::Relaxed); - let mut paused = state.paused_queue.lock().expect("paused queue poisoned"); + let mut paused = state.paused_queue.lock(); for runnable in paused.drain(..) { self.sender.send(runnable); } } + /// Spawn a `Send` task and enqueue its runnable on the shared runtime queue. fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -310,6 +358,7 @@ impl Executor { JoinHandle { task } } + /// Spawn a non-`Send` task on the single-threaded runtime. fn spawn_local_on(&self, node: NodeId, future: F) -> JoinHandle where F: Future + 'static, @@ -329,12 +378,12 @@ impl Executor { } #[track_caller] + /// Run the top-level future until completion. + /// + /// The executor repeatedly drains runnable tasks, then advances virtual + /// time to the next timer when the queue is empty. If neither runnable work + /// nor timers remain, the simulation is considered deadlocked. 
fn block_on(&self, future: F) -> F::Output { - let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(Arc::clone(&self.rng)); - let _time_context = enter_time_context(self.time.clone()); - let _waiter = WaiterGuard::new(&self.queue, thread::current()); - let sender = self.sender.clone(); let (runnable, task) = unsafe { async_task::Builder::new() @@ -357,41 +406,48 @@ impl Executor { } } + /// Drain the runnable queue, selecting tasks in deterministic RNG order. + /// + /// Paused-node tasks are diverted into that node's paused buffer instead of + /// being polled immediately. fn run_all_ready(&self) { while let Some(runnable) = self.queue.try_recv_random(&self.rng) { let node = *runnable.metadata(); let state = self.node_state(node); if state.paused.load(Ordering::Relaxed) { - state.paused_queue.lock().expect("paused queue poisoned").push(runnable); + state.paused_queue.lock().push(runnable); continue; } - let result = std::panic::catch_unwind(AssertUnwindSafe(|| runnable.run())); - if let Err(payload) = result { - std::panic::resume_unwind(payload); - } + runnable.run(); } } + /// Look up the scheduling state for a node, panicking if the node is unknown. fn node_state(&self, node: NodeId) -> Arc { self.nodes .lock() - .expect("nodes poisoned") .get(&node) .cloned() .unwrap_or_else(|| panic!("unknown simulated node {node}")) } } +/// Per-node scheduler state shared by tasks assigned to that node. #[derive(Clone, Default)] struct NodeState { paused: Arc, paused_queue: Arc>>, } +/// Yield back to the scheduler once. +/// +/// This is the smallest explicit interleaving point available to simulated +/// tasks when they need to give other runnables a chance to execute. pub async fn yield_now() { YieldNow { yielded: false }.await } +/// One-shot future backing [`yield_now`]. 
struct YieldNow { yielded: bool, } @@ -410,40 +466,26 @@ impl Future for YieldNow { } } -struct WaiterGuard<'a> { - receiver: &'a Receiver, -} - -impl<'a> WaiterGuard<'a> { - fn new(receiver: &'a Receiver, thread: Thread) -> Self { - receiver.set_waiter(Some(thread)); - Self { receiver } - } -} - -impl Drop for WaiterGuard<'_> { - fn drop(&mut self) { - self.receiver.set_waiter(None); - } -} - +/// Shared runnable queue used by the simulation executor. struct Queue { inner: Arc, } +/// Sending end of the runnable queue. #[derive(Clone)] struct Sender { inner: Arc, } +/// Receiving end of the runnable queue. #[derive(Clone)] struct Receiver { inner: Arc, } +/// Queue storage for runnables awaiting scheduling. struct QueueInner { queue: Mutex>, - waiter: Mutex>, } impl Queue { @@ -451,7 +493,6 @@ impl Queue { Self { inner: Arc::new(QueueInner { queue: Mutex::new(Vec::new()), - waiter: Mutex::new(None), }), } } @@ -470,25 +511,20 @@ impl Queue { } impl Sender { + /// Push a runnable onto the shared queue. fn send(&self, runnable: Runnable) { - self.inner.queue.lock().expect("run queue poisoned").push(runnable); - if let Some(thread) = self.inner.waiter.lock().expect("waiter poisoned").as_ref() { - thread.unpark(); - } + self.inner.queue.lock().push(runnable); } } impl Receiver { - fn set_waiter(&self, thread: Option) { - *self.inner.waiter.lock().expect("waiter poisoned") = thread; - } - - fn try_recv_random(&self, rng: &Mutex) -> Option { - let mut queue = self.inner.queue.lock().expect("run queue poisoned"); + /// Remove one runnable using the runtime RNG to choose among ready tasks. 
+ fn try_recv_random(&self, rng: &Rng) -> Option { + let mut queue = self.inner.queue.lock(); if queue.is_empty() { return None; } - let idx = rng.lock().expect("rng poisoned").index(queue.len()); + let idx = rng.index(queue.len()); Some(queue.swap_remove(idx)) } } @@ -501,10 +537,11 @@ mod tests { }; use super::*; + use crate::sim::RuntimeConfig; #[test] fn paused_node_does_not_run_until_resumed() { - let mut runtime = Runtime::new(1).unwrap(); + let mut runtime = Runtime::new(1); let node = runtime.create_node(); runtime.pause(node); @@ -527,7 +564,7 @@ mod tests { #[test] fn handle_can_spawn_onto_node_from_simulated_task() { - let mut runtime = Runtime::new(2).unwrap(); + let mut runtime = Runtime::new(2); let handle = runtime.handle(); let value = runtime.block_on(async move { @@ -538,13 +575,45 @@ mod tests { assert_eq!(value, 11); } + #[test] + fn runtime_config_sets_seed() { + let runtime = Runtime::with_config(RuntimeConfig::new(77)); + let handle = runtime.handle(); + handle.enable_buggify(); + + let actual = (0..8).map(|_| handle.buggify_with_prob(0.5)).collect::>(); + + let expected = { + let mut rng = Rng::new(77); + rng.enable_buggify(); + (0..8).map(|_| rng.buggify_with_prob(0.5)).collect::>() + }; + + assert_eq!(actual, expected); + } + + #[test] + fn runtime_and_handle_share_buggify_state() { + let runtime = Runtime::new(6); + let handle = runtime.handle(); + + assert!(!runtime.is_buggify_enabled()); + runtime.enable_buggify(); + assert!(handle.is_buggify_enabled()); + assert!(handle.buggify_with_prob(1.0)); + handle.disable_buggify(); + assert!(!runtime.is_buggify_enabled()); + } + + #[cfg(feature = "simulation-std")] #[test] fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(Handle::current().is_none()); + assert!(crate::adapter::sim_std::current_handle().is_none()); - let mut runtime = Runtime::new(5).unwrap(); - let value = runtime.block_on(async { - let handle = Handle::current().expect("sim handle should be present inside 
block_on"); + let mut runtime = Runtime::new(5); + let value = crate::adapter::sim_std::block_on(&mut runtime, async { + let handle = + crate::adapter::sim_std::current_handle().expect("sim handle should be present inside block_on"); let node = handle.create_node(); let captured = std::rc::Rc::new(17); handle @@ -556,15 +625,16 @@ mod tests { }); assert_eq!(value, 17); - assert!(Handle::current().is_none()); + assert!(crate::adapter::sim_std::current_handle().is_none()); } + #[cfg(feature = "simulation-std")] #[test] fn check_determinism_runs_future_twice() { static CALLS: AtomicUsize = AtomicUsize::new(0); CALLS.store(0, Ordering::SeqCst); - let value = Runtime::check_determinism(3, || async { + let value = crate::adapter::sim_std::check_determinism(3, || async { CALLS.fetch_add(1, Ordering::SeqCst); yield_now().await; 13 @@ -574,13 +644,14 @@ mod tests { assert_eq!(CALLS.load(Ordering::SeqCst), 2); } + #[cfg(feature = "simulation-std")] #[test] #[should_panic(expected = "non-determinism detected")] fn check_determinism_rejects_different_scheduler_sequence() { static FIRST_RUN: AtomicBool = AtomicBool::new(true); FIRST_RUN.store(true, Ordering::SeqCst); - Runtime::check_determinism(4, || async { + crate::adapter::sim_std::check_determinism(4, || async { if FIRST_RUN.swap(false, Ordering::SeqCst) { yield_now().await; } diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 467903cf2b4..1b778f96d62 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -4,20 +4,13 @@ //! futures are scheduled as runnable tasks and the ready queue is sampled by a //! deterministic RNG instead of being driven by a package-level async runtime. 
+pub mod buggify; +mod config; mod executor; mod rng; -mod system_thread; pub mod time; -use std::time::Duration; - +pub use config::RuntimeConfig; pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; -pub use rng::{DecisionSource, Rng}; - -pub fn advance_time(duration: Duration) { - time::advance(duration); -} - -pub fn decision_source(seed: u64) -> DecisionSource { - DecisionSource::new(seed) -} +pub(crate) use rng::DeterminismLog; +pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs index 09afde03031..602eae59979 100644 --- a/crates/runtime/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -1,135 +1,190 @@ -use std::{ - cell::{Cell, RefCell}, - ptr, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, OnceLock, - }, -}; +use alloc::{format, string::String}; +use alloc::{sync::Arc, vec::Vec}; +use spin::Mutex; -const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; +pub type Rng = GlobalRng; +/// Shared deterministic RNG for the simulation core. +/// +/// The simulator owns one runtime-wide RNG handle and uses it for scheduler +/// choices, probabilistic fault injection, and determinism checks. Hosted +/// conveniences such as thread-local current-RNG access and libc random hooks +/// live in `adapter::sim_std`, not here. #[derive(Clone, Debug)] -pub struct Rng { +pub struct GlobalRng { + inner: Arc>, +} + +#[derive(Debug)] +struct Inner { + /// Seed used to initialize the runtime RNG, carried for diagnostics and replay. seed: u64, - state: u64, + /// Deterministic generator used for scheduler choices and fault injection decisions. + rng: SplitMix64, + /// Checkpoints recorded during the first determinism run. log: Option>, + /// Expected checkpoints plus the number already consumed during replay. check: Option<(Vec, usize)>, + /// Whether probabilistic fault injection is currently enabled for this runtime. 
+ buggify_enabled: bool, } -impl Rng { - pub fn new(seed: u64) -> Self { - unsafe { getentropy(ptr::null_mut(), 0) }; - if !init_std_random_state(seed) { - tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +/// Reference for SplitMix64 algorithm: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 +/// Splitmix64 is the default pseudo-random number generator algorithm. +/// It uses a fairly simple algorithm that, though it is considered +/// to be poor for cryptographic purposes, is very fast to calculate, +/// and is "good enough" for many random number needs. +/// It passes several fairly rigorous PRNG "fitness" tests that some more complex algorithms fail. +#[derive(Clone, Debug)] +struct SplitMix64 { + state: u64, +} + +impl SplitMix64 { + fn new(seed: u64) -> Self { + Self { state: seed } + } + + fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(GAMMA); + mix64(self.state) + } + + fn fill_bytes(&mut self, dest: &mut [u8]) { + for chunk in dest.chunks_mut(core::mem::size_of::()) { + let bytes = self.next_u64().to_ne_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); } + } +} + +fn mix64(mut x: u64) -> u64 { + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +impl GlobalRng { + /// Create a new deterministic RNG for a simulation runtime. 
+ pub fn new(seed: u64) -> Self { Self { - seed, - state: splitmix64(seed), - log: None, - check: None, + inner: Arc::new(Mutex::new(Inner { + seed, + rng: SplitMix64::new(seed), + log: None, + check: None, + buggify_enabled: false, + })), } } - pub fn next_u64(&mut self) -> u64 { - self.state = self.state.wrapping_add(GAMMA); - let value = splitmix64(self.state); - self.record_checkpoint(value); - value + pub fn next_u64(&self) -> u64 { + self.with_inner(|inner| inner.rng.next_u64()) } - pub fn index(&mut self, len: usize) -> usize { + pub fn index(&self, len: usize) -> usize { assert!(len > 0, "len must be non-zero"); (self.next_u64() as usize) % len } - pub fn sample_probability(&mut self, probability: f64) -> bool { + pub fn sample_probability(&self, probability: f64) -> bool { probability_sample(self.next_u64(), probability) } - pub(crate) fn fill_bytes(&mut self, dest: &mut [u8]) { - for chunk in dest.chunks_mut(std::mem::size_of::()) { - let bytes = self.next_u64().to_ne_bytes(); - chunk.copy_from_slice(&bytes[..chunk.len()]); - } + pub fn enable_buggify(&self) { + self.inner.lock().buggify_enabled = true; } - pub(crate) fn enable_determinism_log(&mut self) { - self.log = Some(Vec::new()); - self.check = None; + pub fn disable_buggify(&self) { + self.inner.lock().buggify_enabled = false; } - pub(crate) fn enable_determinism_check(&mut self, log: DeterminismLog) { - self.check = Some((log.0, 0)); - self.log = None; + pub fn is_buggify_enabled(&self) -> bool { + self.inner.lock().buggify_enabled } - pub(crate) fn take_determinism_log(&mut self) -> Option { - self.log - .take() - .or_else(|| self.check.take().map(|(log, _)| log)) - .map(DeterminismLog) + pub fn buggify(&self) -> bool { + self.buggify_with_prob(0.25) } - pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { - if let Some((log, consumed)) = &self.check - && *consumed != log.len() - { - return Err(format!( - "non-determinism detected for seed {}: consumed {consumed} of {} 
checkpoints", - self.seed, - log.len() - )); - } - Ok(()) + pub fn buggify_with_prob(&self, probability: f64) -> bool { + self.is_buggify_enabled() && self.sample_probability(probability) } - fn record_checkpoint(&mut self, value: u64) { - if self.log.is_none() && self.check.is_none() { - return; - } + #[allow(dead_code)] + pub(crate) fn seed(&self) -> u64 { + self.inner.lock().seed + } - let checkpoint = checksum(value); - if let Some(log) = &mut self.log { - log.push(checkpoint); - } - if let Some((expected, consumed)) = &mut self.check { - if expected.get(*consumed) != Some(&checkpoint) { - panic!( - "non-determinism detected for seed {} at checkpoint {consumed}", - self.seed - ); + fn with_inner(&self, f: impl FnOnce(&mut Inner) -> T) -> T { + let mut inner = self.inner.lock(); + let output = f(&mut inner); + if inner.log.is_some() || inner.check.is_some() { + let checkpoint = checksum(inner.rng.clone().next_u64()); + if let Some(log) = &mut inner.log { + log.push(checkpoint); + } + let seed = inner.seed; + if let Some((expected, consumed)) = &mut inner.check { + if expected.get(*consumed) != Some(&checkpoint) { + panic!("non-determinism detected for seed {} at checkpoint {consumed}", seed); + } + *consumed += 1; } - *consumed += 1; } + output } -} -#[derive(Debug, Clone, Eq, PartialEq)] -pub(crate) struct DeterminismLog(Vec); + #[allow(dead_code)] + pub(crate) fn fill_bytes(&self, dest: &mut [u8]) { + self.with_inner(|inner| inner.rng.fill_bytes(dest)); + } -#[derive(Debug)] -pub struct DecisionSource { - state: AtomicU64, -} + #[allow(dead_code)] + pub(crate) fn enable_determinism_log(&self) { + let mut inner = self.inner.lock(); + inner.log = Some(Vec::new()); + inner.check = None; + } -impl DecisionSource { - pub fn new(seed: u64) -> Self { - Self { - state: AtomicU64::new(splitmix64(seed)), - } + #[allow(dead_code)] + pub(crate) fn enable_determinism_check(&self, log: DeterminismLog) { + let mut inner = self.inner.lock(); + inner.check = Some((log.0, 0)); 
+ inner.log = None; } - pub fn sample_probability(&self, probability: f64) -> bool { - probability_sample(self.next_u64(), probability) + #[allow(dead_code)] + pub(crate) fn take_determinism_log(&self) -> Option { + let mut inner = self.inner.lock(); + inner + .log + .take() + .or_else(|| inner.check.take().map(|(log, _)| log)) + .map(DeterminismLog) } - fn next_u64(&self) -> u64 { - let state = self.state.fetch_add(GAMMA, Ordering::Relaxed).wrapping_add(GAMMA); - splitmix64(state) + #[allow(dead_code)] + pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { + let inner = self.inner.lock(); + if let Some((log, consumed)) = &inner.check { + if *consumed != log.len() { + return Err(format!( + "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", + inner.seed, + log.len() + )); + } + } + Ok(()) } } +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct DeterminismLog(Vec); + fn probability_sample(value: u64, probability: f64) -> bool { if probability <= 0.0 { return false; @@ -138,169 +193,26 @@ fn probability_sample(value: u64, probability: f64) -> bool { return true; } - // Use the top 53 bits to build an exactly representable f64 in [0, 1). let unit = (value >> 11) as f64 * (1.0 / ((1u64 << 53) as f64)); unit < probability } -fn splitmix64(mut x: u64) -> u64 { - x = x.wrapping_add(GAMMA); - x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); - x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); - x ^ (x >> 31) -} - fn checksum(value: u64) -> u8 { value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) } -thread_local! 
{ - static CURRENT_RNG: RefCell>>> = const { RefCell::new(None) }; - static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; -} - -pub(crate) struct RngContextGuard { - previous: Option>>, -} - -pub(crate) fn enter_rng_context(rng: Arc>) -> RngContextGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - RngContextGuard { previous } -} - -impl Drop for RngContextGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - -fn init_std_random_state(seed: u64) -> bool { - STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); - let _ = std::collections::hash_map::RandomState::new(); - STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() -} - -fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { - if buflen == 0 { - return; - } - let mut state = splitmix64(seed); - let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; - for chunk in buf.chunks_mut(std::mem::size_of::()) { - state = state.wrapping_add(GAMMA); - let bytes = splitmix64(state).to_ne_bytes(); - chunk.copy_from_slice(&bytes[..chunk.len()]); - } -} - -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { std::slice::from_raw_parts_mut(buf, buflen) }; - rng.lock().expect("sim rng poisoned").fill_bytes(buf); - true - }) -} - -/// Obtain random bytes through the simulation RNG when running inside the DST executor. -/// -/// This mirrors madsim's libc-level hook. It covers libc users and macOS -/// `CCRandomGenerateBytes`; crates that issue raw kernel syscalls can still -/// bypass it. 
-#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - #[cfg(target_os = "macos")] - let _ = flags; - - if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { - fill_from_seed(buf, buflen, seed); - return buflen as isize; - } - if fill_from_current_rng(buf, buflen) { - return buflen as isize; - } - - #[cfg(target_os = "linux")] - { - type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; - static GETRANDOM: OnceLock = OnceLock::new(); - let original = GETRANDOM.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getrandom"); - std::mem::transmute(ptr) - }); - unsafe { original(buf, buflen, flags) } - } - - #[cfg(target_os = "macos")] - { - type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; - static GETENTROPY: OnceLock = OnceLock::new(); - let original = GETENTROPY.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getentropy"); - std::mem::transmute(ptr) - }); - match unsafe { original(buf, buflen) } { - -1 => -1, - 0 => buflen as isize, - _ => unreachable!("unexpected getentropy return value"), - } - } - - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - { - let _ = (buf, buflen, flags); - compile_error!("unsupported OS for DST getrandom override"); - } -} - -/// Fill a buffer with random bytes through the same hook used by libc. -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { - if buflen > 256 { - return -1; - } - match unsafe { getrandom(buf, buflen, 0) } { - -1 => -1, - _ => 0, - } -} - -/// macOS uses CommonCrypto for process randomness in newer Rust toolchains. 
-#[cfg(target_os = "macos")] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { - match unsafe { getrandom(bytes, count, 0) } { - -1 => -1, - _ => 0, - } -} - #[cfg(test)] mod tests { - use std::{collections::HashMap, sync::Arc}; - use super::*; #[test] fn rng_log_check_accepts_same_sequence() { - let mut first = Rng::new(10); + let first = Rng::new(10); first.enable_determinism_log(); let first_values = (0..8).map(|_| first.next_u64()).collect::>(); let log = first.take_determinism_log().unwrap(); - let mut second = Rng::new(10); + let second = Rng::new(10); second.enable_determinism_check(log); let second_values = (0..8).map(|_| second.next_u64()).collect::>(); second.finish_determinism_check().unwrap(); @@ -309,59 +221,22 @@ mod tests { } #[test] - fn decision_source_matches_rng_sequence() { - let source = DecisionSource::new(12); - let mut rng = Rng::new(12); - - for _ in 0..16 { - assert_eq!(source.next_u64(), rng.next_u64()); - } - } - - #[test] - #[should_panic(expected = "non-determinism detected")] - fn rng_log_check_rejects_different_sequence() { - let mut first = Rng::new(10); - first.enable_determinism_log(); - first.next_u64(); - let log = first.take_determinism_log().unwrap(); - - let mut second = Rng::new(11); - second.enable_determinism_check(log); - second.next_u64(); - } - - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = Arc::new(Mutex::new(Rng::new(20))); - let _guard = enter_rng_context(Arc::clone(&rng)); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + fn buggify_is_disabled_by_default() { + let rng = Rng::new(20); + for _ in 0..8 { + assert!(!rng.buggify()); + assert!(!rng.buggify_with_prob(1.0)); } - - let mut expected_rng = Rng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); } #[test] - fn 
std_hashmap_order_is_seeded_for_runtime_thread() { - fn order_for(seed: u64) -> Vec<(u64, u64)> { - std::thread::spawn(move || { - let _rng = Rng::new(seed); - (0..12) - .map(|idx| (idx, idx)) - .collect::>() - .into_iter() - .collect() - }) - .join() - .unwrap() - } - - assert_eq!(order_for(30), order_for(30)); + fn buggify_obeys_enable_and_disable() { + let rng = Rng::new(21); + rng.enable_buggify(); + assert!(rng.is_buggify_enabled()); + assert!(rng.buggify_with_prob(1.0)); + rng.disable_buggify(); + assert!(!rng.is_buggify_enabled()); + assert!(!rng.buggify_with_prob(1.0)); } } diff --git a/crates/runtime/src/sim/system_thread.rs b/crates/runtime/src/sim/system_thread.rs deleted file mode 100644 index f395a25442a..00000000000 --- a/crates/runtime/src/sim/system_thread.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! Guard against creating OS threads from inside the simulator. - -use std::{cell::Cell, sync::OnceLock}; - -thread_local! { - static IN_SIMULATION: Cell = const { Cell::new(false) }; -} - -pub(crate) struct SimulationThreadGuard { - previous: bool, -} - -pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { - let previous = IN_SIMULATION.with(|state| state.replace(true)); - SimulationThreadGuard { previous } -} - -impl Drop for SimulationThreadGuard { - fn drop(&mut self) { - IN_SIMULATION.with(|state| { - state.set(self.previous); - }); - } -} - -fn in_simulation() -> bool { - IN_SIMULATION.with(Cell::get) -} - -/// Forbid creating system threads in simulation. 
-#[cfg(unix)] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { - if in_simulation() { - eprintln!("attempt to spawn a system thread in simulation."); - eprintln!("note: use simulator tasks instead."); - return -1; - } - - type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; - static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); - let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); - std::mem::transmute(ptr) - }); - unsafe { original(attr) } -} - -#[cfg(test)] -mod tests { - use crate::sim; - - #[test] - #[cfg(unix)] - fn runtime_forbids_system_thread_spawn() { - let mut runtime = sim::Runtime::new(200).unwrap(); - runtime.block_on(async { - let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); - assert!(result.is_err()); - }); - } -} diff --git a/crates/runtime/src/sim/time.rs b/crates/runtime/src/sim/time.rs deleted file mode 100644 index 2508b35b249..00000000000 --- a/crates/runtime/src/sim/time.rs +++ /dev/null @@ -1,343 +0,0 @@ -//! Virtual time for the local simulation runtime. 
- -use std::{ - cell::RefCell, - collections::BTreeMap, - fmt, - future::Future, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll, Waker}, - time::Duration, -}; - -use futures::future::{select, Either}; - -#[derive(Clone, Debug)] -pub struct TimeHandle { - inner: Arc>, -} - -impl TimeHandle { - pub fn new() -> Self { - Self { - inner: Arc::new(Mutex::new(TimeState::default())), - } - } - - pub fn now(&self) -> Duration { - self.inner.lock().expect("sim time poisoned").now - } - - pub fn advance(&self, duration: Duration) { - if duration.is_zero() { - return; - } - - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.now = state.now.saturating_add(duration); - state.take_due_wakers() - }; - wake_all(wakers); - } - - pub fn wake_next_timer(&self) -> bool { - let wakers = { - let mut state = self.inner.lock().expect("sim time poisoned"); - let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { - return false; - }; - if next_deadline > state.now { - state.now = next_deadline; - } - state.take_due_wakers() - }; - let woke = !wakers.is_empty(); - wake_all(wakers); - woke - } - - fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { - let mut state = self.inner.lock().expect("sim time poisoned"); - state.timers.insert( - id, - TimerEntry { - deadline, - waker: waker.clone(), - }, - ); - } - - fn cancel_timer(&self, id: TimerId) { - self.inner.lock().expect("sim time poisoned").timers.remove(&id); - } - - fn next_timer_id(&self) -> TimerId { - let mut state = self.inner.lock().expect("sim time poisoned"); - let id = TimerId(state.next_timer_id); - state.next_timer_id = state.next_timer_id.saturating_add(1); - id - } -} - -impl Default for TimeHandle { - fn default() -> Self { - Self::new() - } -} - -#[derive(Debug, Default)] -struct TimeState { - now: Duration, - next_timer_id: u64, - timers: BTreeMap, -} - -impl TimeState { - fn take_due_wakers(&mut self) -> Vec { - let due 
= self - .timers - .iter() - .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) - .collect::>(); - due.into_iter() - .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) - .collect() - } -} - -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -struct TimerId(u64); - -#[derive(Debug)] -struct TimerEntry { - deadline: Duration, - waker: Waker, -} - -thread_local! { - static CURRENT_TIME: RefCell> = const { RefCell::new(None) }; -} - -pub struct TimeContextGuard { - previous: Option, -} - -pub fn enter_time_context(handle: TimeHandle) -> TimeContextGuard { - let previous = CURRENT_TIME.with(|current| current.replace(Some(handle))); - TimeContextGuard { previous } -} - -pub fn try_current_handle() -> Option { - CURRENT_TIME.with(|current| current.borrow().clone()) -} - -pub fn now() -> Duration { - try_current_handle().map(|handle| handle.now()).unwrap_or_default() -} - -pub fn advance(duration: Duration) { - if let Some(handle) = try_current_handle() { - handle.advance(duration); - } -} - -pub fn sleep(duration: Duration) -> Sleep { - Sleep { - duration, - state: SleepState::Unregistered, - } -} - -pub async fn timeout(duration: Duration, future: impl Future) -> Result { - futures::pin_mut!(future); - let sleep = sleep(duration); - futures::pin_mut!(sleep); - - match select(future, sleep).await { - Either::Left((output, _)) => Ok(output), - Either::Right(((), _)) => Err(TimeoutElapsed { duration }), - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct TimeoutElapsed { - duration: Duration, -} - -impl TimeoutElapsed { - pub fn duration(self) -> Duration { - self.duration - } -} - -impl fmt::Display for TimeoutElapsed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "simulated timeout elapsed after {:?}", self.duration) - } -} - -impl std::error::Error for TimeoutElapsed {} - -impl Drop for TimeContextGuard { - fn drop(&mut self) { - CURRENT_TIME.with(|current| { - 
current.replace(self.previous.take()); - }); - } -} - -pub struct Sleep { - duration: Duration, - state: SleepState, -} - -enum SleepState { - Unregistered, - Registered { - handle: TimeHandle, - id: TimerId, - deadline: Duration, - }, - Done, -} - -impl Future for Sleep { - type Output = (); - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if matches!(self.state, SleepState::Done) { - return Poll::Ready(()); - } - - if matches!(self.state, SleepState::Unregistered) { - let handle = try_current_handle().expect("sim::time::sleep polled outside sim runtime"); - let deadline = handle.now().saturating_add(self.duration); - let id = handle.next_timer_id(); - self.state = SleepState::Registered { handle, id, deadline }; - } - - let SleepState::Registered { handle, id, deadline } = &self.state else { - unreachable!("sleep state should be registered or done"); - }; - - if handle.now() >= *deadline { - let handle = handle.clone(); - let id = *id; - handle.cancel_timer(id); - self.state = SleepState::Done; - Poll::Ready(()) - } else { - handle.register_timer(*id, *deadline, cx.waker()); - Poll::Pending - } - } -} - -impl Drop for Sleep { - fn drop(&mut self) { - if let SleepState::Registered { handle, id, .. 
} = &self.state { - handle.cancel_timer(*id); - } - } -} - -fn wake_all(wakers: Vec) { - for waker in wakers { - waker.wake(); - } -} - -#[cfg(test)] -mod tests { - use std::{ - sync::{Arc, Mutex}, - time::Duration, - }; - - use crate::sim; - - #[test] - fn sleep_fast_forwards_virtual_time() { - let mut runtime = sim::Runtime::new(101).unwrap(); - - runtime.block_on(async { - assert_eq!(super::now(), Duration::ZERO); - super::sleep(Duration::from_millis(5)).await; - assert_eq!(super::now(), Duration::from_millis(5)); - }); - } - - #[test] - fn shorter_timer_wakes_first() { - let mut runtime = sim::Runtime::new(102).unwrap(); - let handle = runtime.handle(); - let order = Arc::new(Mutex::new(Vec::new())); - - runtime.block_on({ - let order = Arc::clone(&order); - async move { - let slow_order = Arc::clone(&order); - let slow = handle.spawn_on(sim::NodeId::MAIN, async move { - super::sleep(Duration::from_millis(10)).await; - slow_order.lock().expect("order poisoned").push(10); - }); - - let fast_order = Arc::clone(&order); - let fast = handle.spawn_on(sim::NodeId::MAIN, async move { - super::sleep(Duration::from_millis(3)).await; - fast_order.lock().expect("order poisoned").push(3); - }); - - fast.await; - slow.await; - } - }); - - assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]); - assert_eq!(runtime.elapsed(), Duration::from_millis(10)); - } - - #[test] - fn explicit_advance_moves_virtual_time() { - let mut runtime = sim::Runtime::new(103).unwrap(); - - runtime.block_on(async { - super::advance(Duration::from_millis(7)); - assert_eq!(super::now(), Duration::from_millis(7)); - }); - } - - #[test] - fn timeout_returns_future_output_before_deadline() { - let mut runtime = sim::Runtime::new(104).unwrap(); - - let output = runtime.block_on(async { - super::timeout(Duration::from_millis(10), async { - super::sleep(Duration::from_millis(3)).await; - 9 - }) - .await - }); - - assert_eq!(output, Ok(9)); - assert_eq!(runtime.elapsed(), 
Duration::from_millis(3)); - } - - #[test] - fn timeout_expires_at_virtual_deadline() { - let mut runtime = sim::Runtime::new(105).unwrap(); - - let output = runtime.block_on(async { - super::timeout(Duration::from_millis(4), async { - super::sleep(Duration::from_millis(20)).await; - 9 - }) - .await - }); - - assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); - assert_eq!(runtime.elapsed(), Duration::from_millis(4)); - } -} diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs new file mode 100644 index 00000000000..6210675f638 --- /dev/null +++ b/crates/runtime/src/sim/time/mod.rs @@ -0,0 +1,297 @@ +//! Virtual time for the local simulation runtime. + +mod sleep; + +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use core::{fmt, future::Future, task::Waker, time::Duration}; + +use futures_util::{select_biased, FutureExt}; +use sleep::wake_all; +use spin::Mutex; + +pub use sleep::Sleep; + +/// Shared virtual clock and timer registry for one simulation runtime. +/// +/// All cloned handles observe the same virtual `now`, pending timers, and +/// timer-id sequence. The executor uses this handle both for explicit +/// time-travel operations and for jumping directly to the next pending timer +/// when the runnable queue is empty. +#[derive(Clone, Debug)] +pub struct TimeHandle { + inner: Arc>, +} + +impl TimeHandle { + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(TimeState::default())), + } + } + + pub fn now(&self) -> Duration { + self.inner.lock().now + } + + /// Move virtual time forward by an explicit amount. + /// + /// This is the direct "advance the clock" operation used by tests and + /// higher-level simulation code. It updates `now`, removes any timers that + /// became due at the new instant, and wakes the corresponding tasks after + /// releasing the lock. 
+ pub fn advance(&self, duration: Duration) { + if duration.is_zero() { + return; + } + + let wakers = { + let mut state = self.inner.lock(); + state.now = state.now.saturating_add(duration); + state.take_due_wakers() + }; + wake_all(wakers); + } + + /// Jump virtual time to the earliest outstanding timer and wake it. + /// + /// The executor calls this when there are no runnable tasks left. Instead + /// of incrementing time in wall-clock steps, simulation time jumps + /// directly to the minimum timer deadline. Returns `false` if there are no + /// timers to wake. + pub fn wake_next_timer(&self) -> bool { + let wakers = { + let mut state = self.inner.lock(); + let Some(next_deadline) = state.timers.values().map(|timer| timer.deadline).min() else { + return false; + }; + if next_deadline > state.now { + state.now = next_deadline; + } + state.take_due_wakers() + }; + let woke = !wakers.is_empty(); + wake_all(wakers); + woke + } + + /// Register or refresh a timer entry for a sleeping future. + /// + /// Sleep futures keep a stable `TimerId` across polls. Re-registering with + /// the same id updates the stored waker without creating duplicate timers. + fn register_timer(&self, id: TimerId, deadline: Duration, waker: &Waker) { + let mut state = self.inner.lock(); + state.timers.insert( + id, + TimerEntry { + deadline, + waker: waker.clone(), + }, + ); + } + + /// Remove a timer entry if it is still present. + /// + /// Cancellation is best-effort because the timer may already have been + /// removed by a wakeup path before the caller reaches this point. + fn cancel_timer(&self, id: TimerId) { + self.inner.lock().timers.remove(&id); + } + + /// Allocate a fresh timer id for a new sleep future. + /// + /// Stable timer ids are what let a `Sleep` future re-register itself + /// across polls while still mapping back to a single timer entry. 
+ fn next_timer_id(&self) -> TimerId { + let mut state = self.inner.lock(); + let id = TimerId(state.next_timer_id); + state.next_timer_id = state.next_timer_id.saturating_add(1); + id + } + + /// Create a future that becomes ready after `duration` of virtual time. + /// + /// The returned future is lazy: it does not allocate a timer entry until + /// the first poll, when it can anchor its deadline to the current virtual + /// time. + pub fn sleep(&self, duration: Duration) -> Sleep { + Sleep::new(self.clone(), duration) + } + + /// Race a future against a virtual-time sleep. + /// + /// This is implemented as `future` versus `sleep(duration)` using a biased + /// select. If both become ready in the same simulated step, the main + /// future wins the tie so completion beats timeout deterministically. + pub async fn timeout(&self, duration: Duration, future: impl Future) -> Result { + let sleep = self.sleep(duration); + futures::pin_mut!(future); + futures::pin_mut!(sleep); + + select_biased! { + output = future.fuse() => Ok(output), + () = sleep.fuse() => Err(TimeoutElapsed { duration }), + } + } +} + +impl Default for TimeHandle { + fn default() -> Self { + Self::new() + } +} + +/// Mutable state behind a [`TimeHandle`]. +/// +/// `timers` is keyed by stable `TimerId` so a `Sleep` future can refresh its +/// waker across polls without accumulating duplicate entries. A `BTreeMap` is +/// used to keep due-timer iteration deterministic. +#[derive(Debug, Default)] +struct TimeState { + now: Duration, + next_timer_id: u64, + timers: BTreeMap, +} + +impl TimeState { + /// Remove every timer whose deadline is at or before the current virtual + /// time and return their wakers. 
+ fn take_due_wakers(&mut self) -> Vec { + let due = self + .timers + .iter() + .filter_map(|(id, timer)| (timer.deadline <= self.now).then_some(*id)) + .collect::>(); + due.into_iter() + .filter_map(|id| self.timers.remove(&id).map(|timer| timer.waker)) + .collect() + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct TimerId(u64); + +/// Stored metadata for one pending timer. +#[derive(Debug)] +struct TimerEntry { + deadline: Duration, + waker: Waker, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TimeoutElapsed { + duration: Duration, +} + +impl TimeoutElapsed { + pub fn duration(self) -> Duration { + self.duration + } +} + +impl fmt::Display for TimeoutElapsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "simulated timeout elapsed after {:?}", self.duration) + } +} + +#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl std::error::Error for TimeoutElapsed {} + +#[cfg(test)] +mod tests { + use std::{sync::Arc, time::Duration}; + + use crate::sim; + use spin::Mutex; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(101); + let handle = runtime.handle(); + + runtime.block_on(async move { + assert_eq!(handle.now(), Duration::ZERO); + handle.sleep(Duration::from_millis(5)).await; + assert_eq!(handle.now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(102); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow_handle = handle.clone(); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + slow_handle.sleep(Duration::from_millis(10)).await; + slow_order.lock().push(10); + }); + + let fast_order = Arc::clone(&order); + let fast_handle = handle.clone(); + let fast = handle.spawn_on(sim::NodeId::MAIN, async 
move { + fast_handle.sleep(Duration::from_millis(3)).await; + fast_order.lock().push(3); + }); + + fast.await; + slow.await; + } + }); + + assert_eq!(*order.lock(), vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(103); + let handle = runtime.handle(); + + runtime.block_on(async move { + handle.advance(Duration::from_millis(7)); + assert_eq!(handle.now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(104); + let handle = runtime.handle(); + + let output = runtime.block_on(async move { + handle + .timeout(Duration::from_millis(10), async { + handle.sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(105); + let handle = runtime.handle(); + + let output = runtime.block_on(async move { + handle + .timeout(Duration::from_millis(4), async { + handle.sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/crates/runtime/src/sim/time/sleep.rs b/crates/runtime/src/sim/time/sleep.rs new file mode 100644 index 00000000000..538439018b7 --- /dev/null +++ b/crates/runtime/src/sim/time/sleep.rs @@ -0,0 +1,97 @@ +use alloc::vec::Vec; +use core::{ + future::Future, + pin::Pin, + task::{Context, Poll, Waker}, + time::Duration, +}; + +use super::{TimeHandle, TimerId}; + +/// Future returned by [`TimeHandle::sleep`]. +/// +/// The future stores a relative duration until first poll, then converts that +/// into an absolute deadline and a stable timer id. 
Subsequent polls either +/// complete immediately if virtual time has already reached the deadline or +/// refresh the registered waker and remain pending. +pub struct Sleep { + duration: Duration, + state: SleepState, +} + +impl Sleep { + pub(super) fn new(handle: TimeHandle, duration: Duration) -> Self { + Self { + duration, + state: SleepState::Unregistered { handle }, + } + } +} + +/// Internal state machine for [`Sleep`]. +enum SleepState { + Unregistered { + handle: TimeHandle, + }, + Registered { + handle: TimeHandle, + id: TimerId, + deadline: Duration, + }, + Done, +} + +impl Future for Sleep { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if matches!(self.state, SleepState::Done) { + return Poll::Ready(()); + } + + if let SleepState::Unregistered { handle } = &self.state { + let handle = handle.clone(); + let deadline = handle.now().saturating_add(self.duration); + let id = handle.next_timer_id(); + self.state = SleepState::Registered { handle, id, deadline }; + } + + let SleepState::Registered { handle, id, deadline } = &self.state else { + unreachable!("sleep state should be registered or done"); + }; + + if handle.now() >= *deadline { + let handle = handle.clone(); + let id = *id; + handle.cancel_timer(id); + self.state = SleepState::Done; + Poll::Ready(()) + } else { + handle.register_timer(*id, *deadline, cx.waker()); + Poll::Pending + } + } +} + +impl Drop for Sleep { + /// Remove a pending timer entry when the future is dropped early. + /// + /// This prevents stale wakers from remaining in the runtime after the + /// corresponding task has been cancelled or a timeout race has completed. + fn drop(&mut self) { + if let SleepState::Registered { handle, id, .. } = &self.state { + handle.cancel_timer(*id); + } + } +} + +/// Wake every task collected from a due-timer scan. 
+/// +/// Waking happens only after the time-state mutex has been released so resumed +/// tasks can inspect or mutate timer state without deadlocking on the same +/// lock. +pub(super) fn wake_all(wakers: Vec) { + for waker in wakers { + waker.wake(); + } +} diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs new file mode 100644 index 00000000000..18d45b894a2 --- /dev/null +++ b/crates/runtime/tests/sim_e2e.rs @@ -0,0 +1,108 @@ +#![cfg(feature = "simulation")] + +use std::{sync::Arc, time::Duration}; + +use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; +use spin::Mutex; + +#[test] +fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { + let mut runtime = Runtime::new(101); + let handle = runtime.handle(); + let node_a = runtime.create_node(); + let node_b = runtime.create_node(); + let events = Arc::new(Mutex::new(Vec::new())); + + runtime.pause(node_b); + + runtime.block_on({ + let events = Arc::clone(&events); + async move { + let a_handle = handle.clone(); + let a_events = Arc::clone(&events); + let a = handle.spawn_on(node_a, async move { + a_events.lock().push(("a_started", a_handle.now())); + a_handle.sleep(Duration::from_millis(3)).await; + a_events.lock().push(("a_finished", a_handle.now())); + }); + + let b_handle = handle.clone(); + let b_events = Arc::clone(&events); + let b = handle.spawn_on(node_b, async move { + b_events.lock().push(("b_started", b_handle.now())); + b_handle.sleep(Duration::from_millis(2)).await; + b_events.lock().push(("b_finished", b_handle.now())); + }); + + handle.sleep(Duration::from_millis(1)).await; + events.lock().push(("main_resumed_b", handle.now())); + handle.resume(node_b); + + a.await; + b.await; + } + }); + + let events = events.lock().clone(); + assert!(events.contains(&("a_started", Duration::ZERO))); + assert!(events.contains(&("main_resumed_b", Duration::from_millis(1)))); + assert!(events.contains(&("b_started", Duration::from_millis(1)))); + 
assert!(events.contains(&("a_finished", Duration::from_millis(3)))); + assert!(events.contains(&("b_finished", Duration::from_millis(3)))); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); +} + +#[test] +fn runtime_buggify_matches_standalone_rng_sequence() { + let seed = 77; + let runtime = Runtime::new(seed); + let expected = Rng::new(seed); + + buggify::enable(&runtime); + expected.enable_buggify(); + + let actual = (0..8) + .map(|_| buggify::should_inject_fault_with_prob(&runtime, 0.5)) + .collect::>(); + let expected = (0..8).map(|_| expected.buggify_with_prob(0.5)).collect::>(); + + assert_eq!(actual, expected); + assert!(buggify::is_enabled(&runtime)); + + buggify::disable(&runtime); + assert!(!buggify::is_enabled(&runtime)); + assert!(!buggify::should_inject_fault_with_prob(&runtime, 1.0)); +} + +#[test] +fn multi_node_timeout_uses_shared_virtual_clock() { + let mut runtime = Runtime::new(303); + let handle = runtime.handle(); + let slow_node = runtime.create_node(); + let fast_node = runtime.create_node(); + + let output = runtime.block_on(async move { + let slow_handle = handle.clone(); + let slow = handle.spawn_on(slow_node, async move { + slow_handle + .timeout(Duration::from_millis(4), async { + slow_handle.sleep(Duration::from_millis(10)).await; + "slow-finished" + }) + .await + }); + + let fast_handle = handle.clone(); + let fast = handle.spawn_on(fast_node, async move { + fast_handle.sleep(Duration::from_millis(2)).await; + ("fast-finished", fast_handle.now()) + }); + + (slow.await, fast.await) + }); + + let (slow, fast) = output; + assert_eq!(fast, ("fast-finished", Duration::from_millis(2))); + assert_eq!(slow.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); +} From e4de2bdea1556b76bf53953bbb1dad5bf591aff7 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 18:21:51 +0530 Subject: [PATCH 09/40] drop durability in reopen test helper --- 
crates/core/src/db/relational_db.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ca0d1d3ccdb..4d87f3df918 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2232,11 +2232,12 @@ pub mod tests_utils { drop(self.db); if let Some(DurableState { - durability: _, + durability, rt, replica_dir, }) = self.durable { + drop(durability); // Enter the runtime so that `Self::durable_internal` can spawn a `SnapshotWorker`. let _rt = rt.enter(); let (db, handle) = Self::durable_internal(&replica_dir, rt.handle().clone(), self.want_snapshot_repo)?; From 795a7049d398562ae1d6cdc4014f61f7dc309bb8 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 19:51:53 +0530 Subject: [PATCH 10/40] drop durability in test --- crates/core/src/db/relational_db.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 4d87f3df918..e6ebf098d22 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2024,7 +2024,7 @@ pub mod tests_utils { use super::*; use core::ops::Deref; - use durability::EmptyHistory; + use durability::{Durability, EmptyHistory}; use spacetimedb_datastore::locking_tx_datastore::MutTxId; use spacetimedb_datastore::locking_tx_datastore::TxId; use spacetimedb_fs_utils::compression::CompressType; @@ -2237,6 +2237,7 @@ pub mod tests_utils { replica_dir, }) = self.durable { + rt.block_on(durability.close()); drop(durability); // Enter the runtime so that `Self::durable_internal` can spawn a `SnapshotWorker`. 
let _rt = rt.enter(); From 425e728dd257989192a5e209a81b0440930881d2 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Mon, 11 May 2026 20:36:54 +0530 Subject: [PATCH 11/40] fix snapshot compressor --- Cargo.toml | 4 ++-- crates/core/src/db/relational_db.rs | 4 ++-- crates/core/src/db/snapshot.rs | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4b88f753b9f..f4f74204ea3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,7 +140,7 @@ spacetimedb-pg = { path = "crates/pg", version = "=2.2.0" } spacetimedb-physical-plan = { path = "crates/physical-plan", version = "=2.2.0" } spacetimedb-primitives = { path = "crates/primitives", version = "=2.2.0" } spacetimedb-query = { path = "crates/query", version = "=2.2.0" } -spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0", default-features = false } +spacetimedb-runtime = { path = "crates/runtime", version = "=2.2.0" } spacetimedb-sats = { path = "crates/sats", version = "=2.2.0" } spacetimedb-schema = { path = "crates/schema", version = "=2.2.0" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.2.0" } @@ -391,7 +391,7 @@ features = [ ] [workspace.lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)', 'cfg(simulation)'] } +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } [workspace.lints.clippy] # FIXME: we should work on this lint incrementally diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index fad350334d1..df25edb87c4 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2187,7 +2187,7 @@ pub mod tests_utils { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { SnapshotWorker::new_with_repository( repo, - snapshot::Compression::Disabled, + snapshot::Compression::Enabled, Runtime::tokio(rt.clone()), ) }) @@ -2317,7 +2317,7 @@ pub mod tests_utils { 
open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { SnapshotWorker::new_with_repository( repo, - snapshot::Compression::Disabled, + snapshot::Compression::Enabled, Runtime::tokio(rt.clone()), ) }) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 042b257b608..4e3428b20f8 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -69,7 +69,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repo: Arc, runtime: Runtime) -> Self { + pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Runtime) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -81,7 +81,12 @@ impl SnapshotWorker { snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), runtime: runtime.clone(), - compression: None, + compression: compression.is_enabled().then(|| Compressor { + snapshot_repo: snapshot_repo.clone(), + metrics: CompressionMetrics::new(database), + stats: <_>::default(), + runtime: runtime.clone(), + }), }; runtime.spawn(actor.run()); @@ -342,7 +347,7 @@ impl CompressionMetrics { } struct Compressor { - snapshot_repo: Arc, + snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, runtime: Runtime, From 466481c869bc5a97f011120f92933ff69c79a4f1 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 00:13:55 +0530 Subject: [PATCH 12/40] minor fixes --- crates/core/src/database_logger.rs | 6 ++-- crates/core/src/db/durability.rs | 2 -- crates/core/src/db/persistence.rs | 6 ++-- crates/core/src/db/relational_db.rs | 28 ++++++------------ crates/core/src/db/snapshot.rs | 44 
++++------------------------- crates/snapshot/tests/remote.rs | 6 ++-- 6 files changed, 22 insertions(+), 70 deletions(-) diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index f194cb60a48..0e202229dea 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader, ReadBuf}; +use tokio::io::{AsyncRead, BufReader}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 6d3b814a55f..965196c97ee 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -35,7 +35,6 @@ pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, databa let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { - log::info!("starting spawn close"); match runtime.timeout(Duration::from_secs(10), durability.close()).await { Err(_elapsed) => { error!("{label} timeout waiting for durability shutdown"); @@ -44,7 +43,6 
@@ pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, databa info!("{label} durability shut down at tx offset: {offset:?}"); } } - log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index cd69b2d82ad..9e84a4fb647 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -70,7 +70,7 @@ impl Persistence { } } - /// If snapshots are enabled, get the snapshot repository they are stored in. + /// If snapshots are enabled, get the [SnapshotRepo] they are stored in. pub fn snapshot_repo(&self) -> Option> { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } @@ -157,9 +157,7 @@ impl PersistenceProvider for LocalPersistenceProvider { let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| { - SnapshotWorker::new_with_repository(repo, snapshot::Compression::Enabled, Runtime::tokio_current()) - })?; + .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio_current()))?; let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index df25edb87c4..f938efc71c1 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -241,14 +241,12 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// - snapshots + /// /// - `snapshot_repo` /// - /// Optional snapshot persistence and background snapshot execution, - /// carried through [`Persistence`]. + /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. 
- /// If restoring from an existing database, the snapshot repository must - /// store views of the same sequence of TXes as the `history`. - /// + /// If restoring from an existing database, the `snapshot_repo` must + /// store views of the same sequence of TXes as the `history` /// - `metrics_recorder_queue` /// /// The send side of a queue for recording transaction metrics. @@ -489,7 +487,7 @@ impl RelationalDB { // Try to load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &(impl SnapshotRepo + ?Sized), + snapshot_repo: &DynSnapshotRepo, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { @@ -623,7 +621,7 @@ impl RelationalDB { } } } - log::info!("[{database_identity}] DATABASE: no usable snapshot in store"); + log::info!("[{database_identity}] DATABASE: no usable snapshot in snapshot repo"); // If we didn't find a snapshot and the commitlog doesn't start at the // zero-th commit (e.g. due to archiving), there is no way to restore @@ -2185,11 +2183,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - Runtime::tokio(rt.clone()), - ) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; @@ -2315,11 +2309,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new_with_repository( - repo, - snapshot::Compression::Enabled, - Runtime::tokio(rt.clone()), - ) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; @@ -2363,7 +2353,7 @@ pub mod tests_utils { Arc::new(|_, _| i64::MAX) } - pub fn take_snapshot(&self, repo: &SnapshotRepository) -> Result, DBError> { + pub fn take_snapshot(&self, repo: 
&DynSnapshotRepo) -> Result, DBError> { Ok(self.inner.take_snapshot(repo)?) } } diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 4e3428b20f8..4a78100f1e0 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo, SnapshotRepository}; +use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; @@ -60,7 +60,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repo: Arc, + snapshot_repository: Arc, } impl SnapshotWorker { @@ -93,7 +93,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repo, + snapshot_repository: snapshot_repo, } } @@ -107,9 +107,9 @@ impl SnapshotWorker { .expect("snapshot worker panicked"); } - /// Get the snapshot repository this worker is operating on. + /// Get the snapshot repo this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repo.clone() + self.snapshot_repository.clone() } /// Request a snapshot to be taken. 
@@ -143,40 +143,6 @@ impl SnapshotWorker { } } -impl SnapshotWorker { - pub fn new_with_repository( - snapshot_repository: Arc, - compression: Compression, - runtime: Runtime, - ) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); - let (snapshot_created, _) = watch::channel(latest_snapshot); - let (request_tx, request_rx) = mpsc::unbounded(); - - let actor = SnapshotWorkerActor { - snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), - snapshot_created: snapshot_created.clone(), - metrics: SnapshotMetrics::new(database), - runtime: runtime.clone(), - compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), - metrics: CompressionMetrics::new(database), - stats: <_>::default(), - runtime: runtime.clone(), - }), - }; - runtime.spawn(actor.run()); - - Self { - snapshot_created, - request_snapshot: request_tx, - snapshot_repo: snapshot_repository, - } - } -} - struct SnapshotMetrics { snapshot_timing_total: Histogram, snapshot_timing_inner: Histogram, diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 41097b33abd..81d67bc2ec5 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -10,7 +10,7 @@ use spacetimedb::{ snapshot::{self, SnapshotWorker}, }, error::DBError, - Identity, + runtime, Identity, }; use spacetimedb_datastore::execution_context::Workload; use spacetimedb_datastore::locking_tx_datastore::datastore::Locking; @@ -227,14 +227,14 @@ impl SourceSnapshot { async fn create_snapshot(repo: Arc) -> anyhow::Result { let start = Instant::now(); - let rt = tokio::runtime::Handle::current(); + let rt = runtime::Runtime::tokio_current(); // NOTE: `_db` needs to stay alive until the snapshot is taken, // because the snapshot worker holds only a weak reference. 
let (mut watch, _db) = spawn_blocking(|| { let persistence = Persistence { durability: Arc::new(NoDurability::default()), disk_size: Arc::new(|| Ok(<_>::default())), - snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled)), + snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled, rt.clone())), runtime: rt, }; let db = TestDB::open_db(EmptyHistory::new(), Some(persistence), None, 0)?; From 7d1e21d7f148179b09458ca54ef8a82a59052c47 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 00:42:18 +0530 Subject: [PATCH 13/40] minor fix --- crates/core/src/db/durability.rs | 3 +- crates/core/src/db/persistence.rs | 11 ++- crates/core/src/db/relational_db.rs | 23 +++-- crates/core/src/db/snapshot.rs | 3 +- crates/core/src/lib.rs | 1 - crates/core/src/runtime.rs | 4 - .../subscription/module_subscription_actor.rs | 2 +- crates/standalone/src/subcommands/start.rs | 97 ++++++------------- 8 files changed, 57 insertions(+), 87 deletions(-) delete mode 100644 crates/core/src/runtime.rs diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index 965196c97ee..d712630a63a 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -10,7 +10,8 @@ use spacetimedb_durability::Transaction; use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; -use crate::{db::persistence::Durability, runtime::Runtime}; +use crate::db::persistence::Durability; +use spacetimedb_runtime::Runtime; pub(super) fn request_durability( durability: &Durability, diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index 9e84a4fb647..dbd7e42c22c 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -6,7 +6,8 @@ use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; -use crate::{messages::control_db::Database, runtime::Runtime, 
util::asyncify}; +use crate::{messages::control_db::Database, util::asyncify}; +use spacetimedb_runtime::Runtime; use super::{ relational_db::{self, Txdata}, @@ -152,13 +153,15 @@ impl PersistenceProvider for LocalPersistenceProvider { async fn persistence(&self, database: &Database, replica_id: u64) -> anyhow::Result { let replica_dir = self.data_dir.replica(replica_id); let snapshot_dir = replica_dir.snapshots(); + let runtime = Runtime::tokio_current(); let database_identity = database.database_identity; let snapshot_worker = asyncify(move || relational_db::open_snapshot_repo(snapshot_dir, database_identity, replica_id)) .await - .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio_current()))?; - let (durability, disk_size) = relational_db::local_durability(replica_dir, Some(&snapshot_worker)).await?; + .map(|repo| SnapshotWorker::new(repo, snapshot::Compression::Enabled, runtime.clone()))?; + let (durability, disk_size) = + relational_db::local_durability(replica_dir, runtime.clone(), Some(&snapshot_worker)).await?; tokio::spawn(relational_db::snapshot_watching_commitlog_compressor( snapshot_worker.subscribe(), @@ -171,7 +174,7 @@ impl PersistenceProvider for LocalPersistenceProvider { durability, disk_size, snapshots: Some(snapshot_worker), - runtime: Runtime::tokio_current(), + runtime, }) } } diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index f938efc71c1..347d0509159 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -1,7 +1,6 @@ use crate::db::durability::{request_durability, spawn_close as spawn_durability_close}; use crate::db::MetricsRecorderQueue; use crate::error::{DBError, RestoreSnapshotError}; -use crate::runtime::Runtime; use crate::subscription::ExecutionCounters; use crate::util::asyncify; use crate::worker_metrics::WORKER_METRICS; @@ -45,6 +44,7 @@ use spacetimedb_lib::Identity; use spacetimedb_paths::server::SnapshotDirPath; use 
spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; +use spacetimedb_runtime::Runtime; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue}; @@ -54,7 +54,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepo, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -143,8 +143,6 @@ impl Drop for RelationalDB { if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); } - - log::info!("drop done"); } } @@ -241,12 +239,13 @@ impl RelationalDB { /// /// `None` may be passed to obtain an in-memory only database. /// - /// /// - `snapshot_repo` + /// - `snapshot_repo` /// /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must /// store views of the same sequence of TXes as the `history` + /// /// - `metrics_recorder_queue` /// /// The send side of a queue for recording transaction metrics. @@ -1681,9 +1680,9 @@ const COMMITLOG_COMPRESSION_FORCE_SEGMENT_BACKLOG: usize = 8; /// of the commitlog. 
pub async fn local_durability( replica_dir: ReplicaDir, + runtime: Runtime, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { - let runtime = Runtime::tokio_current(); let on_new_segment = snapshot_worker.map(|snapshot_worker| { let snapshot_worker = snapshot_worker.clone(); Arc::new(move || { @@ -2188,14 +2187,16 @@ pub mod tests_utils { }) .transpose()?; - let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; + let runtime = Runtime::tokio(rt.clone()); + let (local, disk_size_fn) = + rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: Runtime::tokio(rt), + runtime, }; let (db, _) = RelationalDB::open( @@ -2313,13 +2314,15 @@ pub mod tests_utils { }) }) .transpose()?; - let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), snapshots.as_ref()))?; + let runtime = Runtime::tokio(rt.clone()); + let (local, disk_size_fn) = + rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, snapshots, - runtime: Runtime::tokio(rt), + runtime, }; let db = Self::open_db(history, Some(persistence), None, 0)?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 4a78100f1e0..ca1749bd610 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,7 +17,8 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; -use crate::{runtime::Runtime, worker_metrics::WORKER_METRICS}; +use crate::worker_metrics::WORKER_METRICS; +use spacetimedb_runtime::Runtime; pub type SnapshotDatabaseState = Arc>; diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 
4a7246bcbd7..26b35230b1f 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -18,7 +18,6 @@ pub mod estimation; pub mod host; pub mod module_host_context; pub mod replica_context; -pub mod runtime; pub mod startup; pub mod subscription; pub mod util; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs deleted file mode 100644 index 4c55c71dccd..00000000000 --- a/crates/core/src/runtime.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Runtime boundary re-exported for core call sites. - -pub use spacetimedb_runtime::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; -pub use spacetimedb_runtime::{Runtime, RuntimeTimeout}; diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 6ef9c3f055f..f9c9b13ae04 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2103,7 +2103,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: crate::runtime::Runtime::tokio(rt), + runtime: spacetimedb_runtime::Runtime::tokio(rt), }), None, 0, diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index bc8241938d2..50f6db19257 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,18 +1,12 @@ -#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; -#[cfg(not(simulation))] use spacetimedb_pg::pg_server; -#[cfg(not(simulation))] use std::io::{self, Write}; -#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; -#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; 
use clap::{Arg, ArgMatches}; @@ -21,14 +15,11 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::database::DatabaseRoutes; -#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; -#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -120,7 +111,6 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); - #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -208,26 +198,13 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); - #[cfg(simulation)] - { - let _ = (pg_port, ctx, listen_addr); - anyhow::bail!("standalone start server mode is not supported under simulation"); - } - - #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - #[cfg(not(simulation))] - { - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); - } - #[cfg(not(simulation))] + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = 
db_routes.pre_publish.layer(DefaultBodyLimit::disable()); let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); - #[cfg(not(simulation))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); - #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. // If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -273,44 +250,40 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - #[cfg(not(simulation))] - { - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" + ))?; + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is 
valid and not already in use" - ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! { - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks - } + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - #[cfg(not(simulation))] Ok(()) } @@ -329,7 +302,6 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. 
-#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -364,13 +336,11 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] -#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } -#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -384,7 +354,6 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } -#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -455,7 +424,6 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. -#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -470,7 +438,6 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
-#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); From a521298cbd17222603e3f72b4554cd629d3c8925 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 13:08:06 +0530 Subject: [PATCH 14/40] fixes --- crates/core/Cargo.toml | 1 - crates/core/src/db/relational_db.rs | 4 +- crates/durability/src/imp/local.rs | 48 +++++--- crates/durability/src/imp/mod.rs | 5 - crates/runtime/src/lib.rs | 177 ++++++++++++++++++++++++++-- crates/runtime/src/sim/executor.rs | 105 +++++++++++++++-- crates/runtime/src/sim/mod.rs | 2 +- 7 files changed, 299 insertions(+), 43 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 2947eccac9d..6e7075536c2 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -134,7 +134,6 @@ tikv-jemalloc-ctl = {workspace = true} [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true, features = ["sched"] } - [features] # Print a warning when doing an unindexed `iter_by_col_range` on a large table. unindexed_iter_by_col_range_warn = [] diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 347d0509159..1cdaa47142c 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -244,7 +244,7 @@ impl RelationalDB { /// The [`SnapshotRepo`] which stores snapshots of this database. /// This is only meaningful if `history` and `durability` are also supplied. /// If restoring from an existing database, the `snapshot_repo` must - /// store views of the same sequence of TXes as the `history` + /// store views of the same sequence of TXes as the `history`. 
/// /// - `metrics_recorder_queue` /// @@ -2182,7 +2182,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Runtime::tokio(rt.clone())) }) }) .transpose()?; diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 6bced456ca7..ea7d78ae6cb 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -10,6 +10,7 @@ use std::{ use futures::FutureExt as _; use itertools::Itertools as _; use log::{info, trace, warn}; +use scopeguard::ScopeGuard; use spacetimedb_commitlog::{ error, payload::Txdata, @@ -18,9 +19,9 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::{JoinHandle, Runtime}; use thiserror::Error; -use tokio::sync::{oneshot, watch}; +use tokio::sync::watch; use tracing::{instrument, Span}; use crate::{Close, Durability, DurableOffset, History, PreparedTx, TxOffset}; @@ -106,9 +107,9 @@ where /// This is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// Completion notification for the background actor. Contains `None` once + /// [`JoinHandle`] for the background actor task. Contains `None` once /// consumed by [`Durability::close`]. - actor_done: Mutex>>, + actor: Mutex>>, } /// Commitlog repo backed by [`Fs`] and protected by a [`LockedFile`]. 
@@ -225,17 +226,12 @@ where T: Encode + Send + Sync + 'static, R: Repo + Send + Sync + 'static, { - fn open_inner( - clog: Arc, R>>, - runtime: Runtime, - opts: Options, - ) -> Result { + fn open_inner(clog: Arc, R>>, runtime: Runtime, opts: Options) -> Result { let queue_capacity = opts.queue_capacity(); let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - let (actor_done_tx, actor_done_rx) = oneshot::channel(); - runtime.spawn( + let actor = runtime.spawn( Actor { clog: clog.clone(), durable_offset: durable_tx, @@ -243,7 +239,7 @@ where batch_capacity: opts.batch_capacity, runtime: runtime.clone(), } - .run(txdata_rx, actor_done_tx), + .run(txdata_rx), ); Ok(Self { @@ -251,7 +247,7 @@ where durable_offset: durable_rx, queue, queue_depth, - actor_done: Mutex::new(Some(actor_done_rx)), + actor: Mutex::new(Some(actor)), }) } @@ -324,7 +320,7 @@ where R: Repo + Send + Sync + 'static, { #[instrument(name = "durability::local::actor", skip_all)] - async fn run(self, transactions_rx: async_channel::Receiver>>, done: oneshot::Sender<()>) { + async fn run(self, transactions_rx: async_channel::Receiver>>) { info!("starting durability actor"); let mut tx_buf = Vec::with_capacity(self.batch_capacity.get()); @@ -373,7 +369,6 @@ where } info!("exiting durability actor"); - let _ = done.send(()); } #[instrument(skip_all)] @@ -426,14 +421,29 @@ where info!("close local durability"); let durable_offset = self.durable_tx_offset(); - let maybe_actor_done = self.actor_done.lock().unwrap().take(); + let maybe_actor = self.actor.lock().unwrap().take(); + // Abort actor if shutdown future is dropped. 
+ let abort = scopeguard::guard( + maybe_actor.as_ref().map(|join_handle| join_handle.abort_handle()), + |maybe_abort_handle| { + if let Some(abort_handle) = maybe_abort_handle { + warn!("close future dropped, aborting durability actor"); + abort_handle.abort(); + } + }, + ); self.queue.close(); async move { - if let Some(actor_done) = maybe_actor_done - && actor_done.await.is_err() + if let Some(actor) = maybe_actor + && let Err(e) = actor.await { - warn!("durability actor completion signal dropped"); + // Will print "durability actor: task was cancelled" + // or "durability actor: task panicked [...]" + warn!("durability actor: {e}"); } + // Don't abort if the actor completed. + let _ = ScopeGuard::into_inner(abort); + durable_offset.last_seen() } .boxed() diff --git a/crates/durability/src/imp/mod.rs b/crates/durability/src/imp/mod.rs index 77f0998e6f8..3e00ae21ee1 100644 --- a/crates/durability/src/imp/mod.rs +++ b/crates/durability/src/imp/mod.rs @@ -56,9 +56,4 @@ mod testing { future::ready(*self.durable_offset.borrow()).boxed() } } - - #[cfg(test)] - mod tests { - use super::*; - } } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 8777409c402..9fe4487b235 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -4,7 +4,14 @@ extern crate alloc; -use core::{fmt, future::Future, time::Duration}; +use core::{ + fmt, + future::Future, + marker::PhantomData, + pin::Pin, + task::{Context, Poll}, + time::Duration, +}; pub mod adapter; #[cfg(feature = "simulation")] @@ -21,6 +28,160 @@ pub enum Runtime { Simulation(sim::Handle), } +pub struct JoinHandle { + inner: JoinHandleInner, +} + +pub struct AbortHandle { + inner: AbortHandleInner, +} + +enum JoinHandleInner { + #[cfg(feature = "tokio")] + Tokio(Option>), + #[cfg(feature = "simulation")] + Simulation(Option>), + Detached(PhantomData), +} + +enum AbortHandleInner { + #[cfg(feature = "tokio")] + Tokio(tokio::task::AbortHandle), + #[cfg(feature = "simulation")] + 
Simulation(sim::AbortHandle), +} + +#[derive(Debug)] +pub struct JoinError { + inner: JoinErrorInner, +} + +#[derive(Debug)] +enum JoinErrorInner { + #[cfg(feature = "tokio")] + Tokio(tokio::task::JoinError), + #[cfg(feature = "simulation")] + Simulation(sim::JoinError), +} + +impl AbortHandle { + pub fn abort(&self) { + match &self.inner { + #[cfg(feature = "tokio")] + AbortHandleInner::Tokio(handle) => handle.abort(), + #[cfg(feature = "simulation")] + AbortHandleInner::Simulation(handle) => handle.abort(), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime abort handle has no enabled backend"), + } + } +} + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = f; + match &self.inner { + #[cfg(feature = "tokio")] + JoinErrorInner::Tokio(err) => err.fmt(f), + #[cfg(feature = "simulation")] + JoinErrorInner::Simulation(err) => err.fmt(f), + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + _ => unreachable!("runtime join error has no enabled backend"), + } + } +} + +#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl std::error::Error for JoinError {} + +impl JoinHandle { + pub fn abort_handle(&self) -> AbortHandle { + match &self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(Some(handle)) => AbortHandle { + inner: AbortHandleInner::Tokio(handle.abort_handle()), + }, + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(Some(handle)) => AbortHandle { + inner: AbortHandleInner::Simulation(handle.abort_handle()), + }, + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(None) => panic!("runtime join handle aborted after detach"), + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(None) => panic!("runtime join handle aborted after detach"), + JoinHandleInner::Detached(_) => panic!("runtime join handle aborted after completion"), + } + } + + pub fn 
detach(mut self) { + self.detach_inner(); + } + + fn detach_inner(&mut self) { + match &mut self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(handle) => { + drop(handle.take()); + } + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(handle) => { + if let Some(handle) = handle.take() { + handle.detach(); + } + } + JoinHandleInner::Detached(_) => {} + } + self.inner = JoinHandleInner::Detached(PhantomData); + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = cx; + match &mut self.inner { + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Poll::Ready(Ok(output)) => { + self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { + inner: JoinErrorInner::Tokio(err), + })), + Poll::Pending => Poll::Pending, + }, + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Poll::Ready(Ok(output)) => { + self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { + inner: JoinErrorInner::Simulation(err), + })), + Poll::Pending => Poll::Pending, + }, + #[cfg(feature = "tokio")] + JoinHandleInner::Tokio(None) => panic!("runtime join handle polled after detach"), + #[cfg(feature = "simulation")] + JoinHandleInner::Simulation(None) => panic!("runtime join handle polled after detach"), + JoinHandleInner::Detached(_) => panic!("runtime join handle polled after completion"), + } + } +} + +impl Drop for JoinHandle { + fn drop(&mut self) { + self.detach_inner(); + } +} + +impl Unpin for JoinHandle {} + #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct RuntimeTimeout; @@ -54,18 +215,18 @@ impl Runtime { 
adapter::sim_std::simulation_current() } - pub fn spawn(&self, future: impl Future + Send + 'static) { + pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; match self { #[cfg(feature = "tokio")] - Self::Tokio(handle) => { - handle.spawn(future); - } + Self::Tokio(handle) => JoinHandle { + inner: JoinHandleInner::Tokio(Some(handle.spawn(future))), + }, #[cfg(feature = "simulation")] - Self::Simulation(handle) => { - handle.spawn_on(sim::NodeId::MAIN, future).detach(); - } + Self::Simulation(handle) => JoinHandle { + inner: JoinHandleInner::Simulation(Some(handle.spawn_on(sim::NodeId::MAIN, future))), + }, #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index e0a28afc4ba..597bfcc4e09 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -6,7 +6,7 @@ use core::{ future::Future, pin::Pin, sync::atomic::{AtomicBool, AtomicU64, Ordering}, - task::{Context, Poll}, + task::{Context, Poll, Waker}, time::Duration, }; @@ -247,21 +247,104 @@ impl Handle { /// A spawned simulated task. pub struct JoinHandle { - task: async_task::Task, + task: async_task::Task, NodeId>, + abort: AbortHandle, } impl JoinHandle { + /// Return a handle that can cancel this task without consuming the join + /// handle. + pub fn abort_handle(&self) -> AbortHandle { + self.abort.clone() + } + /// Detach the task so it continues running without awaiting its output. 
pub fn detach(self) { self.task.detach(); } + + pub(crate) fn poll_join(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.task).poll(cx) + } } impl Future for JoinHandle { type Output = T; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - Pin::new(&mut self.task).poll(cx) + match self.as_mut().poll_join(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(output), + Poll::Ready(Err(err)) => panic!("sim task: {err}"), + Poll::Pending => Poll::Pending, + } + } +} + +#[derive(Clone)] +pub struct AbortHandle { + state: Arc, +} + +impl AbortHandle { + pub fn abort(&self) { + self.state.aborted.store(true, Ordering::Relaxed); + if let Some(waker) = self.state.waker.lock().take() { + waker.wake(); + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct JoinError; + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("task was cancelled") + } +} + +#[cfg(feature = "simulation-std")] +impl std::error::Error for JoinError {} + +struct AbortState { + aborted: AtomicBool, + waker: Mutex>, +} + +impl AbortState { + fn new() -> Self { + Self { + aborted: AtomicBool::new(false), + waker: Mutex::new(None), + } + } +} + +struct Abortable { + future: F, + abort: AbortHandle, +} + +impl Abortable { + fn new(future: F, abort: AbortHandle) -> Self { + Self { future, abort } + } +} + +impl Future for Abortable { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.abort.state.aborted.load(Ordering::Relaxed) { + return Poll::Ready(Err(JoinError)); + } + + self.abort.state.waker.lock().replace(cx.waker().clone()); + + // SAFETY: the wrapper never moves `future` after being pinned. Only the + // cancellation fields outside `future` are accessed normally. 
+ let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; + future.as_mut().poll(cx).map(Ok) } } @@ -349,13 +432,17 @@ impl Executor { { self.node_state(node); + let abort = AbortHandle { + state: Arc::new(AbortState::new()), + }; + let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = async_task::Builder::new() .metadata(node) - .spawn(move |_| future, move |runnable| sender.send(runnable)); + .spawn(move |_| abortable, move |runnable| sender.send(runnable)); runnable.schedule(); - JoinHandle { task } + JoinHandle { task, abort } } /// Spawn a non-`Send` task on the single-threaded runtime. @@ -366,15 +453,19 @@ impl Executor { { self.node_state(node); + let abort = AbortHandle { + state: Arc::new(AbortState::new()), + }; + let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = unsafe { async_task::Builder::new() .metadata(node) - .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) + .spawn_unchecked(move |_| abortable, move |runnable| sender.send(runnable)) }; runnable.schedule(); - JoinHandle { task } + JoinHandle { task, abort } } #[track_caller] diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 1b778f96d62..9575958f30d 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -11,6 +11,6 @@ mod rng; pub mod time; pub use config::RuntimeConfig; -pub use executor::{yield_now, Handle, JoinHandle, NodeId, Runtime}; +pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; From e59ac1237f3124a8b34f98e157a92044acba7730 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 12 May 2026 13:17:21 +0530 Subject: [PATCH 15/40] fix unneccessary diff --- crates/core/src/db/snapshot.rs | 6 +++--- crates/durability/src/imp/local.rs | 9 ++------- 2 files changed, 5 
insertions(+), 10 deletions(-) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index ca1749bd610..8a83ef4318c 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -61,7 +61,7 @@ impl Compression { pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, - snapshot_repository: Arc, + snapshot_repo: Arc, } impl SnapshotWorker { @@ -94,7 +94,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository: snapshot_repo, + snapshot_repo, } } @@ -110,7 +110,7 @@ impl SnapshotWorker { /// Get the snapshot repo this worker is operating on. pub fn snapshot_repo(&self) -> Arc { - self.snapshot_repository.clone() + self.snapshot_repo.clone() } /// Request a snapshot to be taken. diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index ea7d78ae6cb..d56a0fa9f61 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -107,8 +107,8 @@ where /// This is mainly for observability purposes, and can thus be updated with /// relaxed memory ordering. queue_depth: Arc, - /// [`JoinHandle`] for the background actor task. Contains `None` once - /// consumed by [`Durability::close`]. + /// [JoinHandle] for the actor task. Contains `None` if already cancelled + /// (via [Durability::close]). actor: Mutex>>, } @@ -278,11 +278,6 @@ where self.queue_depth.load(Relaxed) } - /// Obtain an iterator over the [`Commit`]s in the underlying log. - pub fn commits_from(&self, offset: TxOffset) -> impl Iterator> + use { - self.clog.commits_from(offset).map_ok(Commit::from) - } - /// Get a list of segment offsets, sorted in ascending order. 
pub fn existing_segment_offsets(&self) -> io::Result> { self.clog.existing_segment_offsets() From d074cf01467228d04445dcdf88603d2e0606aa47 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 11:07:03 +0530 Subject: [PATCH 16/40] polishing --- Cargo.lock | 1 - crates/runtime/Cargo.toml | 4 +- crates/runtime/README.md | 155 ++++------- crates/runtime/src/adapter/mod.rs | 5 - crates/runtime/src/adapter/sim_std.rs | 361 -------------------------- crates/runtime/src/adapter/tokio.rs | 11 - crates/runtime/src/lib.rs | 151 +++++++---- crates/runtime/src/sim/config.rs | 16 -- crates/runtime/src/sim/executor.rs | 42 ++- crates/runtime/src/sim/mod.rs | 4 +- crates/runtime/src/sim/rng.rs | 42 +-- crates/runtime/src/sim/time/mod.rs | 2 +- crates/runtime/src/sim_std.rs | 327 +++++++++++++++++++++++ crates/runtime/tests/sim_e2e.rs | 93 +++++++ 14 files changed, 591 insertions(+), 623 deletions(-) delete mode 100644 crates/runtime/src/adapter/mod.rs delete mode 100644 crates/runtime/src/adapter/sim_std.rs delete mode 100644 crates/runtime/src/adapter/tokio.rs delete mode 100644 crates/runtime/src/sim/config.rs create mode 100644 crates/runtime/src/sim_std.rs diff --git a/Cargo.lock b/Cargo.lock index 5beda16a9d4..6a3c78946de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8486,7 +8486,6 @@ dependencies = [ "libc", "spin", "tokio", - "tracing", ] [[package]] diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index a86ee9d0fc4..0460432086b 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -16,10 +16,8 @@ tokio = { workspace = true, optional = true } async-task = { version = "4.4", optional = true } spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } -tracing = { workspace = true, optional = true } [features] default = ["tokio"] tokio = ["dep:tokio"] -simulation = ["dep:async-task", "dep:spin"] -simulation-std = ["simulation", 
"dep:libc", "dep:tracing"] +simulation = ["dep:async-task", "dep:spin", "dep:libc"] diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 576122be42e..d0443dc3cd9 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,127 +1,60 @@ # spacetimedb-runtime -`spacetimedb-runtime` is the runtime boundary shared by SpacetimeDB core code -and DST. The goal is not to emulate all of Tokio. We do not aim to support -`tokio::net`, `tokio::fs`, or arbitrary ecosystem compatibility here. The goal -is much narrower: provide the small amount of execution control that core -database code needs so that it can run under either a deterministic single- -threaded runtime or a hosted adapter. +`spacetimedb-runtime` is runtime boundary that lets SpacetimeDB core code run +under deterministic simulation testing (DST). -The crate is intentionally hybrid. Some parts of the process are naturally -Tokio-owned today, especially networking, subscriptions, and other integration- -heavy infrastructure. DST and selected core/database paths need a different -model: single-threaded, deterministic scheduling, explicit time, and a runtime -that can move toward `no_std + alloc`. This crate exists to support both -execution domains without forcing the whole process onto one scheduler. +DST runs code inside a deterministic simulator that controls nondeterministic +inputs instead of letting them come directly from host environment. Given same +seed, simulator should produce same trace. When it finds a bug, seed should be +enough to reproduce that bug exactly. -## Architecture - -The top-level type in [src/lib.rs](./src/lib.rs) is `Runtime`. It is the small -facade that shared core code should depend on. `Runtime` is not the simulator -itself and it is not Tokio. 
It is a tagged handle with the backends that matter -to SpacetimeDB: - -- `Runtime::Tokio(TokioHandle)` when the `tokio` feature is enabled -- `Runtime::Simulation(sim::Handle)` when the `simulation` feature is enabled - -Code such as durability and snapshotting should accept or store `Runtime` and -use only the narrow operations exposed there: `spawn`, `spawn_blocking`, and -`timeout`. That keeps shared logic independent of the hosted runtime choice. - -Under that facade, this crate has two layers. - -The first layer is the simulation core under [src/sim](./src/sim). This is the -deterministic single-thread runtime used by DST. The long-term direction for -this layer is `no_std + alloc`, explicit handles, explicit time, and no -dependency on ambient host facilities. - -The second layer is the hosted adapter layer under [src/adapter](./src/adapter). -Today that includes a Tokio adapter and std-hosted simulation conveniences. The -Tokio adapter exists because some production and testing paths still need a real -process runtime. The std-hosted simulation helpers exist because determinism -testing, thread-local convenience APIs, and Unix hooks are useful in hosted -environments even though they are not part of the portable simulation core. - -## Feature Model +For this to work, code under test must not read clocks, randomness, +scheduling, I/O, or network behavior directly from outer environment. Those +effects need interfaces that production can implement with hosted services and +DST can replace with simulated ones. -The crate is organized around features that reflect that layering. +This crate provides the execution-control part of that boundary: spawning, +timeouts, virtual time, deterministic randomness, task scheduling, and fault +decisions. Storage, networking, and replication should be modeled through +higher-level abstractions. -- `simulation` - Enables the deterministic simulation runtime core. 
This is the part that is - intended to move toward `no_std + alloc`. -- `simulation-std` - Enables std-hosted conveniences layered on top of `simulation`, such as - thread-local current-handle access, determinism replay helpers, and host OS - integration hooks used by DST in a normal process. -- `tokio` - Enables the Tokio-backed hosted adapter and remains part of the default - feature set for now. -- `std` - Enables hosted-only functionality shared by the adapter layer. - -This means “simulation” is not shorthand for “all simulation tooling.” It is -the portable runtime core. Hosted extras live behind `simulation-std`. - -## Simulation Core - -The simulation core lives under [src/sim](./src/sim). - -[src/sim/executor.rs](./src/sim/executor.rs) contains the single-threaded -deterministic executor. It stores ready tasks as `async_task` runnables, uses a -deterministic RNG to choose the next runnable, supports pause/resume by logical -node, and treats “no runnable work and no future timer wakeups” as a hang. - -[src/sim/time.rs](./src/sim/time.rs) contains virtual time. It owns simulated -time state, timer registration, and timeout behavior. The key property is that -time moves only under runtime control, not wall clock control. - -[src/sim/rng.rs](./src/sim/rng.rs) contains deterministic randomness. The -runtime uses this for scheduler choices, and test/workload code can use -`DecisionSource` when it needs deterministic probabilistic decisions. - -The public simulation surface is intentionally explicit: `sim::Runtime`, -`sim::Handle`, `sim::NodeId`, `sim::JoinHandle`, `yield_now`, and the virtual -time and RNG utilities. The portable direction is to make explicit-handle APIs -the main interface, with host-style convenience APIs layered separately. +## Architecture -## Adapter Layer +[src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code +carries. It has two variants: -The adapter layer lives under [src/adapter](./src/adapter). 
+- `Runtime::Tokio(TokioHandle)` for hosted execution. +- `Runtime::Simulation(sim::Handle)` for deterministic simulation. -[src/adapter/tokio.rs](./src/adapter/tokio.rs) is the Tokio facade. It defines -the hosted Tokio types used by the top-level runtime facade and provides -`current_handle_or_new_runtime()` for production code that may or may not -already be inside a Tokio runtime. +[src/sim](./src/sim) contains the simulation core. It is single-threaded and aims +toward `no_std + alloc` over time. This includes: -Std-hosted simulation helpers stay outside the simulation core as well. These -helpers are valuable, but they are adapters around the core, not the core -itself. Examples include thread-local “current runtime” access, determinism -replay helpers, and Unix hooks that prevent simulation from silently escaping -onto real OS threads. +- `executor`: single-threaded task scheduler with deterministic runnable selection. +- `time`: virtual clock, sleeps, and timeouts. +- `rng`: seeded deterministic randomness for scheduler and workload decisions. +- `buggify`: seeded fault-injection decisions. +- `RuntimeConfig`: runtime seed configuration, defined in `executor` and re-exported from `sim`. -## Current Scope +[src/sim_std.rs](./src/sim_std.rs) contains hosted glue around the simulator: -This crate is not trying to make the whole of core `no_std` immediately. For -now, crates such as `relational_db`, `snapshot`, `commitlog`, and `datastore` -may still use `tokio::sync` internally. That is acceptable in the short term, -because those synchronization primitives are runtime-agnostic enough for DST and -the current runtime boundary effort is focused on execution control, not total -removal of Tokio-adjacent types from core. +- `block_on` installs a thread-local simulation context for hosted tests. +- `check_determinism` replays the same seeded workload twice and compares the traces.
+- libc randomness hooks route entropy requests to the runtime RNG while simulation + is active, and warn before delegating to the host OS outside simulation. +- Unix thread hooks reject accidental `std::thread::spawn` while simulation is + active. -The longer-term goal is to reduce those dependencies where it materially helps -portability or determinism, but that work is explicitly out of scope for the -first phase of this crate architecture. +Tokio integration is intentionally small and lives directly in +[src/lib.rs](./src/lib.rs). -## Intended Usage +The crate is intentionally hybrid because SpacetimeDB is hybrid. Host-facing +systems such as networking, subscriptions, wasm host glue, auth, process +metrics, and CLI code may continue to use hosted infrastructure. Deep-core and +DST-facing paths should instead depend on `Runtime` or narrower +domain-specific traits passed in by the caller. -Shared core/database code should depend on `Runtime`, not on raw Tokio handles -or simulator internals. DST should construct `sim::Runtime` directly and use it -to drive deterministic test execution. Hosted production/testing code that still -needs Tokio should use the Tokio adapter through `Runtime::tokio(...)`, -`Runtime::tokio_current()`, and `current_handle_or_new_runtime()`. +Feature flags: -The likely end state is still hybrid: core/database execution may eventually run -on the same deterministic single-thread runtime in both DST and selected -production paths, while networking, clients, subscriptions, and other hosted -subsystems continue to live on Tokio. That is a deliberate design choice, not a -temporary inconsistency. +- `tokio`: enables the hosted runtime backend and remains in the default feature set. +- `simulation`: enables the deterministic simulation runtime and hosted `sim_std` + helpers.
diff --git a/crates/runtime/src/adapter/mod.rs b/crates/runtime/src/adapter/mod.rs deleted file mode 100644 index a254877d883..00000000000 --- a/crates/runtime/src/adapter/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[cfg(feature = "tokio")] -pub mod tokio; - -#[cfg(feature = "simulation-std")] -pub mod sim_std; diff --git a/crates/runtime/src/adapter/sim_std.rs b/crates/runtime/src/adapter/sim_std.rs deleted file mode 100644 index 2eaa160adec..00000000000 --- a/crates/runtime/src/adapter/sim_std.rs +++ /dev/null @@ -1,361 +0,0 @@ -use alloc::boxed::Box; -use core::{ - cell::{Cell, RefCell}, - future::Future, - ptr, - time::Duration, -}; -use std::sync::OnceLock; - -use crate::sim; - -thread_local! { - static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; - static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; - static STD_RANDOM_SEED: Cell> = const { Cell::new(None) }; - static IN_SIMULATION: Cell = const { Cell::new(false) }; -} - -pub(crate) struct HandleContextGuard { - previous: Option, -} - -pub(crate) struct RngContextGuard { - previous: Option, -} - -pub(crate) struct SimulationThreadGuard { - previous: bool, -} - -pub fn simulation_current() -> crate::Runtime { - crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) -} - -pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { - ensure_rng_hooks_linked(); - if !init_std_random_state(runtime.rng().seed()) { - tracing::warn!("failed to initialize std random state, std HashMap will not be deterministic"); - } - let _handle_context = enter_handle_context(runtime.handle()); - let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(runtime.rng()); - runtime.block_on(future) -} - -pub fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} - -pub fn advance_time(duration: Duration) { - current_handle() - .expect("simulation runtime is not active on this thread") - 
.advance(duration); -} - -pub fn now() -> Duration { - current_handle().map(|handle| handle.now()).unwrap_or_default() -} - -pub fn sleep(duration: Duration) -> sim::time::Sleep { - current_handle() - .expect("sim::time::sleep polled outside sim runtime") - .sleep(duration) -} - -pub async fn timeout(duration: Duration, future: impl Future) -> Result { - current_handle() - .expect("sim::time::timeout polled outside sim runtime") - .timeout(duration, future) - .await -} - -pub fn check_determinism(seed: u64, make_future: fn() -> F) -> F::Output -where - F: Future + 'static, - F::Output: Send + 'static, -{ - check_determinism_with(seed, make_future) -} - -pub fn check_determinism_with(seed: u64, make_future: M) -> F::Output -where - M: Fn() -> F + Clone + Send + 'static, - F: Future + 'static, - F::Output: Send + 'static, -{ - let first = make_future.clone(); - let log = std::thread::spawn(move || { - let mut runtime = sim::Runtime::new(seed); - runtime.enable_determinism_log(); - block_on(&mut runtime, first()); - runtime - .take_determinism_log() - .expect("determinism log should be enabled") - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap(); - - std::thread::spawn(move || { - let mut runtime = sim::Runtime::new(seed); - runtime.enable_determinism_check(log); - let output = block_on(&mut runtime, make_future()); - runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); - output - }) - .join() - .map_err(|payload| panic_with_seed(seed, payload)) - .unwrap() -} - -pub fn enable_buggify() { - current_handle() - .expect("simulation runtime is not active on this thread") - .enable_buggify(); -} - -pub fn disable_buggify() { - current_handle() - .expect("simulation runtime is not active on this thread") - .disable_buggify(); -} - -pub fn is_buggify_enabled() -> bool { - current_handle().is_some_and(|handle| handle.is_buggify_enabled()) -} - -pub fn buggify() -> bool { - current_handle() - .expect("simulation runtime is not 
active on this thread") - .buggify() -} - -pub fn buggify_with_prob(probability: f64) -> bool { - current_handle() - .expect("simulation runtime is not active on this thread") - .buggify_with_prob(probability) -} - -pub(crate) fn enter_handle_context(handle: sim::Handle) -> HandleContextGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - HandleContextGuard { previous } -} - -pub(crate) fn enter_simulation_thread() -> SimulationThreadGuard { - let previous = IN_SIMULATION.with(|state| state.replace(true)); - SimulationThreadGuard { previous } -} - -pub(crate) fn enter_rng_context(rng: sim::GlobalRng) -> RngContextGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - RngContextGuard { previous } -} - -fn in_simulation() -> bool { - IN_SIMULATION.with(Cell::get) -} - -fn init_std_random_state(seed: u64) -> bool { - STD_RANDOM_SEED.with(|slot| slot.set(Some(seed))); - let _ = std::collections::hash_map::RandomState::new(); - STD_RANDOM_SEED.with(|slot| slot.replace(None)).is_none() -} - -fn ensure_rng_hooks_linked() { - unsafe { - getentropy(ptr::null_mut(), 0); - } -} - -fn fill_from_seed(buf: *mut u8, buflen: usize, seed: u64) { - if buflen == 0 { - return; - } - let rng = sim::GlobalRng::new(seed); - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); -} - -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); - true - }) -} - -fn panic_with_seed(seed: u64, payload: Box) -> ! 
{ - eprintln!("note: run with --seed {seed} to reproduce this error"); - std::panic::resume_unwind(payload); -} - -impl Drop for HandleContextGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); - } -} - -impl Drop for RngContextGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - -impl Drop for SimulationThreadGuard { - fn drop(&mut self) { - IN_SIMULATION.with(|state| { - state.set(self.previous); - }); - } -} - -#[cfg(unix)] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { - if in_simulation() { - eprintln!("attempt to spawn a system thread in simulation."); - eprintln!("note: use simulator tasks instead."); - return -1; - } - - type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; - static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); - let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); - std::mem::transmute(ptr) - }); - unsafe { original(attr) } -} - -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - #[cfg(target_os = "macos")] - let _ = flags; - - if let Some(seed) = STD_RANDOM_SEED.with(|slot| slot.replace(None)) { - fill_from_seed(buf, buflen, seed); - return buflen as isize; - } - if fill_from_current_rng(buf, buflen) { - return buflen as isize; - } - - #[cfg(target_os = "linux")] - { - type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; - static GETRANDOM: OnceLock = OnceLock::new(); - let original = GETRANDOM.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getrandom"); 
- std::mem::transmute(ptr) - }); - unsafe { original(buf, buflen, flags) } - } - - #[cfg(target_os = "macos")] - { - type GetentropyFn = unsafe extern "C" fn(*mut u8, usize) -> libc::c_int; - static GETENTROPY: OnceLock = OnceLock::new(); - let original = GETENTROPY.get_or_init(|| unsafe { - let ptr = libc::dlsym(libc::RTLD_NEXT, c"getentropy".as_ptr().cast()); - assert!(!ptr.is_null(), "failed to resolve original getentropy"); - std::mem::transmute(ptr) - }); - match unsafe { original(buf, buflen) } { - -1 => -1, - 0 => buflen as isize, - _ => unreachable!("unexpected getentropy return value"), - } - } - - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - { - let _ = (buf, buflen, flags); - compile_error!("unsupported OS for DST getrandom override"); - } -} - -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { - if buflen > 256 { - return -1; - } - match unsafe { getrandom(buf, buflen, 0) } { - -1 => -1, - _ => 0, - } -} - -#[cfg(target_os = "macos")] -#[unsafe(no_mangle)] -#[inline(never)] -unsafe extern "C" fn CCRandomGenerateBytes(bytes: *mut u8, count: usize) -> i32 { - match unsafe { getrandom(bytes, count, 0) } { - -1 => -1, - _ => 0, - } -} - -#[cfg(test)] -mod tests { - use crate::sim; - - #[test] - #[cfg(unix)] - fn runtime_forbids_system_thread_spawn() { - let mut runtime = sim::Runtime::new(200); - runtime.block_on(async { - let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); - assert!(result.is_err()); - }); - } - - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = sim::GlobalRng::new(20); - let _guard = enter_rng_context(rng.clone()); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); - } - - let expected_rng = sim::GlobalRng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); - } - - #[test] - fn 
std_hashmap_order_is_seeded_for_runtime_thread() { - fn order_for(seed: u64) -> Vec<(u64, u64)> { - std::thread::spawn(move || { - let _ = init_std_random_state(seed); - (0..12) - .map(|idx| (idx, idx)) - .collect::>() - .into_iter() - .collect() - }) - .join() - .unwrap() - } - - assert_eq!(order_for(30), order_for(30)); - } -} diff --git a/crates/runtime/src/adapter/tokio.rs b/crates/runtime/src/adapter/tokio.rs deleted file mode 100644 index 5d605bba39a..00000000000 --- a/crates/runtime/src/adapter/tokio.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub type TokioHandle = tokio::runtime::Handle; -pub type TokioRuntime = tokio::runtime::Runtime; - -pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { - if let Ok(handle) = TokioHandle::try_current() { - return Ok((handle, None)); - } - - let runtime = TokioRuntime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 9fe4487b235..90345998df5 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(any(feature = "tokio", feature = "simulation-std")), no_std)] +#![cfg_attr(not(any(feature = "tokio", feature = "simulation")), no_std)] //! Runtime and deterministic simulation utilities shared by core and DST. 
@@ -13,12 +13,25 @@ use core::{ time::Duration, }; -pub mod adapter; #[cfg(feature = "simulation")] pub mod sim; +#[cfg(feature = "simulation")] +pub mod sim_std; #[cfg(feature = "tokio")] -pub use adapter::tokio::{current_handle_or_new_runtime, TokioHandle, TokioRuntime}; +pub type TokioHandle = tokio::runtime::Handle; +#[cfg(feature = "tokio")] +pub type TokioRuntime = tokio::runtime::Runtime; + +#[cfg(feature = "tokio")] +pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { + if let Ok(handle) = TokioHandle::try_current() { + return Ok((handle, None)); + } + + let runtime = TokioRuntime::new()?; + Ok((runtime.handle().clone(), Some(runtime))) +} #[derive(Clone)] pub enum Runtime { @@ -77,99 +90,121 @@ impl AbortHandle { } } -impl fmt::Display for JoinError { +impl JoinErrorInner { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - let _ = f; - match &self.inner { + match self { #[cfg(feature = "tokio")] - JoinErrorInner::Tokio(err) => err.fmt(f), + Self::Tokio(err) => fmt::Display::fmt(err, f), #[cfg(feature = "simulation")] - JoinErrorInner::Simulation(err) => err.fmt(f), - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - _ => unreachable!("runtime join error has no enabled backend"), + Self::Simulation(err) => fmt::Display::fmt(err, f), } } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = f; + #[cfg(any(feature = "tokio", feature = "simulation"))] + return self.inner.fmt(f); + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + unreachable!("runtime join error has no enabled backend") + } +} + +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for JoinError {} -impl JoinHandle { - pub fn abort_handle(&self) -> AbortHandle { - match 
&self.inner { +impl JoinHandleInner { + fn abort_handle(&self) -> AbortHandle { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(Some(handle)) => AbortHandle { + Self::Tokio(Some(handle)) => AbortHandle { inner: AbortHandleInner::Tokio(handle.abort_handle()), }, #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(Some(handle)) => AbortHandle { + Self::Simulation(Some(handle)) => AbortHandle { inner: AbortHandleInner::Simulation(handle.abort_handle()), }, #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(None) => panic!("runtime join handle aborted after detach"), + Self::Tokio(None) => panic!("runtime join handle aborted after detach"), #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(None) => panic!("runtime join handle aborted after detach"), - JoinHandleInner::Detached(_) => panic!("runtime join handle aborted after completion"), + Self::Simulation(None) => panic!("runtime join handle aborted after detach"), + Self::Detached(_) => panic!("runtime join handle aborted after completion"), } } - pub fn detach(mut self) { - self.detach_inner(); - } - - fn detach_inner(&mut self) { - match &mut self.inner { + fn detach(&mut self) { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(handle) => { + Self::Tokio(handle) => { drop(handle.take()); } #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(handle) => { + Self::Simulation(handle) => { if let Some(handle) = handle.take() { handle.detach(); } } - JoinHandleInner::Detached(_) => {} + Self::Detached(_) => {} } - self.inner = JoinHandleInner::Detached(PhantomData); } -} -impl Future for JoinHandle { - type Output = Result; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - #[cfg(not(any(feature = "tokio", feature = "simulation")))] - let _ = cx; - match &mut self.inner { + fn poll_result(&mut self, cx: &mut Context<'_>) -> Poll> { + match self { #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(Some(handle)) => match 
Pin::new(handle).poll(cx) { - Poll::Ready(Ok(output)) => { - self.inner = JoinHandleInner::Detached(PhantomData); - Poll::Ready(Ok(output)) - } + Self::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Tokio(err), })), Poll::Pending => Poll::Pending, }, #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { - Poll::Ready(Ok(output)) => { - self.inner = JoinHandleInner::Detached(PhantomData); - Poll::Ready(Ok(output)) - } + Self::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Simulation(err), })), Poll::Pending => Poll::Pending, }, #[cfg(feature = "tokio")] - JoinHandleInner::Tokio(None) => panic!("runtime join handle polled after detach"), + Self::Tokio(None) => panic!("runtime join handle polled after detach"), #[cfg(feature = "simulation")] - JoinHandleInner::Simulation(None) => panic!("runtime join handle polled after detach"), - JoinHandleInner::Detached(_) => panic!("runtime join handle polled after completion"), + Self::Simulation(None) => panic!("runtime join handle polled after detach"), + Self::Detached(_) => panic!("runtime join handle polled after completion"), + } + } +} + +impl JoinHandle { + pub fn abort_handle(&self) -> AbortHandle { + self.inner.abort_handle() + } + + pub fn detach(mut self) { + self.detach_inner(); + } + + fn detach_inner(&mut self) { + self.inner.detach(); + self.inner = JoinHandleInner::Detached(PhantomData); + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + #[cfg(not(any(feature = "tokio", feature = "simulation")))] + let _ = cx; + match self.inner.poll_result(cx) { + Poll::Ready(Ok(output)) => { + 
self.inner = JoinHandleInner::Detached(PhantomData); + Poll::Ready(Ok(output)) + } + Poll::Ready(Err(err)) => Poll::Ready(Err(err)), + Poll::Pending => Poll::Pending, } } } @@ -191,30 +226,32 @@ impl fmt::Display for RuntimeTimeout { } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for RuntimeTimeout {} +#[cfg(feature = "tokio")] impl Runtime { - #[cfg(feature = "tokio")] pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } - #[cfg(feature = "tokio")] pub fn tokio_current() -> Self { Self::tokio(TokioHandle::current()) } +} - #[cfg(feature = "simulation")] +#[cfg(feature = "simulation")] +impl Runtime { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } - #[cfg(feature = "simulation-std")] pub fn simulation_current() -> Self { - adapter::sim_std::simulation_current() + sim_std::simulation_current() } +} +impl Runtime { pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; diff --git a/crates/runtime/src/sim/config.rs b/crates/runtime/src/sim/config.rs deleted file mode 100644 index 92ab8d0fdbc..00000000000 --- a/crates/runtime/src/sim/config.rs +++ /dev/null @@ -1,16 +0,0 @@ -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct RuntimeConfig { - pub seed: u64, -} - -impl RuntimeConfig { - pub const fn new(seed: u64) -> Self { - Self { seed } - } -} - -impl Default for RuntimeConfig { - fn default() -> Self { - Self::new(0) - } -} diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 597bfcc4e09..04abae27020 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -13,10 +13,27 @@ use core::{ use futures_util::FutureExt; use spin::Mutex; -use crate::sim::{time::TimeHandle, Rng, RuntimeConfig}; +use crate::sim::{time::TimeHandle, Rng}; type Runnable = 
async_task::Runnable; +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RuntimeConfig { + pub seed: u64, +} + +impl RuntimeConfig { + pub const fn new(seed: u64) -> Self { + Self { seed } + } +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self::new(0) + } +} + /// A unique identifier for a simulated node. #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct NodeId(u64); @@ -303,7 +320,7 @@ impl fmt::Display for JoinError { } } -#[cfg(feature = "simulation-std")] +#[cfg(feature = "simulation")] impl std::error::Error for JoinError {} struct AbortState { @@ -675,7 +692,7 @@ mod tests { let actual = (0..8).map(|_| handle.buggify_with_prob(0.5)).collect::>(); let expected = { - let mut rng = Rng::new(77); + let rng = Rng::new(77); rng.enable_buggify(); (0..8).map(|_| rng.buggify_with_prob(0.5)).collect::>() }; @@ -696,15 +713,14 @@ mod tests { assert!(!runtime.is_buggify_enabled()); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(crate::adapter::sim_std::current_handle().is_none()); + assert!(crate::sim_std::current_handle().is_none()); let mut runtime = Runtime::new(5); - let value = crate::adapter::sim_std::block_on(&mut runtime, async { - let handle = - crate::adapter::sim_std::current_handle().expect("sim handle should be present inside block_on"); + let value = crate::sim_std::block_on(&mut runtime, async { + let handle = crate::sim_std::current_handle().expect("sim handle should be present inside block_on"); let node = handle.create_node(); let captured = std::rc::Rc::new(17); handle @@ -716,16 +732,16 @@ mod tests { }); assert_eq!(value, 17); - assert!(crate::adapter::sim_std::current_handle().is_none()); + assert!(crate::sim_std::current_handle().is_none()); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] fn check_determinism_runs_future_twice() { static CALLS: AtomicUsize = 
AtomicUsize::new(0); CALLS.store(0, Ordering::SeqCst); - let value = crate::adapter::sim_std::check_determinism(3, || async { + let value = crate::sim_std::check_determinism(3, || async { CALLS.fetch_add(1, Ordering::SeqCst); yield_now().await; 13 @@ -735,14 +751,14 @@ mod tests { assert_eq!(CALLS.load(Ordering::SeqCst), 2); } - #[cfg(feature = "simulation-std")] + #[cfg(feature = "simulation")] #[test] #[should_panic(expected = "non-determinism detected")] fn check_determinism_rejects_different_scheduler_sequence() { static FIRST_RUN: AtomicBool = AtomicBool::new(true); FIRST_RUN.store(true, Ordering::SeqCst); - crate::adapter::sim_std::check_determinism(4, || async { + crate::sim_std::check_determinism(4, || async { if FIRST_RUN.swap(false, Ordering::SeqCst) { yield_now().await; } diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 9575958f30d..4a87c3ef7ac 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -5,12 +5,10 @@ //! deterministic RNG instead of being driven by a package-level async runtime. pub mod buggify; -mod config; mod executor; mod rng; pub mod time; -pub use config::RuntimeConfig; -pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime}; +pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime, RuntimeConfig}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs index 602eae59979..b39219290dd 100644 --- a/crates/runtime/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -9,7 +9,7 @@ pub type Rng = GlobalRng; /// The simulator owns one runtime-wide RNG handle and uses it for scheduler /// choices, probabilistic fault injection, and determinism checks. Hosted /// conveniences such as thread-local current-RNG access and libc random hooks -/// live in `adapter::sim_std`, not here. +/// live in `crate::sim_std`, not here. 
#[derive(Clone, Debug)] pub struct GlobalRng { inner: Arc>, @@ -200,43 +200,3 @@ fn probability_sample(value: u64, probability: f64) -> bool { fn checksum(value: u64) -> u8 { value.to_ne_bytes().into_iter().fold(0, |acc, byte| acc ^ byte) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn rng_log_check_accepts_same_sequence() { - let first = Rng::new(10); - first.enable_determinism_log(); - let first_values = (0..8).map(|_| first.next_u64()).collect::>(); - let log = first.take_determinism_log().unwrap(); - - let second = Rng::new(10); - second.enable_determinism_check(log); - let second_values = (0..8).map(|_| second.next_u64()).collect::>(); - second.finish_determinism_check().unwrap(); - - assert_eq!(first_values, second_values); - } - - #[test] - fn buggify_is_disabled_by_default() { - let rng = Rng::new(20); - for _ in 0..8 { - assert!(!rng.buggify()); - assert!(!rng.buggify_with_prob(1.0)); - } - } - - #[test] - fn buggify_obeys_enable_and_disable() { - let rng = Rng::new(21); - rng.enable_buggify(); - assert!(rng.is_buggify_enabled()); - assert!(rng.buggify_with_prob(1.0)); - rng.disable_buggify(); - assert!(!rng.is_buggify_enabled()); - assert!(!rng.buggify_with_prob(1.0)); - } -} diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 6210675f638..ed559fa70d3 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -194,7 +194,7 @@ impl fmt::Display for TimeoutElapsed { } } -#[cfg(any(feature = "tokio", feature = "simulation-std"))] +#[cfg(any(feature = "tokio", feature = "simulation"))] impl std::error::Error for TimeoutElapsed {} #[cfg(test)] diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs new file mode 100644 index 00000000000..5f936cc5aa4 --- /dev/null +++ b/crates/runtime/src/sim_std.rs @@ -0,0 +1,327 @@ +//! Std-hosted entry points for running the deterministic simulator in tests. +//! +//! The portable simulator lives in [`crate::sim`]. 
This module is deliberately +//! host-specific: it installs thread-local context while a simulation is +//! running, checks determinism by replaying a seed in fresh OS threads, and +//! intercepts a few libc calls so std code cannot silently escape determinism. + +use alloc::boxed::Box; +use core::{ + cell::{Cell, RefCell}, + future::Future, + ptr, +}; +use std::sync::OnceLock; + +use crate::sim; + +// Public entry points. + +/// Return the generic runtime facade for the current simulation thread. +/// +/// Prefer passing explicit [`sim::Handle`] values in simulation code. This is a +/// hosted convenience for code paths that already accept [`crate::Runtime`]. +pub fn simulation_current() -> crate::Runtime { + crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) +} + +/// Run a future to completion with std-hosted determinism guards installed. +/// +/// This wraps [`sim::Runtime::block_on`] and is the normal entry point for DST +/// tests that execute inside a hosted process. While the future runs, this +/// function exposes the current simulation handle, routes std randomness +/// through the simulation RNG, and marks the thread as inside simulation so OS +/// thread spawns can be rejected. +pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { + let _handle_context = enter_handle_context(runtime.handle()); + let _system_thread_context = enter_simulation_thread(); + let _rng_context = enter_rng_context(runtime.rng()); + ensure_rng_hooks_linked(); + runtime.block_on(future) +} + +/// Return the current simulation handle if this thread is inside [`block_on`]. +/// +/// This is intentionally the only ambient context accessor. Time, buggify, and +/// task APIs should be reached through the returned handle or through explicit +/// handles passed by the caller. 
+pub fn current_handle() -> Option { + CURRENT_HANDLE.with(|handle| handle.borrow().clone()) +} + +/// Run the same future factory twice and assert that both runs consume the same +/// deterministic RNG/scheduler trace. +/// +/// Each pass runs on a fresh OS thread so thread-local std state is not shared +/// between the recording and replay passes. +pub fn check_determinism(seed: u64, make_future: M) -> F::Output +where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, +{ + let first = make_future.clone(); + let log = std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_log(); + block_on(&mut runtime, first()); + runtime + .take_determinism_log() + .expect("determinism log should be enabled") + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap(); + + std::thread::spawn(move || { + let mut runtime = sim::Runtime::new(seed); + runtime.enable_determinism_check(log); + let output = block_on(&mut runtime, make_future()); + runtime.finish_determinism_check().unwrap_or_else(|err| panic!("{err}")); + output + }) + .join() + .map_err(|payload| panic_with_seed(seed, payload)) + .unwrap() +} + +fn panic_with_seed(seed: u64, payload: Box) -> ! { + eprintln!("note: run with --seed {seed} to reproduce this error"); + std::panic::resume_unwind(payload); +} + +// Simulation thread context. + +// Ambient state used only while `sim_std::block_on` is driving a simulation. +// +// The simulator itself stays explicit-handle based. These thread-locals exist +// because std and libc hooks do not accept a `sim::Handle` parameter, and +// because a few hosted test helpers need a current runtime while executing on +// the simulation thread. +thread_local! { + // Lets hosted glue recover the active runtime handle without passing it + // through every call. This should stay a convenience, not the primary API. 
+ static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; + // Feeds deterministic bytes to host randomness calls made during an active + // simulation. Every such request advances the runtime RNG trace. + static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; + // Marks the current OS thread as simulation-owned so thread creation hooks + // can reject accidental escapes to the host scheduler. + static IN_SIMULATION: Cell = const { Cell::new(false) }; +} + +struct CurrentHandleGuard { + previous: Option, +} + +struct CurrentRngGuard { + previous: Option, +} + +struct SimulationThreadGuard { + previous: bool, +} + +fn enter_handle_context(handle: sim::Handle) -> CurrentHandleGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); + CurrentHandleGuard { previous } +} + +fn enter_simulation_thread() -> SimulationThreadGuard { + let previous = IN_SIMULATION.with(|state| state.replace(true)); + SimulationThreadGuard { previous } +} + +fn enter_rng_context(rng: sim::GlobalRng) -> CurrentRngGuard { + let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); + CurrentRngGuard { previous } +} + +fn in_simulation() -> bool { + IN_SIMULATION.with(Cell::get) +} + +impl Drop for CurrentHandleGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + *slot.borrow_mut() = self.previous.take(); + }); + } +} + +impl Drop for CurrentRngGuard { + fn drop(&mut self) { + CURRENT_RNG.with(|current| { + current.replace(self.previous.take()); + }); + } +} + +impl Drop for SimulationThreadGuard { + fn drop(&mut self) { + IN_SIMULATION.with(|state| { + state.set(self.previous); + }); + } +} + +// Randomness hook helpers. + +// Make sure our exported random hook is present in the final test binary. +// +// Some platforms only resolve getrandom/getentropy lazily. Calling it with a +// zero-length buffer is a no-op for behavior, but forces the symbol path to be +// linked before simulation code starts depending on it. 
+fn ensure_rng_hooks_linked() { + unsafe { + // Force the local getentropy symbol to be linked even if the host std + // library does not call it during this particular test. + getentropy(ptr::null_mut(), 0); + } +} + +// Fill bytes from the current runtime RNG when host code asks for randomness +// during an active simulation. +// +// This is the intentional deterministic substitute for OS randomness. If no +// simulation RNG is installed, the caller is outside `sim_std::block_on` and +// the libc hook should warn before delegating to the host OS. +fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { + CURRENT_RNG.with(|current| { + let Some(rng) = current.borrow().clone() else { + return false; + }; + if buflen == 0 { + return true; + } + let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; + rng.fill_bytes(buf); + true + }) +} + +// Thread hook. + +// Hook Unix thread creation by interposing `pthread_attr_init`. +// +// `std::thread::Builder::spawn` initializes pthread attributes before creating +// the thread. Returning an error here while simulation is active makes hidden +// OS thread creation fail early, before host scheduling can affect replay. +// Outside simulation, this delegates to the real libc symbol through `RTLD_NEXT`. +#[cfg(unix)] +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int { + // std::thread enters libc through pthread_attr_init on Unix. Refusing that + // call while in simulation keeps hidden OS scheduling out of DST. 
+ if in_simulation() { + eprintln!("attempt to spawn a system thread in simulation."); + eprintln!("note: use simulator tasks instead."); + return -1; + } + + type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int; + static PTHREAD_ATTR_INIT: OnceLock = OnceLock::new(); + let original = PTHREAD_ATTR_INIT.get_or_init(|| unsafe { + // `RTLD_NEXT` skips this interposed function and finds the libc + // implementation that would have been called without the simulator. + let ptr = libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original pthread_attr_init"); + std::mem::transmute(ptr) + }); + unsafe { original(attr) } +} + +// Randomness syscall hooks. + +// Hook OS randomness by interposing `getrandom`. +// +// Code running inside simulation consumes bytes from the runtime RNG. Code +// outside simulation warns and falls back to host randomness so hosted test +// code continues to work. +#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { + if fill_from_current_rng(buf, buflen) { + // Randomness requested while a simulation is active is deterministic + // and advances the runtime RNG trace. 
+ return buflen as isize; + } + + eprintln!("warning: randomness requested outside simulation; delegating to host OS"); + unsafe { real_getrandom()(buf, buflen, flags) } +} + +#[cfg(target_os = "linux")] +fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize { + type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize; + static GETRANDOM: OnceLock = OnceLock::new(); + *GETRANDOM.get_or_init(|| unsafe { + let ptr = libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()); + assert!(!ptr.is_null(), "failed to resolve original getrandom"); + std::mem::transmute(ptr) + }) +} + +#[cfg(not(target_os = "linux"))] +fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize { + compile_error!("unsupported OS for DST getrandom override"); +} + +// Hook `getentropy` and route it through the same deterministic path as +// `getrandom`. +// +// The 256-byte limit is part of the getentropy contract. Keeping this wrapper +// small means all entropy decisions stay centralized in `getrandom`. 
+#[unsafe(no_mangle)] +#[inline(never)] +unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { + if buflen > 256 { + return -1; + } + match unsafe { getrandom(buf, buflen, 0) } { + -1 => -1, + _ => 0, + } +} + +#[cfg(test)] +mod tests { + use crate::sim; + + use super::{enter_rng_context, getentropy}; + + #[test] + #[cfg(unix)] + fn runtime_forbids_system_thread_spawn() { + let mut runtime = sim::Runtime::new(200); + super::block_on(&mut runtime, async { + let result = std::panic::catch_unwind(|| std::thread::Builder::new().spawn(|| {})); + assert!(result.is_err()); + }); + } + + #[test] + fn getentropy_uses_current_sim_rng() { + let rng = sim::GlobalRng::new(20); + let _guard = enter_rng_context(rng.clone()); + + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + + let expected_rng = sim::GlobalRng::new(20); + let mut expected = [0u8; 24]; + expected_rng.fill_bytes(&mut expected); + assert_eq!(actual, expected); + } + + #[test] + fn getentropy_delegates_to_host_randomness_outside_simulation() { + let mut actual = [0u8; 24]; + unsafe { + assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); + } + } +} diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index 18d45b894a2..dea5dda6555 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -2,11 +2,18 @@ use std::{sync::Arc, time::Duration}; +use futures::{ + channel::{mpsc, oneshot}, + StreamExt, +}; use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; use spin::Mutex; #[test] fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { + // Exercises the executor, node pause/resume, and timer wheel together: + // paused node work must not run until resumed, and all nodes must observe + // one shared virtual clock. 
let mut runtime = Runtime::new(101); let handle = runtime.handle(); let node_a = runtime.create_node(); @@ -52,8 +59,91 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { assert_eq!(runtime.elapsed(), Duration::from_millis(3)); } +#[test] +fn client_server_request_response_uses_virtual_time() { + // Models a small client/server exchange without real networking: the client + // sends requests over an in-memory channel, and the server replies after + // deterministic virtual latency on a different simulated node. + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + struct Response { + id: u64, + value: u64, + at: Duration, + } + + struct Request { + id: u64, + input: u64, + respond_to: oneshot::Sender, + } + + let mut runtime = Runtime::new(404); + let handle = runtime.handle(); + let client_node = runtime.create_node(); + let server_node = runtime.create_node(); + let (request_tx, mut request_rx) = mpsc::unbounded::(); + + let responses = runtime.block_on(async move { + let server_handle = handle.clone(); + let server = handle.spawn_on(server_node, async move { + for _ in 0..3 { + let request = request_rx.next().await.expect("client should send request"); + server_handle.sleep(Duration::from_millis(request.id + 1)).await; + request + .respond_to + .send(Response { + id: request.id, + value: request.input * 10, + at: server_handle.now(), + }) + .expect("client should wait for response"); + } + }); + + let client = handle.spawn_on(client_node, async move { + let mut responses = Vec::new(); + for (id, input) in [(2, 7), (0, 4), (1, 5)] { + let (respond_to, response_rx) = oneshot::channel(); + request_tx + .unbounded_send(Request { id, input, respond_to }) + .expect("server inbox should be open"); + responses.push(response_rx.await.expect("server should reply")); + } + responses + }); + + let responses = client.await; + server.await; + responses + }); + + assert_eq!( + responses, + vec![ + Response { + id: 2, + value: 70, + at: Duration::from_millis(3) + 
}, + Response { + id: 0, + value: 40, + at: Duration::from_millis(4) + }, + Response { + id: 1, + value: 50, + at: Duration::from_millis(6) + }, + ] + ); + assert_eq!(runtime.elapsed(), Duration::from_millis(6)); +} + #[test] fn runtime_buggify_matches_standalone_rng_sequence() { + // Checks that runtime-owned buggify decisions consume the same seeded RNG + // sequence as an explicit `Rng`, making injected faults replayable by seed. let seed = 77; let runtime = Runtime::new(seed); let expected = Rng::new(seed); @@ -76,6 +166,9 @@ fn runtime_buggify_matches_standalone_rng_sequence() { #[test] fn multi_node_timeout_uses_shared_virtual_clock() { + // Verifies timeout races are driven by virtual time, not wall time: the + // fast node completes at 2ms, then the slow node times out at the shared + // 4ms deadline. let mut runtime = Runtime::new(303); let handle = runtime.handle(); let slow_node = runtime.create_node(); From 9789d707fe7c2a3652fad34ee9c09f31e1968555 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 17:02:23 +0530 Subject: [PATCH 17/40] more polishing --- crates/runtime/README.md | 55 ++++- crates/runtime/src/lib.rs | 14 +- crates/runtime/src/sim/executor.rs | 222 ++++++++++++++---- crates/runtime/src/sim/mod.rs | 4 +- crates/runtime/src/sim/time/mod.rs | 4 +- crates/runtime/src/sim_std.rs | 143 +----------- crates/runtime/tests/sim_e2e.rs | 353 ++++++++++++++++++++--------- 7 files changed, 493 insertions(+), 302 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index d0443dc3cd9..41af4cee621 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -33,28 +33,65 @@ toward `no_std + alloc` over time. This includes: - `time`: virtual clock, sleeps, and timeouts. - `rng`: seeded deterministic randomness for scheduler and workload decisions. - `buggify`: seeded fault-injection decisions. -- `config`: runtime seed and simulator configuration. 
+- `node`: node builders and node-local scheduling handles. [src/sim_std.rs](./src/sim_std.rs) contains hosted glue around simulator: -- `block_on` installs thread-local simulation context for hosted tests. +- `block_on` installs hosted simulation guards for tests. - `check_determinism` replays same seeded workload twice and compares trace. -- libc randomness hooks route entropy requests to runtime RNG while simulation - is active, and warn before delegating to host OS outside simulation. +- libc randomness hooks warn and delegate if code reaches host entropy. - Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. Tokio integration is intentionally small and lives directly in [src/lib.rs](./src/lib.rs). -The crate is intentionally hybrid because SpacetimeDB is hybrid. Host-facing -systems such as networking, subscriptions, wasm host glue, auth, process -metrics, and CLI code may continue to use hosted infrastructure. Deep-core and -DST-facing paths should instead depend on `Runtime` or narrower -domain-specific traits passed in by the caller. Feature flags: - `tokio`: enables hosted runtime backend and remains in default feature set. - `simulation`: enables deterministic simulation runtime and hosted `sim_std` helpers. + +## Scope and Limitations + +- **Single-threaded runtime.** The simulator exposes interleaving and timeout + bugs, but not bugs that require true parallel execution. The direction is to + keep deep-core code single-threaded or close to thread-per-core; simulating + real parallelism is not planned here. + +- **Nodes are not full processes.** Nodes are separate scheduling domains, but + they still run on one executor. Stronger process boundaries should be + modeled by higher-level DST harnesses. + +- **One shared virtual clock.** Nodes share one clock, so the runtime cannot + model skew or drift. If a test needs mismatched clocks, the harness should + model that above this crate. 
+ +- **No built-in network, storage, or I/O simulation.** This crate provides + deterministic execution primitives only. Higher-level harnesses should model + message delivery, disk behavior, and failures. + +- **Not a Tokio replacement.** This crate does not aim to simulate APIs like + `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level + abstraction boundary. + +- **`spawn_blocking` is only a facade on simulation.** On the simulation + backend it currently delegates to a normal spawned task, so the closure + still runs on the single executor thread and can block runtime progress. The + direction is to avoid relying on blocking-pool semantics in simulated deep + core paths. + +- **Host randomness is not controlled.** `sim_std` warns and delegates if code + reaches OS entropy. The direction is to keep deep-core code and DST + harnesses off host randomness entirely. + +- **Not fully `no_std` or allocation-controlled yet.** The simulation core is + written with a `no_std + alloc` direction in mind, so moving its core + further in that direction should be straightforward. Today, though, hosted + glue still depends on `std`, and the runtime still allocates through normal + Rust container and task paths. Tight control over heap allocation is a + direction, not something this crate enforces yet. + +- **`NodeId` still coexists with `Node`.** The direction is to move callers + toward `Node` and reduce raw `NodeId` use over time. 
diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 90345998df5..429168bc6f9 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -245,10 +245,6 @@ impl Runtime { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } - - pub fn simulation_current() -> Self { - sim_std::simulation_current() - } } impl Runtime { @@ -284,8 +280,16 @@ impl Runtime { Ok(panic_payload) => std::panic::resume_unwind(panic_payload), Err(e) => panic!("Unexpected JoinError: {e}"), }), + // This is only a facade placeholder for simulation today. It + // delegates to a normal simulated task, so the closure still runs + // on the single executor thread and can block overall runtime + // progress. Callers should not expect blocking-pool semantics on + // the simulation backend. #[cfg(feature = "simulation")] - Self::Simulation(handle) => handle.spawn_on(sim::NodeId::MAIN, async move { f() }).await, + Self::Simulation(handle) => handle + .spawn_on(sim::NodeId::MAIN, async move { f() }) + .await + .expect("simulation spawn_blocking task should not be cancelled"), #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), } diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 04abae27020..0c5dd0754ae 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -49,6 +49,79 @@ impl fmt::Display for NodeId { } } +/// Immutable metadata attached to one simulated node. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct NodeConfig { + name: Option, +} + +/// Builder for configuring a simulated node before it is created. +pub struct NodeBuilder { + handle: Handle, + config: NodeConfig, +} + +impl NodeBuilder { + /// Assign a human-readable name to the node. 
+ pub fn name(mut self, name: impl Into) -> Self { + self.config.name = Some(name.into()); + self + } + + /// Create the node with the accumulated configuration. + pub fn build(self) -> Node { + self.handle.build_node(self.config) + } +} + +/// Handle to one simulated node in the runtime. +#[derive(Clone)] +pub struct Node { + id: NodeId, + handle: Handle, + config: Arc, +} + +impl Node { + /// Return the stable identifier for this simulated node. + pub fn id(&self) -> NodeId { + self.id + } + + /// Return the optional human-readable name for this node. + pub fn name(&self) -> Option<&str> { + self.config.name.as_deref() + } + + /// Pause scheduling for this node. + pub fn pause(&self) { + self.handle.pause(self.id); + } + + /// Resume scheduling for this node. + pub fn resume(&self) { + self.handle.resume(self.id); + } + + /// Spawn a `Send` future onto this simulated node. + pub fn spawn(&self, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.handle.spawn_on(self.id, future) + } + + /// Spawn a non-`Send` future onto this simulated node. + pub fn spawn_local(&self, future: F) -> JoinHandle + where + F: Future + 'static, + F::Output: 'static, + { + self.handle.spawn_local_on(self.id, future) + } +} + /// A small single-threaded runtime for DST's top-level future. /// /// futures are scheduled as runnables, the ready queue @@ -95,7 +168,7 @@ impl Runtime { /// /// Nodes are a scheduling/pausing boundary rather than separate executors: /// all nodes still run on the same single-threaded runtime. - pub fn create_node(&self) -> NodeId { + pub fn create_node(&self) -> NodeBuilder { self.handle().create_node() } @@ -164,11 +237,6 @@ impl Runtime { pub(crate) fn finish_determinism_check(&self) -> Result<(), alloc::string::String> { self.executor.rng.finish_determinism_check() } - - #[allow(dead_code)] - pub(crate) fn rng(&self) -> Rng { - self.executor.rng.clone() - } } /// Cloneable access to the simulation executor. 
@@ -179,8 +247,21 @@ pub struct Handle { impl Handle { /// Create a new simulated node owned by this runtime. - pub fn create_node(&self) -> NodeId { - self.executor.create_node() + pub fn create_node(&self) -> NodeBuilder { + NodeBuilder { + handle: self.clone(), + config: NodeConfig::default(), + } + } + + fn build_node(&self, config: NodeConfig) -> Node { + let id = self.executor.create_node(config.clone()); + let config = self.executor.node_config(id); + Node { + id, + handle: self.clone(), + config, + } } /// Pause scheduling for a node. @@ -286,14 +367,10 @@ impl JoinHandle { } impl Future for JoinHandle { - type Output = T; + type Output = Result; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - match self.as_mut().poll_join(cx) { - Poll::Ready(Ok(output)) => Poll::Ready(output), - Poll::Ready(Err(err)) => panic!("sim task: {err}"), - Poll::Pending => Poll::Pending, - } + self.as_mut().poll_join(cx) } } @@ -373,7 +450,7 @@ impl Future for Abortable { struct Executor { queue: Receiver, sender: Sender, - nodes: spin::Mutex>>, + nodes: spin::Mutex>>, next_node: AtomicU64, rng: Rng, time: TimeHandle, @@ -384,7 +461,7 @@ impl Executor { fn new(config: RuntimeConfig) -> Self { let queue = Queue::new(); let mut nodes = BTreeMap::new(); - nodes.insert(NodeId::MAIN, Arc::new(NodeState::default())); + nodes.insert(NodeId::MAIN, Arc::new(NodeRecord::default())); Self { queue: queue.receiver(), sender: queue.sender(), @@ -419,23 +496,33 @@ impl Executor { self.rng.buggify_with_prob(probability) } - fn create_node(&self) -> NodeId { + fn create_node(&self, config: NodeConfig) -> NodeId { let id = NodeId(self.next_node.fetch_add(1, Ordering::Relaxed)); - self.nodes.lock().insert(id, Arc::new(NodeState::default())); + self.nodes.lock().insert( + id, + Arc::new(NodeRecord { + config: Arc::new(config), + state: NodeState::default(), + }), + ); id } + fn node_config(&self, node: NodeId) -> Arc { + self.node_record(node).config.clone() + } + /// Mark a node 
as paused so newly selected runnables are buffered. fn pause(&self, node: NodeId) { - self.node_state(node).paused.store(true, Ordering::Relaxed); + self.node_record(node).state.paused.store(true, Ordering::Relaxed); } /// Mark a node as runnable again and requeue any buffered tasks for it. fn resume(&self, node: NodeId) { - let state = self.node_state(node); - state.paused.store(false, Ordering::Relaxed); + let record = self.node_record(node); + record.state.paused.store(false, Ordering::Relaxed); - let mut paused = state.paused_queue.lock(); + let mut paused = record.state.paused_queue.lock(); for runnable in paused.drain(..) { self.sender.send(runnable); } @@ -447,7 +534,7 @@ impl Executor { F: Future + Send + 'static, F::Output: Send + 'static, { - self.node_state(node); + self.assert_known_node(node); let abort = AbortHandle { state: Arc::new(AbortState::new()), @@ -468,7 +555,7 @@ impl Executor { F: Future + 'static, F::Output: 'static, { - self.node_state(node); + self.assert_known_node(node); let abort = AbortHandle { state: Arc::new(AbortState::new()), @@ -521,23 +608,35 @@ impl Executor { fn run_all_ready(&self) { while let Some(runnable) = self.queue.try_recv_random(&self.rng) { let node = *runnable.metadata(); - let state = self.node_state(node); - if state.paused.load(Ordering::Relaxed) { - state.paused_queue.lock().push(runnable); + let record = self.node_record(node); + if record.state.paused.load(Ordering::Relaxed) { + record.state.paused_queue.lock().push(runnable); continue; } + // TODO: Do some time advance here too runnable.run(); } } - /// Look up the scheduling state for a node, panicking if the node is unknown. - fn node_state(&self, node: NodeId) -> Arc { + /// Look up the record for a node, panicking if the node is unknown. 
+ fn node_record(&self, node: NodeId) -> Arc { self.nodes .lock() .get(&node) .cloned() .unwrap_or_else(|| panic!("unknown simulated node {node}")) } + + fn assert_known_node(&self, node: NodeId) { + let _ = self.node_record(node); + } +} + +/// One simulated node's immutable metadata plus scheduler state. +#[derive(Clone, Default)] +struct NodeRecord { + config: Arc, + state: NodeState, } /// Per-node scheduler state shared by tasks assigned to that node. @@ -575,6 +674,7 @@ impl Future for YieldNow { } /// Shared runnable queue used by the simulation executor. +/// TODO: Make it generic over T struct Queue { inner: Arc, } @@ -650,12 +750,12 @@ mod tests { #[test] fn paused_node_does_not_run_until_resumed() { let mut runtime = Runtime::new(1); - let node = runtime.create_node(); - runtime.pause(node); + let node = runtime.create_node().name("paused").build(); + node.pause(); let runs = Arc::new(AtomicUsize::new(0)); let task_runs = Arc::clone(&runs); - let task = runtime.spawn_on(node, async move { + let task = node.spawn(async move { task_runs.fetch_add(1, Ordering::SeqCst); 7 }); @@ -665,8 +765,8 @@ mod tests { }); assert_eq!(runs.load(Ordering::SeqCst), 0); - runtime.resume(node); - assert_eq!(runtime.block_on(task), 7); + node.resume(); + assert_eq!(runtime.block_on(task).expect("paused task should complete"), 7); assert_eq!(runs.load(Ordering::SeqCst), 1); } @@ -676,8 +776,8 @@ mod tests { let handle = runtime.handle(); let value = runtime.block_on(async move { - let node = handle.create_node(); - handle.spawn_on(node, async { 11 }).await + let node = handle.create_node().name("spawned").build(); + node.spawn(async { 11 }).await.expect("spawned task should complete") }); assert_eq!(value, 11); @@ -713,26 +813,50 @@ mod tests { assert!(!runtime.is_buggify_enabled()); } - #[cfg(feature = "simulation")] #[test] - fn current_handle_can_spawn_local_task_inside_runtime() { - assert!(crate::sim_std::current_handle().is_none()); + fn 
aborted_task_returns_join_error_when_awaited() { + let mut runtime = Runtime::new(8); + let node = runtime.create_node().name("abort").build(); + let task = node.spawn(async move { + yield_now().await; + 99 + }); + task.abort_handle().abort(); + + let err = runtime + .block_on(task) + .expect_err("aborted task should surface JoinError instead of panicking"); + assert_eq!(err, JoinError); + } + #[cfg(feature = "simulation")] + #[test] + fn sim_std_block_on_can_spawn_local_task_with_explicit_handle() { let mut runtime = Runtime::new(5); - let value = crate::sim_std::block_on(&mut runtime, async { - let handle = crate::sim_std::current_handle().expect("sim handle should be present inside block_on"); - let node = handle.create_node(); + let handle = runtime.handle(); + let node = handle.create_node().name("local").build(); + let value = crate::sim_std::block_on(&mut runtime, async move { let captured = std::rc::Rc::new(17); - handle - .spawn_local_on(node, async move { - yield_now().await; - *captured - }) - .await + node.spawn_local(async move { + yield_now().await; + *captured + }) + .await + .expect("spawned local task should complete") }); assert_eq!(value, 17); - assert!(crate::sim_std::current_handle().is_none()); + } + + #[test] + fn node_builder_sets_name() { + let runtime = Runtime::new(9); + let unnamed = runtime.create_node().build(); + let named = runtime.create_node().name("replica-1").build(); + + assert_eq!(unnamed.name(), None); + assert_eq!(named.name(), Some("replica-1")); + assert_ne!(unnamed.id(), named.id()); } #[cfg(feature = "simulation")] diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index 4a87c3ef7ac..e44e2ac6707 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -9,6 +9,8 @@ mod executor; mod rng; pub mod time; -pub use executor::{yield_now, AbortHandle, Handle, JoinError, JoinHandle, NodeId, Runtime, RuntimeConfig}; +pub use executor::{ + yield_now, AbortHandle, Handle, JoinError, 
JoinHandle, Node, NodeBuilder, NodeId, Runtime, RuntimeConfig, +}; pub(crate) use rng::DeterminismLog; pub use rng::{GlobalRng, Rng}; diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index ed559fa70d3..65a1663429b 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -239,8 +239,8 @@ mod tests { fast_order.lock().push(3); }); - fast.await; - slow.await; + fast.await.expect("fast timer task should complete"); + slow.await.expect("slow timer task should complete"); } }); diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index 5f936cc5aa4..cdb425a2086 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -6,49 +6,23 @@ //! intercepts a few libc calls so std code cannot silently escape determinism. use alloc::boxed::Box; -use core::{ - cell::{Cell, RefCell}, - future::Future, - ptr, -}; +use core::{cell::Cell, future::Future}; use std::sync::OnceLock; use crate::sim; // Public entry points. -/// Return the generic runtime facade for the current simulation thread. -/// -/// Prefer passing explicit [`sim::Handle`] values in simulation code. This is a -/// hosted convenience for code paths that already accept [`crate::Runtime`]. -pub fn simulation_current() -> crate::Runtime { - crate::Runtime::simulation(current_handle().expect("simulation runtime is not active on this thread")) -} - /// Run a future to completion with std-hosted determinism guards installed. /// /// This wraps [`sim::Runtime::block_on`] and is the normal entry point for DST /// tests that execute inside a hosted process. While the future runs, this -/// function exposes the current simulation handle, routes std randomness -/// through the simulation RNG, and marks the thread as inside simulation so OS -/// thread spawns can be rejected. +/// marks the thread as inside simulation so OS thread spawns can be rejected. 
pub fn block_on(runtime: &mut sim::Runtime, future: F) -> F::Output { - let _handle_context = enter_handle_context(runtime.handle()); let _system_thread_context = enter_simulation_thread(); - let _rng_context = enter_rng_context(runtime.rng()); - ensure_rng_hooks_linked(); runtime.block_on(future) } -/// Return the current simulation handle if this thread is inside [`block_on`]. -/// -/// This is intentionally the only ambient context accessor. Time, buggify, and -/// task APIs should be reached through the returned handle or through explicit -/// handles passed by the caller. -pub fn current_handle() -> Option { - CURRENT_HANDLE.with(|handle| handle.borrow().clone()) -} - /// Run the same future factory twice and assert that both runs consume the same /// deterministic RNG/scheduler trace. /// @@ -94,69 +68,28 @@ fn panic_with_seed(seed: u64, payload: Box) -> ! { // Ambient state used only while `sim_std::block_on` is driving a simulation. // -// The simulator itself stays explicit-handle based. These thread-locals exist -// because std and libc hooks do not accept a `sim::Handle` parameter, and -// because a few hosted test helpers need a current runtime while executing on -// the simulation thread. +// The simulator itself stays explicit-handle based. This thread-local only +// marks whether the current OS thread is owned by a running simulation so +// host thread creation can be rejected. thread_local! { - // Lets hosted glue recover the active runtime handle without passing it - // through every call. This should stay a convenience, not the primary API. - static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; - // Feeds deterministic bytes to host randomness calls made during an active - // simulation. Every such request advances the runtime RNG trace. - static CURRENT_RNG: RefCell> = const { RefCell::new(None) }; // Marks the current OS thread as simulation-owned so thread creation hooks // can reject accidental escapes to the host scheduler. 
static IN_SIMULATION: Cell = const { Cell::new(false) }; } -struct CurrentHandleGuard { - previous: Option, -} - -struct CurrentRngGuard { - previous: Option, -} - struct SimulationThreadGuard { previous: bool, } -fn enter_handle_context(handle: sim::Handle) -> CurrentHandleGuard { - let previous = CURRENT_HANDLE.with(|slot| slot.borrow_mut().replace(handle)); - CurrentHandleGuard { previous } -} - fn enter_simulation_thread() -> SimulationThreadGuard { let previous = IN_SIMULATION.with(|state| state.replace(true)); SimulationThreadGuard { previous } } -fn enter_rng_context(rng: sim::GlobalRng) -> CurrentRngGuard { - let previous = CURRENT_RNG.with(|current| current.replace(Some(rng))); - CurrentRngGuard { previous } -} - fn in_simulation() -> bool { IN_SIMULATION.with(Cell::get) } -impl Drop for CurrentHandleGuard { - fn drop(&mut self) { - CURRENT_HANDLE.with(|slot| { - *slot.borrow_mut() = self.previous.take(); - }); - } -} - -impl Drop for CurrentRngGuard { - fn drop(&mut self) { - CURRENT_RNG.with(|current| { - current.replace(self.previous.take()); - }); - } -} - impl Drop for SimulationThreadGuard { fn drop(&mut self) { IN_SIMULATION.with(|state| { @@ -165,41 +98,6 @@ impl Drop for SimulationThreadGuard { } } -// Randomness hook helpers. - -// Make sure our exported random hook is present in the final test binary. -// -// Some platforms only resolve getrandom/getentropy lazily. Calling it with a -// zero-length buffer is a no-op for behavior, but forces the symbol path to be -// linked before simulation code starts depending on it. -fn ensure_rng_hooks_linked() { - unsafe { - // Force the local getentropy symbol to be linked even if the host std - // library does not call it during this particular test. - getentropy(ptr::null_mut(), 0); - } -} - -// Fill bytes from the current runtime RNG when host code asks for randomness -// during an active simulation. -// -// This is the intentional deterministic substitute for OS randomness. 
If no -// simulation RNG is installed, the caller is outside `sim_std::block_on` and -// the libc hook should warn before delegating to the host OS. -fn fill_from_current_rng(buf: *mut u8, buflen: usize) -> bool { - CURRENT_RNG.with(|current| { - let Some(rng) = current.borrow().clone() else { - return false; - }; - if buflen == 0 { - return true; - } - let buf = unsafe { core::slice::from_raw_parts_mut(buf, buflen) }; - rng.fill_bytes(buf); - true - }) -} - // Thread hook. // Hook Unix thread creation by interposing `pthread_attr_init`. @@ -236,19 +134,12 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: // Hook OS randomness by interposing `getrandom`. // -// Code running inside simulation consumes bytes from the runtime RNG. Code -// outside simulation warns and falls back to host randomness so hosted test -// code continues to work. +// This crate no longer tries to make host randomness deterministic. Any such +// request is surfaced with a warning and then delegated to the host OS. #[unsafe(no_mangle)] #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - if fill_from_current_rng(buf, buflen) { - // Randomness requested while a simulation is active is deterministic - // and advances the runtime RNG trace. 
- return buflen as isize; - } - - eprintln!("warning: randomness requested outside simulation; delegating to host OS"); + eprintln!("warning: randomness requested; delegating to host OS"); unsafe { real_getrandom()(buf, buflen, flags) } } @@ -289,7 +180,7 @@ unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 { mod tests { use crate::sim; - use super::{enter_rng_context, getentropy}; + use super::getentropy; #[test] #[cfg(unix)] @@ -301,22 +192,6 @@ mod tests { }); } - #[test] - fn getentropy_uses_current_sim_rng() { - let rng = sim::GlobalRng::new(20); - let _guard = enter_rng_context(rng.clone()); - - let mut actual = [0u8; 24]; - unsafe { - assert_eq!(getentropy(actual.as_mut_ptr(), actual.len()), 0); - } - - let expected_rng = sim::GlobalRng::new(20); - let mut expected = [0u8; 24]; - expected_rng.fill_bytes(&mut expected); - assert_eq!(actual, expected); - } - #[test] fn getentropy_delegates_to_host_randomness_outside_simulation() { let mut actual = [0u8; 24]; diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index dea5dda6555..3757ee187fa 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -9,25 +9,252 @@ use futures::{ use spacetimedb_runtime::sim::{buggify, Rng, Runtime}; use spin::Mutex; +/// One reply produced by the simulated server. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct Response { + id: u64, + value: u64, + at: Duration, +} + +/// Trace entries recorded by the server so tests can assert schedule/fault outcomes. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ServerEvent { + Received { id: u64, at: Duration }, + Dropped { id: u64, at: Duration }, + Replied { id: u64, at: Duration }, +} + +/// A client request submitted to the simulated server. +struct Request { + id: u64, + input: u64, + respond_to: oneshot::Sender, +} + +/// Complete result of the client/server workload for one seed. 
+#[derive(Debug, Eq, PartialEq)] +struct ClientServerRun { + responses: Vec<(u64, Option)>, + server_events: Vec, + elapsed: Duration, +} + +/// Checks the "same seed, same trace" side of the client/server workload. +/// Both the client-visible results and the server-side event trace should stay +/// stable for one fixed seed. +#[test] +fn client_server_buggify_injects_deterministic_faults() { + let run = run_buggified_client_server(404); + + assert_eq!( + run.responses, + vec![ + (0, None), + ( + 1, + Some(Response { + id: 1, + value: 50, + at: Duration::from_millis(2), + }), + ), + ( + 2, + Some(Response { + id: 2, + value: 70, + at: Duration::from_millis(3), + }), + ), + (3, None), + ( + 4, + Some(Response { + id: 4, + value: 110, + at: Duration::from_millis(5), + }), + ), + ] + ); + assert_eq!( + run.server_events, + vec![ + ServerEvent::Received { + id: 3, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 0, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 2, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 4, + at: Duration::ZERO, + }, + ServerEvent::Received { + id: 1, + at: Duration::ZERO, + }, + ServerEvent::Dropped { + id: 0, + at: Duration::from_millis(1), + }, + ServerEvent::Replied { + id: 1, + at: Duration::from_millis(2), + }, + ServerEvent::Replied { + id: 2, + at: Duration::from_millis(3), + }, + ServerEvent::Dropped { + id: 3, + at: Duration::from_millis(4), + }, + ServerEvent::Replied { + id: 4, + at: Duration::from_millis(5), + }, + ] + ); + assert_eq!(run.elapsed, Duration::from_millis(5)); +} + +/// Checks the "different seed, different exploration" side of the same +/// client/server workload. The full run result should differ across seeds. 
+#[test] +fn client_server_buggify_differs_across_seeds() { + let seed_404 = run_buggified_client_server(404); + let seed_405 = run_buggified_client_server(405); + + eprintln!("seed 404: {seed_404:#?}"); + eprintln!("seed 405: {seed_405:#?}"); + assert_ne!(seed_404, seed_405); +} + +/// Fixed request set used by the client workload. +const CLIENT_REQUESTS: [(u64, u64); 5] = [(0, 4), (1, 5), (2, 7), (3, 9), (4, 11)]; + +/// Run a small concurrent client/server workload under one seed. +/// +/// The client submits every request from its own simulated task. The server +/// receives requests in scheduler order, then spawns one worker per request. +/// Each worker sleeps for deterministic virtual latency and may drop the reply +/// based on buggify. +fn run_buggified_client_server(seed: u64) -> ClientServerRun { + let mut runtime = Runtime::new(seed); + buggify::enable(&runtime); + let handle = runtime.handle(); + let client_node = runtime.create_node().name("client").build(); + let server_node = runtime.create_node().name("server").build(); + let (request_tx, mut request_rx) = mpsc::unbounded::(); + let server_events = Arc::new(Mutex::new(Vec::new())); + + let (responses, server_events) = runtime.block_on(async move { + let server_handle = handle.clone(); + let server_events_for_server = Arc::clone(&server_events); + let server = server_node.clone().spawn(async move { + let mut workers = Vec::new(); + for _ in 0..5 { + let request = request_rx.next().await.expect("client should send request"); + server_events_for_server.lock().push(ServerEvent::Received { + id: request.id, + at: server_handle.now(), + }); + + let worker_handle = server_handle.clone(); + let worker_events = Arc::clone(&server_events_for_server); + workers.push(server_node.clone().spawn(async move { + worker_handle.sleep(Duration::from_millis(request.id + 1)).await; + if worker_handle.buggify_with_prob(0.4) { + worker_events.lock().push(ServerEvent::Dropped { + id: request.id, + at: worker_handle.now(), 
+ }); + return; + } + + let response = Response { + id: request.id, + value: request.input * 10, + at: worker_handle.now(), + }; + worker_events.lock().push(ServerEvent::Replied { + id: request.id, + at: response.at, + }); + request + .respond_to + .send(response) + .expect("client should wait for response"); + })); + } + + for worker in workers { + worker.await.expect("server worker should complete"); + } + }); + + let client_outer_node = client_node.clone(); + let client = client_node.spawn(async move { + let mut requests = Vec::new(); + for (id, input) in CLIENT_REQUESTS { + let request_tx = request_tx.clone(); + let client_request_node = client_outer_node.clone(); + requests.push(client_request_node.spawn(async move { + let (respond_to, response_rx) = oneshot::channel(); + request_tx + .unbounded_send(Request { id, input, respond_to }) + .expect("server inbox should be open"); + (id, response_rx.await.ok()) + })); + } + drop(request_tx); + + let mut responses = Vec::new(); + for request in requests { + responses.push(request.await.expect("client request task should complete")); + } + responses + }); + + let responses = client.await.expect("client task should complete"); + server.await.expect("server task should complete"); + (responses, server_events.lock().clone()) + }); + + ClientServerRun { + responses, + server_events, + elapsed: runtime.elapsed(), + } +} + +/// Exercises the executor, node pause/resume, and timer wheel together: +/// paused node work must not run until resumed, and all nodes must observe +/// one shared virtual clock. #[test] fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { - // Exercises the executor, node pause/resume, and timer wheel together: - // paused node work must not run until resumed, and all nodes must observe - // one shared virtual clock. 
let mut runtime = Runtime::new(101); let handle = runtime.handle(); - let node_a = runtime.create_node(); - let node_b = runtime.create_node(); + let node_a = runtime.create_node().name("a").build(); + let node_b = runtime.create_node().name("b").build(); let events = Arc::new(Mutex::new(Vec::new())); - runtime.pause(node_b); + node_b.pause(); runtime.block_on({ let events = Arc::clone(&events); async move { let a_handle = handle.clone(); let a_events = Arc::clone(&events); - let a = handle.spawn_on(node_a, async move { + let a = node_a.spawn(async move { a_events.lock().push(("a_started", a_handle.now())); a_handle.sleep(Duration::from_millis(3)).await; a_events.lock().push(("a_finished", a_handle.now())); @@ -35,7 +262,7 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { let b_handle = handle.clone(); let b_events = Arc::clone(&events); - let b = handle.spawn_on(node_b, async move { + let b = node_b.spawn(async move { b_events.lock().push(("b_started", b_handle.now())); b_handle.sleep(Duration::from_millis(2)).await; b_events.lock().push(("b_finished", b_handle.now())); @@ -43,10 +270,10 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { handle.sleep(Duration::from_millis(1)).await; events.lock().push(("main_resumed_b", handle.now())); - handle.resume(node_b); + node_b.resume(); - a.await; - b.await; + a.await.expect("node a task should complete"); + b.await.expect("node b task should complete"); } }); @@ -59,91 +286,10 @@ fn multi_node_runtime_coordinates_pause_resume_and_virtual_time() { assert_eq!(runtime.elapsed(), Duration::from_millis(3)); } -#[test] -fn client_server_request_response_uses_virtual_time() { - // Models a small client/server exchange without real networking: the client - // sends requests over an in-memory channel, and the server replies after - // deterministic virtual latency on a different simulated node. 
- #[derive(Clone, Copy, Debug, Eq, PartialEq)] - struct Response { - id: u64, - value: u64, - at: Duration, - } - - struct Request { - id: u64, - input: u64, - respond_to: oneshot::Sender, - } - - let mut runtime = Runtime::new(404); - let handle = runtime.handle(); - let client_node = runtime.create_node(); - let server_node = runtime.create_node(); - let (request_tx, mut request_rx) = mpsc::unbounded::(); - - let responses = runtime.block_on(async move { - let server_handle = handle.clone(); - let server = handle.spawn_on(server_node, async move { - for _ in 0..3 { - let request = request_rx.next().await.expect("client should send request"); - server_handle.sleep(Duration::from_millis(request.id + 1)).await; - request - .respond_to - .send(Response { - id: request.id, - value: request.input * 10, - at: server_handle.now(), - }) - .expect("client should wait for response"); - } - }); - - let client = handle.spawn_on(client_node, async move { - let mut responses = Vec::new(); - for (id, input) in [(2, 7), (0, 4), (1, 5)] { - let (respond_to, response_rx) = oneshot::channel(); - request_tx - .unbounded_send(Request { id, input, respond_to }) - .expect("server inbox should be open"); - responses.push(response_rx.await.expect("server should reply")); - } - responses - }); - - let responses = client.await; - server.await; - responses - }); - - assert_eq!( - responses, - vec![ - Response { - id: 2, - value: 70, - at: Duration::from_millis(3) - }, - Response { - id: 0, - value: 40, - at: Duration::from_millis(4) - }, - Response { - id: 1, - value: 50, - at: Duration::from_millis(6) - }, - ] - ); - assert_eq!(runtime.elapsed(), Duration::from_millis(6)); -} - +/// Checks that runtime-owned buggify decisions consume the same seeded RNG +/// sequence as an explicit `Rng`, making injected faults replayable by seed. 
#[test] fn runtime_buggify_matches_standalone_rng_sequence() { - // Checks that runtime-owned buggify decisions consume the same seeded RNG - // sequence as an explicit `Rng`, making injected faults replayable by seed. let seed = 77; let runtime = Runtime::new(seed); let expected = Rng::new(seed); @@ -164,19 +310,19 @@ fn runtime_buggify_matches_standalone_rng_sequence() { assert!(!buggify::should_inject_fault_with_prob(&runtime, 1.0)); } +/// Verifies timeout races are driven by virtual time, not wall time: the fast +/// node completes at 2ms, then the slow node times out at the shared 4ms +/// deadline. #[test] fn multi_node_timeout_uses_shared_virtual_clock() { - // Verifies timeout races are driven by virtual time, not wall time: the - // fast node completes at 2ms, then the slow node times out at the shared - // 4ms deadline. let mut runtime = Runtime::new(303); let handle = runtime.handle(); - let slow_node = runtime.create_node(); - let fast_node = runtime.create_node(); + let slow_node = runtime.create_node().name("slow").build(); + let fast_node = runtime.create_node().name("fast").build(); let output = runtime.block_on(async move { let slow_handle = handle.clone(); - let slow = handle.spawn_on(slow_node, async move { + let slow = slow_node.spawn(async move { slow_handle .timeout(Duration::from_millis(4), async { slow_handle.sleep(Duration::from_millis(10)).await; @@ -186,12 +332,15 @@ fn multi_node_timeout_uses_shared_virtual_clock() { }); let fast_handle = handle.clone(); - let fast = handle.spawn_on(fast_node, async move { + let fast = fast_node.spawn(async move { fast_handle.sleep(Duration::from_millis(2)).await; ("fast-finished", fast_handle.now()) }); - (slow.await, fast.await) + ( + slow.await.expect("slow node task should complete"), + fast.await.expect("fast node task should complete"), + ) }); let (slow, fast) = output; From 8cd609cc44865332d74c36ac70c3945795e9b836 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 17:22:16 
+0530 Subject: [PATCH 18/40] update readme --- crates/runtime/README.md | 48 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 41af4cee621..a7b2fdefa71 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -4,13 +4,14 @@ under deterministic simulation testing (DST). DST runs code inside a deterministic simulator that controls nondeterministic -inputs instead of letting them come directly from host environment. Given same +inputs instead of letting them come directly from the OS and real runtime +environment. Given same seed, simulator should produce same trace. When it finds a bug, seed should be enough to reproduce that bug exactly. For this to work, code under test must not read clocks, randomness, scheduling, I/O, or network behavior directly from outer environment. Those -effects need interfaces that production can implement with hosted services and +effects need interfaces that production can implement with real runtime-backed services and DST can replace with simulated ones. This crate provides the execution-control part of that boundary: spawning, @@ -23,11 +24,13 @@ higher-level abstractions. [src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code carries. It has two variants: -- `Runtime::Tokio(TokioHandle)` for hosted execution. +- `Runtime::Tokio(TokioHandle)` for real runtime execution. - `Runtime::Simulation(sim::Handle)` for deterministic simulation. [src/sim](./src/sim) contains simulation core. It is single-threaded and aims -toward `no_std + alloc` over time. This includes: +toward `no_std + alloc` over time. It is written with dependency reduction in +mind, though it still uses a small set of runtime-support crates today. +The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. 
@@ -35,11 +38,11 @@ toward `no_std + alloc` over time. This includes: - `buggify`: seeded fault-injection decisions. - `node`: node builders and node-local scheduling handles. -[src/sim_std.rs](./src/sim_std.rs) contains hosted glue around simulator: +[src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: -- `block_on` installs hosted simulation guards for tests. +- `block_on` installs simulation guards for tests running in a normal process. - `check_determinism` replays same seeded workload twice and compares trace. -- libc randomness hooks warn and delegate if code reaches host entropy. +- libc randomness hooks warn and delegate if code reaches OS entropy. - Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. @@ -49,8 +52,8 @@ Tokio integration is intentionally small and lives directly in Feature flags: -- `tokio`: enables hosted runtime backend and remains in default feature set. -- `simulation`: enables deterministic simulation runtime and hosted `sim_std` +- `tokio`: enables tokio runtime backend and remains in default feature set. +- `simulation`: enables deterministic simulation runtime and `sim_std` helpers. ## Scope and Limitations @@ -58,15 +61,10 @@ Feature flags: - **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating - real parallelism is not planned here. + real parallelism is out of scope. -- **Nodes are not full processes.** Nodes are separate scheduling domains, but - they still run on one executor. Stronger process boundaries should be - modeled by higher-level DST harnesses. - -- **One shared virtual clock.** Nodes share one clock, so the runtime cannot - model skew or drift. If a test needs mismatched clocks, the harness should - model that above this crate. 
+- **One shared virtual clock.** Nodes share one clock, so thir clock can not + drift to show mismatched time. - **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model @@ -76,22 +74,18 @@ Feature flags: `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. -- **`spawn_blocking` is only a facade on simulation.** On the simulation +- **`spawn_blocking` is only a facade on simulation runtime.** On the simulation backend it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The - direction is to avoid relying on blocking-pool semantics in simulated deep - core paths. + direction is to avoid relying on blocking-pool semantics. -- **Host randomness is not controlled.** `sim_std` warns and delegates if code - reaches OS entropy. The direction is to keep deep-core code and DST - harnesses off host randomness entirely. +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS + entropy. The direction is to keep application code and testing harnesses off + OS randomness entirely. - **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core - further in that direction should be straightforward. Today, though, hosted + further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. Tight control over heap allocation is a direction, not something this crate enforces yet. - -- **`NodeId` still coexists with `Node`.** The direction is to move callers - toward `Node` and reduce raw `NodeId` use over time. 
From 730028f47765eb2f5d91a850f5036240f9dfa553 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:14:51 +0530 Subject: [PATCH 19/40] Runtime -> Handle --- crates/core/src/db/durability.rs | 4 ++-- crates/core/src/db/persistence.rs | 12 ++++++------ crates/core/src/db/relational_db.rs | 15 +++++++-------- crates/core/src/db/snapshot.rs | 10 +++++----- crates/durability/src/imp/local.rs | 16 ++++++++-------- crates/runtime/src/lib.rs | 20 ++++---------------- 6 files changed, 32 insertions(+), 45 deletions(-) diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index d712630a63a..f749f72850a 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -11,7 +11,7 @@ use spacetimedb_lib::Identity; use spacetimedb_sats::ProductValue; use crate::db::persistence::Durability; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; pub(super) fn request_durability( durability: &Durability, @@ -32,7 +32,7 @@ pub(super) fn request_durability( })); } -pub(super) fn spawn_close(durability: Arc, runtime: &Runtime, database_identity: Identity) { +pub(super) fn spawn_close(durability: Arc, runtime: &Handle, database_identity: Identity) { let label = format!("[{database_identity}]"); let runtime = runtime.clone(); runtime.clone().spawn(async move { diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index dbd7e42c22c..ce3ef5d6841 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -7,7 +7,7 @@ use spacetimedb_paths::server::ServerDataDir; use spacetimedb_snapshot::DynSnapshotRepo; use crate::{messages::control_db::Database, util::asyncify}; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; use super::{ relational_db::{self, Txdata}, @@ -43,7 +43,7 @@ pub struct Persistence { /// this type. pub snapshots: Option, /// Runtime onto which durability-related tasks shall be spawned. 
- pub runtime: Runtime, + pub runtime: Handle, } impl Persistence { @@ -54,14 +54,14 @@ impl Persistence { snapshots: Option, runtime: tokio::runtime::Handle, ) -> Self { - Self::new_with_runtime(durability, disk_size, snapshots, Runtime::tokio(runtime)) + Self::new_with_runtime(durability, disk_size, snapshots, Handle::tokio(runtime)) } pub fn new_with_runtime( durability: impl spacetimedb_durability::Durability + 'static, disk_size: impl Fn() -> io::Result + Send + Sync + 'static, snapshots: Option, - runtime: Runtime, + runtime: Handle, ) -> Self { Self { durability: Arc::new(durability), @@ -101,7 +101,7 @@ impl Persistence { Option>, Option, Option, - Option, + Option, ) { this.map( |Self { @@ -153,7 +153,7 @@ impl PersistenceProvider for LocalPersistenceProvider { async fn persistence(&self, database: &Database, replica_id: u64) -> anyhow::Result { let replica_dir = self.data_dir.replica(replica_id); let snapshot_dir = replica_dir.snapshots(); - let runtime = Runtime::tokio_current(); + let runtime = Handle::tokio_current(); let database_identity = database.database_identity; let snapshot_worker = diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ae76eef632a..e72741577bb 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -44,7 +44,7 @@ use spacetimedb_lib::Identity; use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; -use spacetimedb_runtime::Runtime; +use spacetimedb_runtime::Handle; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue}; @@ -102,7 +102,7 @@ pub struct RelationalDB { inner: Locking, durability: Option>, - durability_runtime: Option, + durability_runtime: Option, snapshot_worker: Option, row_count_fn: RowCountFn, @@ -136,7 +136,6 @@ impl 
std::fmt::Debug for RelationalDB { impl Drop for RelationalDB { fn drop(&mut self) { - log::info!("starting drop"); // Attempt to flush the outstanding transactions. if let (Some(durability), Some(runtime)) = (self.durability.take(), self.durability_runtime.take()) { spawn_durability_close(durability, &runtime, self.database_identity); @@ -1673,7 +1672,7 @@ pub type LocalDurability = Arc>; /// of the commitlog. pub async fn local_durability( replica_dir: ReplicaDir, - runtime: Runtime, + runtime: Handle, snapshot_worker: Option<&SnapshotWorker>, ) -> Result<(LocalDurability, DiskSizeFn), DBError> { let on_new_segment = snapshot_worker.map(|snapshot_worker| { @@ -1954,12 +1953,12 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), db_identity, replica_id).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Disabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Handle::tokio(rt.clone())) }) }) .transpose()?; - let runtime = Runtime::tokio(rt.clone()); + let runtime = Handle::tokio(rt.clone()); let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); @@ -2082,11 +2081,11 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Runtime::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Enabled, Handle::tokio(rt.clone())) }) }) .transpose()?; - let runtime = Runtime::tokio(rt.clone()); + let runtime = Handle::tokio(rt.clone()); let (local, disk_size_fn) = rt.block_on(local_durability(root.clone(), runtime.clone(), snapshots.as_ref()))?; let history = local.as_history(); diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 63f582b68b4..c02e5f6246c 100644 --- 
a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -17,8 +17,8 @@ use spacetimedb_lib::Identity; use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; use tokio::sync::watch; -use crate::{db::snapshot, worker_metrics::WORKER_METRICS}; -use spacetimedb_runtime::Runtime; +use crate::worker_metrics::WORKER_METRICS; +use spacetimedb_runtime::Handle; pub type SnapshotDatabaseState = Arc>; @@ -70,7 +70,7 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. - pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Runtime) -> Self { + pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Handle) -> Self { let database = snapshot_repo.database_identity(); let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: Runtime, + runtime: Handle, compression: Option, } @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: Runtime, + runtime: Handle, } impl Compressor { diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index 7a0c29760e1..e3eca56e5d9 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -19,7 +19,7 @@ use spacetimedb_commitlog::{ }; use spacetimedb_fs_utils::lockfile::advisory::{LockError, LockedFile}; use spacetimedb_paths::server::ReplicaDir; -use spacetimedb_runtime::{JoinHandle, Runtime}; +use spacetimedb_runtime::{Handle, JoinHandle}; use thiserror::Error; use tokio::sync::watch; use tracing::{instrument, Span}; @@ -123,7 +123,7 @@ impl Local { /// This is used to capture a 
snapshot each new segment. pub fn open( replica_dir: ReplicaDir, - runtime: Runtime, + rt: Handle, opts: Options, on_new_segment: Option>, ) -> Result { @@ -138,7 +138,7 @@ impl Local { opts.commitlog, on_new_segment, )?); - Self::open_inner(clog, runtime, opts, Some(lock)) + Self::open_inner(clog, rt, opts, Some(lock)) } } @@ -148,7 +148,7 @@ where R: RepoWithoutLockFile + Send + Sync + 'static, { /// Create a [`Local`] instance backed by the provided commitlog repo. - pub fn open_with_repo(repo: R, rt: Runtime, opts: Options) -> Result { + pub fn open_with_repo(repo: R, rt: Handle, opts: Options) -> Result { info!("open local durability"); let clog = Arc::new(Commitlog::open_with_repo(repo, opts.commitlog)?); Self::open_inner(clog, rt, opts, None) @@ -162,7 +162,7 @@ where { fn open_inner( clog: Arc, R>>, - runtime: Runtime, + rt: Handle, opts: Options, lock: Option, ) -> Result { @@ -170,13 +170,13 @@ where let (queue, txdata_rx) = async_channel::bounded(queue_capacity); let queue_depth = Arc::new(AtomicU64::new(0)); let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset()); - let actor = runtime.spawn( + let actor = rt.spawn( Actor { clog: clog.clone(), durable_offset: durable_tx, queue_depth: queue_depth.clone(), batch_capacity: opts.batch_capacity, - runtime: runtime.clone(), + runtime: rt.clone(), lock, } .run(txdata_rx), @@ -241,7 +241,7 @@ where queue_depth: Arc, batch_capacity: NonZeroUsize, - runtime: Runtime, + runtime: Handle, #[allow(unused)] lock: Option, diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 429168bc6f9..65e35c8793f 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -20,21 +20,9 @@ pub mod sim_std; #[cfg(feature = "tokio")] pub type TokioHandle = tokio::runtime::Handle; -#[cfg(feature = "tokio")] -pub type TokioRuntime = tokio::runtime::Runtime; - -#[cfg(feature = "tokio")] -pub fn current_handle_or_new_runtime() -> std::io::Result<(TokioHandle, Option)> { - if let Ok(handle) 
= TokioHandle::try_current() { - return Ok((handle, None)); - } - - let runtime = TokioRuntime::new()?; - Ok((runtime.handle().clone(), Some(runtime))) -} #[derive(Clone)] -pub enum Runtime { +pub enum Handle { #[cfg(feature = "tokio")] Tokio(TokioHandle), #[cfg(feature = "simulation")] @@ -230,7 +218,7 @@ impl fmt::Display for RuntimeTimeout { impl std::error::Error for RuntimeTimeout {} #[cfg(feature = "tokio")] -impl Runtime { +impl Handle { pub fn tokio(handle: TokioHandle) -> Self { Self::Tokio(handle) } @@ -241,13 +229,13 @@ impl Runtime { } #[cfg(feature = "simulation")] -impl Runtime { +impl Handle { pub fn simulation(handle: sim::Handle) -> Self { Self::Simulation(handle) } } -impl Runtime { +impl Handle { pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; From 35cbea9baaad81e83034d669ab697141358f9d8d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:29:44 +0530 Subject: [PATCH 20/40] Apply suggestions from code review Co-authored-by: Shubham Mishra Signed-off-by: Shubham Mishra --- crates/commitlog/src/lib.rs | 4 ---- crates/core/src/db/relational_db.rs | 2 +- crates/core/src/db/snapshot.rs | 20 +++++++++---------- .../subscription/module_subscription_actor.rs | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index 075de217ff9..d2f6f20cdd5 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -214,10 +214,6 @@ where } } -impl Commitlog -where - R: Repo, -{ /// Determine the maximum transaction offset considered durable. /// /// The offset is `None` if the log hasn't been flushed to disk yet. 
diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index e72741577bb..fd96c3067c6 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -2081,7 +2081,7 @@ pub mod tests_utils { let snapshots = want_snapshot_repo .then(|| { open_snapshot_repo(root.snapshots(), Identity::ZERO, 0).map(|repo| { - SnapshotWorker::new(repo, snapshot::Compression::Enabled, Handle::tokio(rt.clone())) + SnapshotWorker::new(repo, snapshot::Compression::Disabled, Handle::tokio(rt.clone())) }) }) .transpose()?; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index c02e5f6246c..047da5cc308 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -70,20 +70,20 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repo: Arc, compression: Compression, runtime: Handle) -> Self { - let database = snapshot_repo.database_identity(); - let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repository: Arc, compression: Compression, runtime: Handle) -> Self { + let database = snapshot_repository.database_identity(); + let latest_snapshot = snapshot_repository..latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repo.clone(), + snapshot_repository.: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), runtime: runtime.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repo.clone(), + snapshot_repository.: snapshot_repo.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), runtime: runtime.clone(), @@ -94,7 +94,7 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository: snapshot_repo, + snapshot_repository, } } @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - runtime: Handle, + rt: Handle, compression: Option, } @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - runtime: Handle, + rt: Handle, } impl Compressor { @@ -349,8 +349,8 @@ impl Compressor { let range = start..latest_snapshot; let mut stats = self.stats.take().unwrap_or_default(); - let runtime = self.runtime.clone(); - let (mut stats, res) = runtime + let rt = self.rt.clone(); + let (mut stats, res) = rt .spawn_blocking({ let range = range.clone(); move || { diff --git a/crates/core/src/subscription/module_subscription_actor.rs 
b/crates/core/src/subscription/module_subscription_actor.rs index f9c9b13ae04..4c94df74ab8 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2103,7 +2103,7 @@ mod tests { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), snapshots: None, - runtime: spacetimedb_runtime::Runtime::tokio(rt), + runtime: spacetimedb_runtime::Handle::tokio(rt), }), None, 0, From 5af7fd91497b70b2a93ded78b966054dc7dc6e70 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:30:26 +0530 Subject: [PATCH 21/40] Update crates/commitlog/src/lib.rs Signed-off-by: Shubham Mishra --- crates/commitlog/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index d2f6f20cdd5..abc8729c978 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -212,7 +212,6 @@ where inner: RwLock::new(inner), }) } -} /// Determine the maximum transaction offset considered durable. /// From 52783ce598247f25122ac3117b816a784e0a6a1c Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 18:50:59 +0530 Subject: [PATCH 22/40] compile fix --- crates/core/src/db/snapshot.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 047da5cc308..178bbda3d72 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -70,26 +70,26 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression, runtime: Handle) -> Self { + pub fn new(snapshot_repository: Arc, compression: Compression, rt: Handle) -> Self { let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository..latest_snapshot().ok().flatten().unwrap_or(0); + let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repository.: snapshot_repo.clone(), + snapshot_repo: snapshot_repository.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), - runtime: runtime.clone(), + rt: rt.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repository.: snapshot_repo.clone(), + snapshot_repo: snapshot_repository.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), - runtime: runtime.clone(), + rt: rt.clone(), }), }; - runtime.spawn(actor.run()); + rt.spawn(actor.run()); Self { snapshot_created, @@ -172,7 +172,7 @@ struct SnapshotWorkerActor { snapshot_repo: Arc, snapshot_created: watch::Sender, metrics: SnapshotMetrics, - rt: Handle, + rt: Handle, compression: Option, } @@ -224,7 +224,7 @@ impl SnapshotWorkerActor { let inner_timer = self.metrics.snapshot_timing_inner.clone(); let snapshot_repo = self.snapshot_repo.clone(); - let runtime = self.runtime.clone(); + let runtime = self.rt.clone(); let database_identity = self.snapshot_repo.database_identity(); @@ -317,7 +317,7 @@ struct Compressor { snapshot_repo: Arc, metrics: CompressionMetrics, stats: Option, - rt: Handle, + rt: Handle, } impl Compressor { From 30012dbfe71047140fc4bd863d4495f6ba528346 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:23:54 +0530 Subject: [PATCH 23/40] lint --- Cargo.lock | 1 + 
crates/core/src/db/relational_db.rs | 2 -- crates/snapshot/Cargo.toml | 1 + crates/snapshot/tests/remote.rs | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea1503a9863..e117cf6d300 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8668,6 +8668,7 @@ dependencies = [ "spacetimedb-lib 2.2.0", "spacetimedb-paths", "spacetimedb-primitives 2.2.0", + "spacetimedb-runtime", "spacetimedb-sats 2.2.0", "spacetimedb-schema", "spacetimedb-table", diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index fd96c3067c6..57230e8866b 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -40,8 +40,6 @@ use spacetimedb_lib::db::raw_def::v9::{btree, RawModuleDefV9Builder, RawSql}; use spacetimedb_lib::st_var::StVarValue; use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; -#[cfg(test)] -use spacetimedb_paths::server::SnapshotDirPath; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; use spacetimedb_runtime::Handle; diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml index f9f767ce18e..aa51c4e3bd8 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/snapshot/Cargo.toml @@ -35,6 +35,7 @@ spacetimedb-core = { path = "../core", features = ["test"] } spacetimedb-schema = { path = "../schema" } spacetimedb-datastore = { path = "../datastore", features = ["test"] } spacetimedb-durability = { workspace = true, features = ["test"] } +spacetimedb-runtime = { workspace = true } anyhow.workspace = true env_logger.workspace = true diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 81d67bc2ec5..1c6c51fe8e7 100644 --- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -10,7 +10,7 @@ use spacetimedb::{ snapshot::{self, SnapshotWorker}, }, error::DBError, - runtime, Identity, + Identity, }; use spacetimedb_datastore::execution_context::Workload; use 
spacetimedb_datastore::locking_tx_datastore::datastore::Locking; @@ -227,7 +227,7 @@ impl SourceSnapshot { async fn create_snapshot(repo: Arc) -> anyhow::Result { let start = Instant::now(); - let rt = runtime::Runtime::tokio_current(); + let rt = spacetimedb_runtime::Handle::tokio_current(); // NOTE: `_db` needs to stay alive until the snapshot is taken, // because the snapshot worker holds only a weak reference. let (mut watch, _db) = spawn_blocking(|| { From d9f009b32b46afff87bb1cf0e25c2863a790135e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:26:04 +0530 Subject: [PATCH 24/40] fix Cargo.toml --- crates/standalone/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/standalone/Cargo.toml b/crates/standalone/Cargo.toml index 3bc7335625a..180b3a60b4c 100644 --- a/crates/standalone/Cargo.toml +++ b/crates/standalone/Cargo.toml @@ -54,7 +54,7 @@ serde_json.workspace = true sled.workspace = true socket2.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = ["full"] } +tokio.workspace = true tower-http.workspace = true toml.workspace = true tracing = { workspace = true, features = ["release_max_level_debug"] } From 3b767256b72dc2aad1a5a079f82ecb39784014e0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 19:54:56 +0530 Subject: [PATCH 25/40] endlines on README --- crates/runtime/README.md | 85 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 61 deletions(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index a7b2fdefa71..11ae570caa4 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,36 +1,21 @@ # spacetimedb-runtime -`spacetimedb-runtime` is runtime boundary that lets SpacetimeDB core code run -under deterministic simulation testing (DST). +`spacetimedb-runtime` is a runtime boundary that lets SpacetimeDB core code run under deterministic simulation testing (DST). 
-DST runs code inside a deterministic simulator that controls nondeterministic -inputs instead of letting them come directly from the OS and real runtime -environment. Given same -seed, simulator should produce same trace. When it finds a bug, seed should be -enough to reproduce that bug exactly. +DST runs code inside a deterministic simulator that controls nondeterministic inputs instead of letting them come directly from the OS and real runtime environment. Given the same seed, the simulator should produce the same trace. When it finds a bug, the seed should be enough to reproduce that bug exactly. -For this to work, code under test must not read clocks, randomness, -scheduling, I/O, or network behavior directly from outer environment. Those -effects need interfaces that production can implement with real runtime-backed services and -DST can replace with simulated ones. +For this to work, code under test must not read clocks, randomness, scheduling, I/O, or network behavior directly from the outer environment. Those effects need interfaces that production can implement with real runtime-backed services and DST can replace with simulated ones. -This crate provides the execution-control part of that boundary: spawning, -timeouts, virtual time, deterministic randomness, task scheduling, and fault -decisions. Storage, networking, and replication should be modeled through -higher-level abstractions. +This crate provides the execution-control part of that boundary: spawning, timeouts, virtual time, deterministic randomness, task scheduling, and fault decisions. Storage, networking, and replication should be modeled through higher-level abstractions. ## Architecture -[src/lib.rs](./src/lib.rs) exposes `Runtime`, small runtime handle shared code -carries. It has two variants: +[src/lib.rs](./src/lib.rs) exposes `Runtime`, a small runtime handle shared code carries. It has two variants: - `Runtime::Tokio(TokioHandle)` for real runtime execution. 
- `Runtime::Simulation(sim::Handle)` for deterministic simulation. -[src/sim](./src/sim) contains simulation core. It is single-threaded and aims -toward `no_std + alloc` over time. It is written with dependency reduction in -mind, though it still uses a small set of runtime-support crates today. -The module includes: +[src/sim](./src/sim) contains the simulation core. It is single-threaded and aims toward `no_std + alloc` over time. It is written with dependency reduction in mind, though it still uses a small set of runtime-support crates today. The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. @@ -41,51 +26,29 @@ The module includes: [src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: - `block_on` installs simulation guards for tests running in a normal process. -- `check_determinism` replays same seeded workload twice and compares trace. +- `check_determinism` replays the same seeded workload twice and compares traces. - libc randomness hooks warn and delegate if code reaches OS entropy. -- Unix thread hooks reject accidental `std::thread::spawn` while simulation is - active. - -Tokio integration is intentionally small and lives directly in -[src/lib.rs](./src/lib.rs). +- Unix thread hooks reject accidental `std::thread::spawn` while simulation is active. +Tokio integration is intentionally small and lives directly in [src/lib.rs](./src/lib.rs). Feature flags: -- `tokio`: enables tokio runtime backend and remains in default feature set. -- `simulation`: enables deterministic simulation runtime and `sim_std` - helpers. +- `tokio`: enables the Tokio runtime backend and remains in the default feature set. +- `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. 
## Scope and Limitations -- **Single-threaded runtime.** The simulator exposes interleaving and timeout - bugs, but not bugs that require true parallel execution. The direction is to - keep deep-core code single-threaded or close to thread-per-core; simulating - real parallelism is out of scope. - -- **One shared virtual clock.** Nodes share one clock, so thir clock can not - drift to show mismatched time. - -- **No built-in network, storage, or I/O simulation.** This crate provides - deterministic execution primitives only. Higher-level harnesses should model - message delivery, disk behavior, and failures. - -- **Not a Tokio replacement.** This crate does not aim to simulate APIs like - `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level - abstraction boundary. - -- **`spawn_blocking` is only a facade on simulation runtime.** On the simulation - backend it currently delegates to a normal spawned task, so the closure - still runs on the single executor thread and can block runtime progress. The - direction is to avoid relying on blocking-pool semantics. - -- **OS randomness is not controlled.** `sim_std` warns if code reaches OS - entropy. The direction is to keep application code and testing harnesses off - OS randomness entirely. - -- **Not fully `no_std` or allocation-controlled yet.** The simulation core is - written with a `no_std + alloc` direction in mind, so moving its core - further in that direction should be straightforward. Today, though, `std`/OS - glue still depends on `std`, and the runtime still allocates through normal - Rust container and task paths. Tight control over heap allocation is a - direction, not something this crate enforces yet. +- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. 
+ +- **One shared virtual clock.** Nodes share one clock, so their clocks cannot drift to show mismatched time. + +- **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model message delivery, disk behavior, and failures. + +- **Not a Tokio replacement.** This crate does not aim to simulate APIs like `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. + +- **`spawn_blocking` is only a facade on the simulation runtime.** On the simulation backend, it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. + +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. + +- **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. Tight control over heap allocation is a direction, not something this crate enforces yet. 
From 9996a16170696f55d1323db7f130f025fa0cfa74 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Wed, 13 May 2026 20:18:58 +0530 Subject: [PATCH 26/40] comments --- crates/runtime/src/lib.rs | 6 ------ crates/runtime/src/sim/executor.rs | 2 -- crates/runtime/src/sim/mod.rs | 6 ------ crates/runtime/src/sim/time/mod.rs | 2 -- 4 files changed, 16 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 65e35c8793f..f93889e7490 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,9 +1,3 @@ -#![cfg_attr(not(any(feature = "tokio", feature = "simulation")), no_std)] - -//! Runtime and deterministic simulation utilities shared by core and DST. - -extern crate alloc; - use core::{ fmt, future::Future, diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 0c5dd0754ae..43fd9f6d922 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -1,5 +1,3 @@ -//! Minimal asynchronous executor adapted from madsim's `sim/task` loop. - use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; use core::{ fmt, diff --git a/crates/runtime/src/sim/mod.rs b/crates/runtime/src/sim/mod.rs index e44e2ac6707..ccdcc104991 100644 --- a/crates/runtime/src/sim/mod.rs +++ b/crates/runtime/src/sim/mod.rs @@ -1,9 +1,3 @@ -//! Local deterministic simulation runtime. -//! -//! This module is deliberately small, but its executor shape follows madsim's: -//! futures are scheduled as runnable tasks and the ready queue is sampled by a -//! deterministic RNG instead of being driven by a package-level async runtime. - pub mod buggify; mod executor; mod rng; diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 65a1663429b..70a6a037187 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -1,5 +1,3 @@ -//! Virtual time for the local simulation runtime. 
- mod sleep; use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; From d5992a2eba38ab5e0e372942824376b3dc17dace Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 13:01:07 +0530 Subject: [PATCH 27/40] add extern alloc --- crates/runtime/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index f93889e7490..d2850e87c8f 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,3 +1,4 @@ +extern crate alloc; use core::{ fmt, future::Future, From 6079ef00ae1d96bd9934db4ced9010029922bcfa Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 16:45:55 +0530 Subject: [PATCH 28/40] remove futures dependency --- crates/runtime/Cargo.toml | 7 ++++--- crates/runtime/README.md | 29 ++++++++++++++++++----------- crates/runtime/src/sim/executor.rs | 9 ++++++--- crates/runtime/src/sim/time/mod.rs | 28 ++++++++++++++++------------ crates/runtime/tests/sim_e2e.rs | 16 ++++++++++++++++ 5 files changed, 60 insertions(+), 29 deletions(-) diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 0460432086b..4cd0af60869 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -10,13 +10,14 @@ rust-version.workspace = true workspace = true [dependencies] -futures.workspace = true -futures-util.workspace = true tokio = { workspace = true, optional = true } -async-task = { version = "4.4", optional = true } +async-task = { version = "4.4", default-features = false, optional = true } spin = { version = "0.9", default-features = false, features = ["mutex", "spin_mutex"], optional = true } libc = { version = "0.2", optional = true } +[dev-dependencies] +futures.workspace = true + [features] default = ["tokio"] tokio = ["dep:tokio"] diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 11ae570caa4..970a17889c4 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -10,17 +10,17 @@ This crate provides the 
execution-control part of that boundary: spawning, timeo ## Architecture -[src/lib.rs](./src/lib.rs) exposes `Runtime`, a small runtime handle shared code carries. It has two variants: +[src/lib.rs](./src/lib.rs) exposes `Handle`, a small runtime handle shared code carries. It has two variants: -- `Runtime::Tokio(TokioHandle)` for real runtime execution. -- `Runtime::Simulation(sim::Handle)` for deterministic simulation. +- `Handle::Tokio(TokioHandle)` for real runtime execution. +- `Handle::Simulation(sim::Handle)` for deterministic simulation. -[src/sim](./src/sim) contains the simulation core. It is single-threaded and aims toward `no_std + alloc` over time. It is written with dependency reduction in mind, though it still uses a small set of runtime-support crates today. The module includes: +[src/sim](./src/sim) contains the simulation core. It is single-threaded and targets `no_std + alloc`. The module includes: - `executor`: single-threaded task scheduler with deterministic runnable selection. - `time`: virtual clock, sleeps, and timeouts. - `rng`: seeded deterministic randomness for scheduler and workload decisions. -- `buggify`: seeded fault-injection decisions. +- `buggify`: fault-injection surface. Calls rng to decide probabilistically whether to inject failures into simulated operations. - `node`: node builders and node-local scheduling handles. [src/sim_std.rs](./src/sim_std.rs) contains `std`/OS glue around the simulator: @@ -37,18 +37,25 @@ Feature flags: - `tokio`: enables the Tokio runtime backend and remains in the default feature set. - `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. -## Scope and Limitations +## Related documents -- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. 
+- **[DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md)** — tracks nondeterminism surfaces. + +## Design Principles -- **One shared virtual clock.** Nodes share one clock, so their clocks cannot drift to show mismatched time. +- **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. - **No built-in network, storage, or I/O simulation.** This crate provides deterministic execution primitives only. Higher-level harnesses should model message delivery, disk behavior, and failures. - **Not a Tokio replacement.** This crate does not aim to simulate APIs like `tokio::net` or `tokio::fs`. Code that depends on them needs a higher-level abstraction boundary. -- **`spawn_blocking` is only a facade on the simulation runtime.** On the simulation backend, it currently delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. +- **Zero dependency.** The simulation core in `sim/` is already `no_std + alloc`. The `sim_std` module is a thin OS-facing wrapper — the std dependency lives there, not in the simulation core itself. It stays until the application logic above this crate also moves to `no_std`. -- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. +## Current Limitations -- **Not fully `no_std` or allocation-controlled yet.** The simulation core is written with a `no_std + alloc` direction in mind, so moving its core further in that direction should be straightforward. Today, though, `std`/OS glue still depends on `std`, and the runtime still allocates through normal Rust container and task paths. 
Tight control over heap allocation is a direction, not something this crate enforces yet. + +- **One shared virtual clock.** All simulated nodes share a single clock. This masks bugs related to timing mismatch across machines. + +- **`spawn_blocking` is only a facade on the simulation backend.** It delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. + +- **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor.rs index 43fd9f6d922..c064bca804f 100644 --- a/crates/runtime/src/sim/executor.rs +++ b/crates/runtime/src/sim/executor.rs @@ -8,7 +8,6 @@ use core::{ time::Duration, }; -use futures_util::FutureExt; use spin::Mutex; use crate::sim::{time::TimeHandle, Rng}; @@ -578,7 +577,7 @@ impl Executor { /// nor timers remain, the simulation is considered deadlocked. 
fn block_on(&self, future: F) -> F::Output { let sender = self.sender.clone(); - let (runnable, task) = unsafe { + let (runnable, mut task) = unsafe { async_task::Builder::new() .metadata(NodeId::MAIN) .spawn_unchecked(move |_| future, move |runnable| sender.send(runnable)) @@ -588,7 +587,11 @@ impl Executor { loop { self.run_all_ready(); if task.is_finished() { - return task.now_or_never().expect("finished task should resolve"); + let waker = Waker::noop(); + return match Pin::new(&mut task).poll(&mut Context::from_waker(&waker)) { + Poll::Ready(output) => output, + Poll::Pending => unreachable!("task.is_finished() was true"), + }; } if self.time.wake_next_timer() { diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 70a6a037187..de0a201d9f2 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -1,9 +1,7 @@ mod sleep; use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; -use core::{fmt, future::Future, task::Waker, time::Duration}; - -use futures_util::{select_biased, FutureExt}; +use core::{fmt, future::Future, pin::pin, task::{Poll, Waker}, time::Duration}; use sleep::wake_all; use spin::Mutex; @@ -117,18 +115,24 @@ impl TimeHandle { /// Race a future against a virtual-time sleep. /// - /// This is implemented as `future` versus `sleep(duration)` using a biased - /// select. If both become ready in the same simulated step, the main - /// future wins the tie so completion beats timeout deterministically. + /// Uses a biased `poll_fn` that polls `future` before `sleep`. If both are + /// ready in the same step, the main future wins — completion beats timeout + /// deterministically. pub async fn timeout(&self, duration: Duration, future: impl Future) -> Result { let sleep = self.sleep(duration); - futures::pin_mut!(future); - futures::pin_mut!(sleep); + let mut future = pin!(future); + let mut sleep = pin!(sleep); - select_biased! 
{ - output = future.fuse() => Ok(output), - () = sleep.fuse() => Err(TimeoutElapsed { duration }), - } + core::future::poll_fn(|cx| { + if let Poll::Ready(output) = future.as_mut().poll(cx) { + return Poll::Ready(Ok(output)); + } + if let Poll::Ready(()) = sleep.as_mut().poll(cx) { + return Poll::Ready(Err(TimeoutElapsed { duration })); + } + Poll::Pending + }) + .await } } diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index 3757ee187fa..1f505696801 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -148,19 +148,23 @@ const CLIENT_REQUESTS: [(u64, u64); 5] = [(0, 4), (1, 5), (2, 7), (3, 9), (4, 11 /// Each worker sleeps for deterministic virtual latency and may drop the reply /// based on buggify. fn run_buggified_client_server(seed: u64) -> ClientServerRun { + // --- setup: runtime, buggify, two nodes, and communication channels --- let mut runtime = Runtime::new(seed); buggify::enable(&runtime); let handle = runtime.handle(); let client_node = runtime.create_node().name("client").build(); let server_node = runtime.create_node().name("server").build(); + // mpsc channel: client tasks send Request messages to the server task let (request_tx, mut request_rx) = mpsc::unbounded::(); let server_events = Arc::new(Mutex::new(Vec::new())); let (responses, server_events) = runtime.block_on(async move { + // --- server: receive 5 requests, spawn one worker per request --- let server_handle = handle.clone(); let server_events_for_server = Arc::clone(&server_events); let server = server_node.clone().spawn(async move { let mut workers = Vec::new(); + // Receive all 5 requests before processing any replies for _ in 0..5 { let request = request_rx.next().await.expect("client should send request"); server_events_for_server.lock().push(ServerEvent::Received { @@ -168,10 +172,13 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { at: server_handle.now(), }); + // --- server worker: simulate latency, 
then drop or reply based on buggify --- let worker_handle = server_handle.clone(); let worker_events = Arc::clone(&server_events_for_server); workers.push(server_node.clone().spawn(async move { + // Deterministic virtual latency: each request id has a distinct sleep worker_handle.sleep(Duration::from_millis(request.id + 1)).await; + // buggify decides whether to drop this request (40% probability) if worker_handle.buggify_with_prob(0.4) { worker_events.lock().push(ServerEvent::Dropped { id: request.id, @@ -180,6 +187,7 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { return; } + // No fault injected: send the reply let response = Response { id: request.id, value: request.input * 10, @@ -196,14 +204,17 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { })); } + // Wait for all server workers to complete for worker in workers { worker.await.expect("server worker should complete"); } }); + // --- client: spawn one task per request, send them to server, collect responses --- let client_outer_node = client_node.clone(); let client = client_node.spawn(async move { let mut requests = Vec::new(); + // Spawn a task for each request so they submit concurrently for (id, input) in CLIENT_REQUESTS { let request_tx = request_tx.clone(); let client_request_node = client_outer_node.clone(); @@ -212,11 +223,14 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { request_tx .unbounded_send(Request { id, input, respond_to }) .expect("server inbox should be open"); + // Await the server's reply (None if the server dropped this request) (id, response_rx.await.ok()) })); } + // All requests sent, close the channel so the server loop terminates drop(request_tx); + // Collect responses in spawn order let mut responses = Vec::new(); for request in requests { responses.push(request.await.expect("client request task should complete")); @@ -224,11 +238,13 @@ fn run_buggified_client_server(seed: u64) -> ClientServerRun { responses }); + // Drive 
both client and server to completion let responses = client.await.expect("client task should complete"); server.await.expect("server task should complete"); (responses, server_events.lock().clone()) }); + // --- package the results: client responses, server trace, and total virtual time --- ClientServerRun { responses, server_events, From 8601d78ad44c4887b389e7a8c8fd3dd0bec02e0e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 01:14:54 +0530 Subject: [PATCH 29/40] coverage matrics --- crates/runtime/DETERMINISM_COVERAGE.md | 58 ++++++++++++++++++++++++++ crates/runtime/README.md | 2 + crates/runtime/src/sim_std.rs | 1 + 3 files changed, 61 insertions(+) create mode 100644 crates/runtime/DETERMINISM_COVERAGE.md diff --git a/crates/runtime/DETERMINISM_COVERAGE.md b/crates/runtime/DETERMINISM_COVERAGE.md new file mode 100644 index 00000000000..4df6c2a6df9 --- /dev/null +++ b/crates/runtime/DETERMINISM_COVERAGE.md @@ -0,0 +1,58 @@ +# Determinism Coverage + +This document tracks which sources of nondeterminism are under control in `spacetimedb-runtime`, which ones are only constrained by current architecture, and which ones still escape the simulator boundary. + +It is meant to serve two purposes: + +1. Make the current determinism boundary explicit for runtime code, core crates, and DST harnesses. +2. Provide a place to record and review assumptions when a PR changes that boundary. + +## Status Definitions + +- `Controlled` + The simulator or runtime owns this source of nondeterminism directly. Given the same seed and the same simulated inputs, behavior should replay the same way. + +- `Constrained` + This surface is not fully simulator-controlled, but the current architecture limits how it is used. Replay should remain stable if those constraints continue to hold. + +- `Audited` + This surface is not mechanically controlled. 
Current usage has been reviewed and is believed not to affect replay, but that guarantee depends on call patterns and can regress. + +- `Known Leak` + This source can currently escape simulator control and affect replay. It should be treated as explicit technical debt or a documented exception. + +- `Out of Scope` + This crate does not try to control this surface. If it matters for DST, it must be modeled by a higher-level abstraction or test harness. + +## Control Matrix + +| Surface | Status | Boundary | Current control or assumption | Failure mode if violated | Required direction | +| --- | --- | --- | --- | --- | --- | +| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | Keep simulated task scheduling inside the sim executor | +| Simulated task lifecycle | Controlled | `runtime::sim::{executor, JoinHandle}` | Spawn, wake, cancel, and join all happen inside simulator-owned scheduling | Cancellation and join behavior diverge across runs | Keep lifecycle transitions on simulator-owned tasks only | +| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | Keep timer progression fully simulator-owned | +| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | Keep simulator-owned randomness explicit and seed-driven | +| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | Keep simulated work on simulator tasks, not OS threads | +| OS entropy | Known Leak | `runtime::sim_std` | Randomness requests warn and then delegate to the OS | Same seed can produce different traces | Add 
backtrace to warnings, remove call sites, eventually fail closed or fully model the source | +| `HashMap` randomized iteration | Audited | Runtime and caller code | Runtime does not force deterministic hash seeding; correctness must not depend on iteration order | Hidden ordering dependencies cause flaky replay | Prefer ordered maps or explicit sorting where observable order matters | +| `tokio::sync` primitives | Constrained | Core crates above runtime | These can be replay-compatible only when all participating tasks remain simulator-owned and progress stays on simulator-controlled async paths | Wake ordering or blocking semantics diverge once code depends on a real runtime or host-driven progress | Audit per primitive and push deep-core paths toward runtime-owned or single-threaded structures | +| `parking_lot::{}` and `std::sync::{}` | Constrained | Core crates, especially datastore | Safe only where access stays single-threaded or non-contended under DST | Host synchronization leaks nondeterministic acquisition order | Keep out of deep-core execution paths; prefer runtime-owned or single-threaded structures | +| File and network I/O | Out of Scope | Runtime crate | Runtime does not simulate filesystem or network behavior | Real I/O timing, ordering, and errors are not replayable | Model via domain-specific DST abstractions | +| Tokio runtime ownership | Constrained | `spacetimedb_runtime::Handle` / shared core APIs | Shared code uses a narrow runtime boundary instead of concrete Tokio subsystems | Concrete Tokio APIs leak into DST-facing core paths | Keep shared code on runtime or domain abstractions, not raw Tokio services | +| Heap allocation and OOM | Known Leak | Broad, especially deep-core direction | Allocation happens through normal Rust paths; deterministic allocation failure is not modeled | Resource-exhaustion behavior is not reproducible | Move the simulation core and eventually deep-core paths toward `no_std + alloc` with explicit allocation 
boundaries | +| Snapshot / commitlog / datastore host effects | Out of Scope | Higher-level durability and storage layers | Runtime only provides scheduling, time, and fault-decision primitives | Storage semantics depend on real host behavior unless wrapped | Model durable behavior through domain-specific DST abstractions | + +## Scope Notes + +This document covers the runtime crate and the determinism boundary it exposes to core crates and DST harnesses. + +`Controlled` is the target state for nondeterminism surfaces that must participate directly in deterministic simulation testing. `Constrained` and `Audited` are temporary states: they may be acceptable for a period, but they are not strong guarantees. `Known Leak` marks places where replay can still depend on host behavior. `Out of Scope` does not mean unimportant; it means control must live in another layer. + +## Update Rule + +A PR should update this document if it: + +- introduces a new source of nondeterminism, +- changes the control status of an existing surface, +- adds a new assumption about single-threading, iteration order, runtime ownership, or host behavior, or +- removes a leak or upgrades a surface from `Audited` or `Constrained` to `Controlled`. diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 970a17889c4..4fece270157 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -8,6 +8,8 @@ For this to work, code under test must not read clocks, randomness, scheduling, This crate provides the execution-control part of that boundary: spawning, timeouts, virtual time, deterministic randomness, task scheduling, and fault decisions. Storage, networking, and replication should be modeled through higher-level abstractions. +For a tracked view of what is currently under simulator control, what is only constrained by convention, and what still leaks host behavior, see [DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md). 
+ ## Architecture [src/lib.rs](./src/lib.rs) exposes `Handle`, a small runtime handle shared code carries. It has two variants: diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index cdb425a2086..cbdcd299b49 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -140,6 +140,7 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { eprintln!("warning: randomness requested; delegating to host OS"); + eprintln!("{}", std::backtrace::Backtrace::force_capture()); unsafe { real_getrandom()(buf, buflen, flags) } } From b27b02132deec7042f4edafcb98115ff6fa751b3 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 16:52:44 +0530 Subject: [PATCH 30/40] update determinism coverage --- Cargo.lock | 1 - crates/runtime/DETERMINISM_COVERAGE.md | 16 ++++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e117cf6d300..812c63f88ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8482,7 +8482,6 @@ version = "2.2.0" dependencies = [ "async-task", "futures", - "futures-util", "libc", "spin", "tokio", diff --git a/crates/runtime/DETERMINISM_COVERAGE.md b/crates/runtime/DETERMINISM_COVERAGE.md index 4df6c2a6df9..076efb865e4 100644 --- a/crates/runtime/DETERMINISM_COVERAGE.md +++ b/crates/runtime/DETERMINISM_COVERAGE.md @@ -28,26 +28,18 @@ It is meant to serve two purposes: | Surface | Status | Boundary | Current control or assumption | Failure mode if violated | Required direction | | --- | --- | --- | --- | --- | --- | -| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | Keep simulated task scheduling inside the sim executor | -| Simulated task lifecycle | Controlled | `runtime::sim::{executor, JoinHandle}` | Spawn, wake, cancel, and join all happen 
inside simulator-owned scheduling | Cancellation and join behavior diverge across runs | Keep lifecycle transitions on simulator-owned tasks only | -| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | Keep timer progression fully simulator-owned | -| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | Keep simulator-owned randomness explicit and seed-driven | -| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | Keep simulated work on simulator tasks, not OS threads | +| Executor scheduling | Controlled | `runtime::sim::executor` | Runnable selection is driven by seeded simulator RNG | Replay diverges across runs | - | +| Virtual time and timers | Controlled | `runtime::sim::time` | Simulated time advances only through explicit advance or next-timer jump | Timeouts and ordering become host-timing dependent | - | +| Runtime RNG and buggify | Controlled | `runtime::sim::rng` | Runtime RNG drives scheduler and probabilistic fault-injection decisions | RNG and fault decisions are not replayable | - | +| OS thread creation during simulation | Controlled | `runtime::sim_std` | Unix thread hook rejects `std::thread::spawn` while simulation is active | Host scheduler escapes simulator control | - | | OS entropy | Known Leak | `runtime::sim_std` | Randomness requests warn and then delegate to the OS | Same seed can produce different traces | Add backtrace to warnings, remove call sites, eventually fail closed or fully model the source | | `HashMap` randomized iteration | Audited | Runtime and caller code | Runtime does not force deterministic hash seeding; correctness must 
not depend on iteration order | Hidden ordering dependencies cause flaky replay | Prefer ordered maps or explicit sorting where observable order matters | | `tokio::sync` primitives | Constrained | Core crates above runtime | These can be replay-compatible only when all participating tasks remain simulator-owned and progress stays on simulator-controlled async paths | Wake ordering or blocking semantics diverge once code depends on a real runtime or host-driven progress | Audit per primitive and push deep-core paths toward runtime-owned or single-threaded structures | | `parking_lot::{}` and `std::sync::{}` | Constrained | Core crates, especially datastore | Safe only where access stays single-threaded or non-contended under DST | Host synchronization leaks nondeterministic acquisition order | Keep out of deep-core execution paths; prefer runtime-owned or single-threaded structures | | File and network I/O | Out of Scope | Runtime crate | Runtime does not simulate filesystem or network behavior | Real I/O timing, ordering, and errors are not replayable | Model via domain-specific DST abstractions | -| Tokio runtime ownership | Constrained | `spacetimedb_runtime::Handle` / shared core APIs | Shared code uses a narrow runtime boundary instead of concrete Tokio subsystems | Concrete Tokio APIs leak into DST-facing core paths | Keep shared code on runtime or domain abstractions, not raw Tokio services | | Heap allocation and OOM | Known Leak | Broad, especially deep-core direction | Allocation happens through normal Rust paths; deterministic allocation failure is not modeled | Resource-exhaustion behavior is not reproducible | Move the simulation core and eventually deep-core paths toward `no_std + alloc` with explicit allocation boundaries | | Snapshot / commitlog / datastore host effects | Out of Scope | Higher-level durability and storage layers | Runtime only provides scheduling, time, and fault-decision primitives | Storage semantics depend on real host behavior 
unless wrapped | Model durable behavior through domain-specific DST abstractions | -## Scope Notes - -This document covers the runtime crate and the determinism boundary it exposes to core crates and DST harnesses. - -`Controlled` is the target state for nondeterminism surfaces that must participate directly in deterministic simulation testing. `Constrained` and `Audited` are temporary states: they may be acceptable for a period, but they are not strong guarantees. `Known Leak` marks places where replay can still depend on host behavior. `Out of Scope` does not mean unimportant; it means control must live in another layer. - ## Update Rule A PR should update this document if it: From 4f6ca2334c7db2137ac7b999e83b15881fa08f02 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:05:59 +0530 Subject: [PATCH 31/40] put extern alloc behind gate --- crates/runtime/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index d2850e87c8f..aef70fcaf1a 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -1,4 +1,6 @@ +#[cfg(feature = "simulation")] extern crate alloc; + use core::{ fmt, future::Future, From 651f8c6745cd99a86746fdfefd25c03587d1585d Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:49:14 +0530 Subject: [PATCH 32/40] join handle cleanup --- crates/runtime/src/lib.rs | 130 ++++++++++++++++++++++------------ crates/runtime/src/sim_std.rs | 6 +- 2 files changed, 89 insertions(+), 47 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index aef70fcaf1a..dfc96a3c870 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -36,9 +36,9 @@ pub struct AbortHandle { enum JoinHandleInner { #[cfg(feature = "tokio")] - Tokio(Option>), + Tokio(tokio::task::JoinHandle), #[cfg(feature = "simulation")] - Simulation(Option>), + Simulation(sim::JoinHandle), Detached(PhantomData), } @@ -104,41 +104,21 @@ impl
JoinHandleInner { fn abort_handle(&self) -> AbortHandle { match self { #[cfg(feature = "tokio")] - Self::Tokio(Some(handle)) => AbortHandle { + Self::Tokio(handle) => AbortHandle { inner: AbortHandleInner::Tokio(handle.abort_handle()), }, #[cfg(feature = "simulation")] - Self::Simulation(Some(handle)) => AbortHandle { + Self::Simulation(handle) => AbortHandle { inner: AbortHandleInner::Simulation(handle.abort_handle()), }, - #[cfg(feature = "tokio")] - Self::Tokio(None) => panic!("runtime join handle aborted after detach"), - #[cfg(feature = "simulation")] - Self::Simulation(None) => panic!("runtime join handle aborted after detach"), - Self::Detached(_) => panic!("runtime join handle aborted after completion"), - } - } - - fn detach(&mut self) { - match self { - #[cfg(feature = "tokio")] - Self::Tokio(handle) => { - drop(handle.take()); - } - #[cfg(feature = "simulation")] - Self::Simulation(handle) => { - if let Some(handle) = handle.take() { - handle.detach(); - } - } - Self::Detached(_) => {} + Self::Detached(_) => unreachable!("abort_handle called on a completed handle"), } } fn poll_result(&mut self, cx: &mut Context<'_>) -> Poll> { match self { #[cfg(feature = "tokio")] - Self::Tokio(Some(handle)) => match Pin::new(handle).poll(cx) { + Self::Tokio(handle) => match Pin::new(handle).poll(cx) { Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Tokio(err), @@ -146,18 +126,14 @@ impl JoinHandleInner { Poll::Pending => Poll::Pending, }, #[cfg(feature = "simulation")] - Self::Simulation(Some(handle)) => match Pin::new(handle).poll_join(cx) { + Self::Simulation(handle) => match Pin::new(handle).poll_join(cx) { Poll::Ready(Ok(output)) => Poll::Ready(Ok(output)), Poll::Ready(Err(err)) => Poll::Ready(Err(JoinError { inner: JoinErrorInner::Simulation(err), })), Poll::Pending => Poll::Pending, }, - #[cfg(feature = "tokio")] - Self::Tokio(None) => panic!("runtime join handle polled after 
detach"), - #[cfg(feature = "simulation")] - Self::Simulation(None) => panic!("runtime join handle polled after detach"), - Self::Detached(_) => panic!("runtime join handle polled after completion"), + Self::Detached(_) => unreachable!("poll_result called on a completed handle"), } } } @@ -166,15 +142,6 @@ impl JoinHandle { pub fn abort_handle(&self) -> AbortHandle { self.inner.abort_handle() } - - pub fn detach(mut self) { - self.detach_inner(); - } - - fn detach_inner(&mut self) { - self.inner.detach(); - self.inner = JoinHandleInner::Detached(PhantomData); - } } impl Future for JoinHandle { @@ -196,7 +163,14 @@ impl Future for JoinHandle { impl Drop for JoinHandle { fn drop(&mut self) { - self.detach_inner(); + let inner = core::mem::replace(&mut self.inner, JoinHandleInner::Detached(PhantomData)); + #[cfg(feature = "simulation")] + if let JoinHandleInner::Simulation(handle) = inner { + handle.detach(); + return; + } + // For Tokio (and Detached), dropping the handle does not cancel the task. 
+ drop(inner); } } @@ -233,17 +207,17 @@ impl Handle { } impl Handle { - pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle<()> { + pub fn spawn(&self, future: impl Future + Send + 'static) -> JoinHandle { #[cfg(not(any(feature = "tokio", feature = "simulation")))] let _ = future; match self { #[cfg(feature = "tokio")] Self::Tokio(handle) => JoinHandle { - inner: JoinHandleInner::Tokio(Some(handle.spawn(future))), + inner: JoinHandleInner::Tokio(handle.spawn(future)), }, #[cfg(feature = "simulation")] Self::Simulation(handle) => JoinHandle { - inner: JoinHandleInner::Simulation(Some(handle.spawn_on(sim::NodeId::MAIN, future))), + inner: JoinHandleInner::Simulation(handle.spawn_on(sim::NodeId::MAIN, future)), }, #[cfg(not(any(feature = "tokio", feature = "simulation")))] _ => unreachable!("runtime dispatch has no enabled backend"), @@ -299,3 +273,69 @@ impl Handle { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; + + + #[cfg(feature = "simulation")] + #[test] + fn dropping_joinhandle_does_not_cancel_task_in_simulation() { + use crate::sim::Runtime; + let mut rt = Runtime::new(4); + let handle = Handle::simulation(rt.handle()); + let flag = Arc::new(AtomicBool::new(false)); + let flag_clone = flag.clone(); + + rt.block_on(async { + let jh = handle.spawn(async move { + flag_clone.store(true, Ordering::Release); + }); + drop(jh); + + // Yield so the spawned task gets polled. 
+ handle + .timeout(std::time::Duration::from_millis(50), async {}) + .await + .ok(); + }); + + assert!(flag.load(Ordering::Acquire)); + } + + #[cfg(feature = "simulation")] + #[test] + fn abort_cancels_task_in_simulation() { + use crate::sim::Runtime; + let mut rt = Runtime::new(4); + let handle = Handle::simulation(rt.handle()); + let flag = Arc::new(AtomicBool::new(false)); + let flag_clone = flag.clone(); + let handle_for_spawn = handle.clone(); + + rt.block_on(async move { + let jh = handle.spawn(async move { + // Sleep long enough that abort fires first. + handle_for_spawn + .timeout(std::time::Duration::from_millis(100), async {}) + .await + .ok(); + flag_clone.store(true, Ordering::Release); + }); + jh.abort_handle().abort(); + + let result = jh.await; + // wait to see, above task indeed cancelled. + let _ = handle + .timeout(std::time::Duration::from_millis(500), async {}) + .await; + assert!(result.is_err()); + assert!(!flag.load(Ordering::Acquire)); + }); + } +} diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index cbdcd299b49..08f82b9495e 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -139,8 +139,10 @@ unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc: #[unsafe(no_mangle)] #[inline(never)] unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize { - eprintln!("warning: randomness requested; delegating to host OS"); - eprintln!("{}", std::backtrace::Backtrace::force_capture()); + if in_simulation() { + eprintln!("warning: randomness requested; delegating to host OS"); + eprintln!("{}", std::backtrace::Backtrace::force_capture()); + } unsafe { real_getrandom()(buf, buflen, flags) } } From 74b283c4fa9aefb65d466b8117cfd25fb6b2bf70 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 17:54:11 +0530 Subject: [PATCH 33/40] update readme --- crates/runtime/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff 
--git a/crates/runtime/README.md b/crates/runtime/README.md index 4fece270157..18bb2289113 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -1,3 +1,5 @@ +> Welcome to the Matrix! + # spacetimedb-runtime `spacetimedb-runtime` is a runtime boundary that lets SpacetimeDB core code run under deterministic simulation testing (DST). @@ -39,10 +41,6 @@ Feature flags: - `tokio`: enables the Tokio runtime backend and remains in the default feature set. - `simulation`: enables the deterministic simulation runtime and `sim_std` helpers. -## Related documents - -- **[DETERMINISM_COVERAGE.md](./DETERMINISM_COVERAGE.md)** — tracks nondeterminism surfaces. - ## Design Principles - **Single-threaded runtime.** The simulator exposes interleaving and timeout bugs, but not bugs that require true parallel execution. The direction is to keep deep-core code single-threaded or close to thread-per-core; simulating real parallelism is out of scope. From 15b98a0cdfbd780050a9e3f4796263ede12d4c10 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:00:51 +0530 Subject: [PATCH 34/40] README about blocking code --- crates/runtime/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/runtime/README.md b/crates/runtime/README.md index 18bb2289113..68037d752bf 100644 --- a/crates/runtime/README.md +++ b/crates/runtime/README.md @@ -56,6 +56,6 @@ Feature flags: - **One shared virtual clock.** All simulated nodes share a single clock. This masks bugs related to timing mismatch across machines. -- **`spawn_blocking` is only a facade on the simulation backend.** It delegates to a normal spawned task, so the closure still runs on the single executor thread and can block runtime progress. The direction is to avoid relying on blocking-pool semantics. +- **No good alternative for blocking APIs.** The simulation backend has no `spawn_blocking` pool or OS thread escape hatch.
APIs like `spawn_blocking` or `Handle::block_on` delegate to the single executor thread, so blocking inside them stalls all simulated tasks. The direction is to avoid relying on blocking semantics inside the simulation boundary. - **OS randomness is not controlled.** `sim_std` warns if code reaches OS entropy. The direction is to keep application code and testing harnesses off OS randomness entirely. From 3c525f22b5614d4a181ddcab478848bc73ccee9e Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:07:48 +0530 Subject: [PATCH 35/40] comment --- .../src/sim/{executor.rs => executor/mod.rs} | 0 crates/runtime/src/sim/time/mod.rs | 3 +++ crates/runtime/src/sim/time/sleep.rs | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) rename crates/runtime/src/sim/{executor.rs => executor/mod.rs} (100%) diff --git a/crates/runtime/src/sim/executor.rs b/crates/runtime/src/sim/executor/mod.rs similarity index 100% rename from crates/runtime/src/sim/executor.rs rename to crates/runtime/src/sim/executor/mod.rs diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index de0a201d9f2..f8bf3571cf2 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -9,6 +9,9 @@ pub use sleep::Sleep; /// Shared virtual clock and timer registry for one simulation runtime. /// +/// Virtual clock that only advances when explicitly driven — no wall-clock +/// progression, like Tokio's time-pause mode. +/// /// All cloned handles observe the same virtual `now`, pending timers, and /// timer-id sequence.
The executor uses this handle both for explicit /// time-travel operations and for jumping directly to the next pending timer diff --git a/crates/runtime/src/sim/time/sleep.rs b/crates/runtime/src/sim/time/sleep.rs index 538439018b7..53d5555ffc3 100644 --- a/crates/runtime/src/sim/time/sleep.rs +++ b/crates/runtime/src/sim/time/sleep.rs @@ -10,10 +10,21 @@ use super::{TimeHandle, TimerId}; /// Future returned by [`TimeHandle::sleep`]. /// -/// The future stores a relative duration until first poll, then converts that -/// into an absolute deadline and a stable timer id. Subsequent polls either -/// complete immediately if virtual time has already reached the deadline or -/// refresh the registered waker and remain pending. +/// Three-state machine: +/// +/// 1. **Unregistered** — first poll. Converts the relative `duration` into an +/// absolute `deadline` using the current virtual time and registers with the +/// time handle's timer table. Transitions to `Registered`. +/// +/// 2. **Registered** — subsequent polls. If virtual time has reached the +/// deadline, the timer is cancelled and the future returns `Ready`. +/// Otherwise, the waker is refreshed in the timer entry and the future +/// returns `Pending`. +/// +/// 3. **Done** — any later poll returns `Ready(())` immediately. +/// +/// On drop while `Registered`, the timer entry is cancelled to prevent stale +/// wakers from firing after the future is abandoned.
pub struct Sleep { duration: Duration, state: SleepState, From 77ebb411ad2007bce732b33f1bc78a4429e6ef84 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 18:36:02 +0530 Subject: [PATCH 36/40] executor split --- crates/runtime/src/lib.rs | 7 +- crates/runtime/src/sim/executor/mod.rs | 111 +--------------- crates/runtime/src/sim/executor/task.rs | 162 ++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 111 deletions(-) create mode 100644 crates/runtime/src/sim/executor/task.rs diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index dfc96a3c870..5611d5db3ed 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -282,7 +282,6 @@ mod tests { Arc, }; - #[cfg(feature = "simulation")] #[test] fn dropping_joinhandle_does_not_cancel_task_in_simulation() { @@ -320,7 +319,6 @@ mod tests { rt.block_on(async move { let jh = handle.spawn(async move { - // Sleep long enough that abort fires first. handle_for_spawn .timeout(std::time::Duration::from_millis(100), async {}) .await @@ -330,10 +328,7 @@ mod tests { jh.abort_handle().abort(); let result = jh.await; - // wait to see, above task indeed cancelled. - let _ = handle - .timeout(std::time::Duration::from_millis(500), async {}) - .await; + let _ = handle.timeout(std::time::Duration::from_millis(500), async {}).await; assert!(result.is_err()); assert!(!flag.load(Ordering::Acquire)); }); diff --git a/crates/runtime/src/sim/executor/mod.rs b/crates/runtime/src/sim/executor/mod.rs index c064bca804f..0b874be8afe 100644 --- a/crates/runtime/src/sim/executor/mod.rs +++ b/crates/runtime/src/sim/executor/mod.rs @@ -12,6 +12,10 @@ use spin::Mutex; use crate::sim::{time::TimeHandle, Rng}; +mod task; +pub use task::{AbortHandle, JoinError, JoinHandle}; +use task::Abortable; + type Runnable = async_task::Runnable; #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -340,105 +344,6 @@ impl Handle { } } -/// A spawned simulated task. 
-pub struct JoinHandle { - task: async_task::Task, NodeId>, - abort: AbortHandle, -} - -impl JoinHandle { - /// Return a handle that can cancel this task without consuming the join - /// handle. - pub fn abort_handle(&self) -> AbortHandle { - self.abort.clone() - } - - /// Detach the task so it continues running without awaiting its output. - pub fn detach(self) { - self.task.detach(); - } - - pub(crate) fn poll_join(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.task).poll(cx) - } -} - -impl Future for JoinHandle { - type Output = Result; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - self.as_mut().poll_join(cx) - } -} - -#[derive(Clone)] -pub struct AbortHandle { - state: Arc, -} - -impl AbortHandle { - pub fn abort(&self) { - self.state.aborted.store(true, Ordering::Relaxed); - if let Some(waker) = self.state.waker.lock().take() { - waker.wake(); - } - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct JoinError; - -impl fmt::Display for JoinError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("task was cancelled") - } -} - -#[cfg(feature = "simulation")] -impl std::error::Error for JoinError {} - -struct AbortState { - aborted: AtomicBool, - waker: Mutex>, -} - -impl AbortState { - fn new() -> Self { - Self { - aborted: AtomicBool::new(false), - waker: Mutex::new(None), - } - } -} - -struct Abortable { - future: F, - abort: AbortHandle, -} - -impl Abortable { - fn new(future: F, abort: AbortHandle) -> Self { - Self { future, abort } - } -} - -impl Future for Abortable { - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if self.abort.state.aborted.load(Ordering::Relaxed) { - return Poll::Ready(Err(JoinError)); - } - - self.abort.state.waker.lock().replace(cx.waker().clone()); - - // SAFETY: the wrapper never moves `future` after being pinned. Only the - // cancellation fields outside `future` are accessed normally. 
- let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; - future.as_mut().poll(cx).map(Ok) - } -} - /// Core single-threaded scheduler backing a simulation [`Runtime`]. /// /// The executor owns the runnable queue, per-node pause state, deterministic @@ -533,9 +438,7 @@ impl Executor { { self.assert_known_node(node); - let abort = AbortHandle { - state: Arc::new(AbortState::new()), - }; + let abort = AbortHandle::new(); let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = async_task::Builder::new() @@ -554,9 +457,7 @@ impl Executor { { self.assert_known_node(node); - let abort = AbortHandle { - state: Arc::new(AbortState::new()), - }; + let abort = AbortHandle::new(); let abortable = Abortable::new(future, abort.clone()); let sender = self.sender.clone(); let (runnable, task) = unsafe { diff --git a/crates/runtime/src/sim/executor/task.rs b/crates/runtime/src/sim/executor/task.rs new file mode 100644 index 00000000000..d98ad3d8348 --- /dev/null +++ b/crates/runtime/src/sim/executor/task.rs @@ -0,0 +1,162 @@ +use alloc::sync::Arc; +use core::{ + fmt, + future::Future, + pin::Pin, + sync::atomic::{AtomicBool, Ordering}, + task::{Context, Poll, Waker}, +}; + +use spin::Mutex; + +use super::NodeId; + +/// A spawned simulated task. +/// +/// Two handles reference the same underlying allocation: +/// - `JoinHandle` awaits the output and holds an `AbortHandle` for cancellation. +/// - The executor holds the `Runnable` (not visible here). +pub struct JoinHandle { + // async_task::Task owns a shared heap-allocated cell that holds the future, + // its output, metadata (NodeId), and waker. Polling it drives the future + // to completion. Dropping it without detach cancels the future. + pub(crate) task: async_task::Task, NodeId>, + // Clone of the same AbortHandle that Abortable holds inside the task. 
+ pub(crate) abort: AbortHandle, +} + +impl JoinHandle { + /// Return a handle that can cancel this task. + pub fn abort_handle(&self) -> AbortHandle { + self.abort.clone() + } + + /// Drop the join handle without cancelling the task. + pub fn detach(self) { + // async_task::Task::detach makes Drop a no-op — the future keeps running. + self.task.detach(); + } + + /// Poll the underlying async_task::Task for its output. + pub(crate) fn poll_join( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + // async_task::Task implements Future. Polling it drives the wrapped + // Abortable future inside the executor. + Pin::new(&mut self.task).poll(cx) + } +} + +impl Future for JoinHandle { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.as_mut().poll_join(cx) + } +} + +/// Two-phase cancellation for a simulated task. +/// +/// [`AbortHandle`] and [`Abortable`] work together: +/// - `abort()` sets an atomic flag and wakes the task so it gets polled. +/// - On the next poll, `Abortable` checks the flag and returns `Err(JoinError)`. +/// - `JoinHandle::poll` reads that error and surfaces it to the awaiting code. +/// - The task's future is dropped naturally when `Abortable` returns `Err`. +/// +/// `abort()` is thread-safe — it can be called from any task or node, and the +/// waker ensures the target task is re-scheduled even if it was blocked on I/O +/// or a timer. +#[derive(Clone)] +pub struct AbortHandle { + state: Arc, +} + +impl AbortHandle { + pub(crate) fn new() -> Self { + Self { + state: Arc::new(AbortState::new()), + } + } + + pub fn abort(&self) { + // Step 1: atomically mark the task as aborted. + self.state.aborted.store(true, Ordering::Relaxed); + // Step 2: wake the task so the executor re-schedules it for polling. + // If the task is blocked on a timer, the waker cancels that wait. 
+ if let Some(waker) = self.state.waker.lock().take() { + waker.wake(); + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct JoinError; + +impl fmt::Display for JoinError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("task was cancelled") + } +} + +#[cfg(feature = "simulation")] +impl std::error::Error for JoinError {} + +// Shared state between AbortHandle and Abortable. +struct AbortState { + // Set to true by AbortHandle::abort(), read by Abortable::poll(). + aborted: AtomicBool, + // The executor's waker, registered by Abortable on every poll. + // Stored so abort() can wake the task even if it's waiting on I/O. + waker: Mutex>, +} + +impl AbortState { + fn new() -> Self { + Self { + aborted: AtomicBool::new(false), + waker: Mutex::new(None), + } + } +} + +/// Wraps a future so it can be cancelled via an [`AbortHandle`]. +/// +/// The executor wraps every spawned future in `Abortable`. On each poll it +/// checks the cancellation flag before progressing the inner future. +pub(crate) struct Abortable { + future: F, + abort: AbortHandle, +} + +impl Abortable { + pub(crate) fn new(future: F, abort: AbortHandle) -> Self { + Self { future, abort } + } +} + +impl Future for Abortable { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + // Check cancellation before doing any work. + if self.abort.state.aborted.load(Ordering::Relaxed) { + return Poll::Ready(Err(JoinError)); + } + + // Register the waker so abort() can wake this task. + self.abort.state.waker.lock().replace(cx.waker().clone()); + + // SAFETY: The `Abortable` struct is `#[repr(transparent)]`-like in its + // pin projection: `future` is behind the cancellation fields (`abort`) + // that are never moved once pinned. We use `map_unchecked_mut` to project + // through the struct layout, which is safe because: + // 1. `future` is a direct field of `Abortable` — no indirection. + // 2. 
`abort` is never moved or modified in ways that would change the + // address of `future` relative to `self`. + // 3. The caller guarantees `self` stays pinned for the lifetime of the + // future. + let mut future = unsafe { self.map_unchecked_mut(|this| &mut this.future) }; + future.as_mut().poll(cx).map(Ok) + } +} From 76a8228caa000a70936b23164b4c5d7311c6ea0f Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 19:11:09 +0530 Subject: [PATCH 37/40] lint --- crates/runtime/LICENSE | 732 +----------------------- crates/runtime/src/sim/executor/mod.rs | 2 +- crates/runtime/src/sim/executor/task.rs | 5 +- crates/runtime/src/sim/time/mod.rs | 8 +- 4 files changed, 10 insertions(+), 737 deletions(-) mode change 100644 => 120000 crates/runtime/LICENSE diff --git a/crates/runtime/LICENSE b/crates/runtime/LICENSE deleted file mode 100644 index daef5135277..00000000000 --- a/crates/runtime/LICENSE +++ /dev/null @@ -1,731 +0,0 @@ -SPACETIMEDB BUSINESS SOURCE LICENSE AGREEMENT - -Business Source License 1.1 - -Parameters - -Licensor: Clockwork Laboratories, Inc. -Licensed Work: SpacetimeDB 2.2.0 - The Licensed Work is - (c) 2023 Clockwork Laboratories, Inc. - -Additional Use Grant: You may make use of the Licensed Work provided your - application or service uses the Licensed Work with no - more than one SpacetimeDB instance in production and - provided that you do not use the Licensed Work for a - Database Service. - - A “Database Service” is a commercial offering that - allows third parties (other than your employees and - contractors) to access the functionality of the - Licensed Work by creating tables whose schemas are - controlled by such third parties. 
- -Change Date: 2031-04-29 - -Change License: GNU Affero General Public License v3.0 with a linking - exception - -For information about alternative licensing arrangements for the Software, -please visit: https://spacetimedb.com - -Notice - -The Business Source License (this document, or the “License”) is not an Open -Source license. However, the Licensed Work will eventually be made available -under an Open Source License, as stated in this License. - -License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. -“Business Source License” is a trademark of MariaDB Corporation Ab. - ------------------------------------------------------------------------------ - -Business Source License 1.1 - -Terms - -The Licensor hereby grants you the right to copy, modify, create derivative -works, redistribute, and make non-production use of the Licensed Work. The -Licensor may make an Additional Use Grant, above, permitting limited -production use. - -Effective on the Change Date, or the fourth anniversary of the first publicly -available distribution of a specific version of the Licensed Work under this -License, whichever comes first, the Licensor hereby grants you rights under -the terms of the Change License, and the rights granted in the paragraph -above terminate. - -If your use of the Licensed Work does not comply with the requirements -currently in effect as described in this License, you must purchase a -commercial license from the Licensor, its affiliated entities, or authorized -resellers, or you must refrain from using the Licensed Work. - -All copies of the original and modified Licensed Work, and derivative works -of the Licensed Work, are subject to this License. This License applies -separately for each version of the Licensed Work and the Change Date may vary -for each version of the Licensed Work released by Licensor. - -You must conspicuously display this License on each original or modified copy -of the Licensed Work. 
If you receive the Licensed Work in original or -modified form from a third party, the terms and conditions set forth in this -License apply to your use of that work. - -Any use of the Licensed Work in violation of this License will automatically -terminate your rights under this License for the current and all other -versions of the Licensed Work. - -This License does not grant you any right in any trademark or logo of -Licensor or its affiliates (provided that you may use a trademark or logo of -Licensor as expressly required by this License). - -TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, -EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND -TITLE. - -MariaDB hereby grants you permission to use this License’s text to license -your works, and to refer to it using the trademark “Business Source License”, -as long as you comply with the Covenants of Licensor below. - -Covenants of Licensor - -In consideration of the right to use this License’s text and the “Business -Source License” name and trademark, Licensor covenants to MariaDB, and to all -other recipients of the licensed work to be provided by Licensor: - -1. To specify as the Change License the GPL Version 2.0 or any later version, - or a license that is compatible with GPL Version 2.0 or a later version, - where “compatible” means that software provided under the Change License can - be included in a program with software provided under GPL Version 2.0 or a - later version. Licensor may specify additional Change Licenses without - limitation. - -2. To either: (a) specify an additional grant of rights to use that does not - impose any additional restriction on the right granted in this License, as - the Additional Use Grant; or (b) insert the text “None”. - -3. To specify a Change Date. - -4. 
Not to modify this License in any other way. - ------------------------------------------------------------------------------ - -Copyright (C) 2023 Clockwork Laboratories, Inc. - -This program is free software: you can redistribute it and/or modify it under -the terms of the GNU Affero General Public License, version 3, as published -by the Free Software Foundation. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -details. - -You should have received a copy of the GNU Affero General Public License -along with this program; if not, see . - -Additional permission under GNU GPL version 3 section 7 - -If you modify this Program, or any covered work, by linking or combining it -with SpacetimeDB (or a modified version of that library), containing parts -covered by the terms of the AGPL v3.0, the licensors of this Program grant -you additional permission to convey the resulting work. - -Additional permission under GNU AGPL version 3 section 13 - -If you modify this Program, or any covered work, by linking or combining it -with SpacetimeDB (or a modified version of that library), containing parts -covered by the terms of the AGPL v3.0, the licensors of this Program grant -you additional permission that, notwithstanding any other provision of this -License, you need not prominently offer all users interacting with your -modified version remotely through a computer network an opportunity to -receive the Corresponding Source of your version from a network server at no -charge, if your version supports such interaction. This permission does not -waive or modify any other obligations or terms of the AGPL v3.0, except for -the specific requirement set forth in section 13. - -A copy of the AGPL v3.0 license is reproduced below. 
- - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - -Copyright © 2007 Free Software Foundation, Inc. -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble -The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - -The licenses for most software and other practical works are designed to take -away your freedom to share and change the works. By contrast, our General -Public Licenses are intended to guarantee your freedom to share and change -all versions of a program--to make sure it remains free software for all its -users. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for them if you wish), that -you receive source code or can get it if you want it, that you can change the -software or use pieces of it in new free programs, and that you know you can -do these things. - -Developers that use our General Public Licenses protect your rights with two -steps: (1) assert copyright on the software, and (2) offer you this License -which gives you legal permission to copy, distribute and/or modify the -software. - -A secondary benefit of defending all users' freedom is that improvements made -in alternate versions of the program, if they receive widespread use, become -available for other developers to incorporate. Many developers of free -software are heartened and encouraged by the resulting cooperation. However, -in the case of software used on network servers, this result may fail to come -about. The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its source -code to the public. 
- -The GNU Affero General Public License is designed specifically to ensure -that, in such cases, the modified source code becomes available to the -community. It requires the operator of a network server to provide the source -code of the modified version running there to the users of that server. -Therefore, public use of a modified version, on a publicly accessible server, -gives the public access to the source code of the modified version. - -An older license, called the Affero General Public License and published by -Affero, was designed to accomplish similar goals. This is a different -license, not a version of the Affero GPL, but Affero has released a new -version of the Affero GPL which permits relicensing under this license. - -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS -0. Definitions. -"This License" refers to version 3 of the GNU Affero General Public License. - -"Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - -"The Program" refers to any copyrightable work licensed under this License. -Each licensee is addressed as "you". "Licensees" and "recipients" may be -individuals or organizations. - -To "modify" a work means to copy from or adapt all or part of the work in a -fashion requiring copyright permission, other than the making of an exact -copy. The resulting work is called a "modified version" of the earlier work -or a work "based on" the earlier work. - -A "covered work" means either the unmodified Program or a work based on the -Program. - -To "propagate" a work means to do anything with it that, without permission, -would make you directly or secondarily liable for infringement under -applicable copyright law, except executing it on a computer or modifying a -private copy. 
Propagation includes copying, distribution (with or without -modification), making available to the public, and in some countries other -activities as well. - -To "convey" a work means any kind of propagation that enables other parties -to make or receive copies. Mere interaction with a user through a computer -network, with no transfer of a copy, is not conveying. - -An interactive user interface displays "Appropriate Legal Notices" to the -extent that it includes a convenient and prominently visible feature that (1) -displays an appropriate copyright notice, and (2) tells the user that there -is no warranty for the work (except to the extent that warranties are -provided), that licensees may convey the work under this License, and how to -view a copy of this License. If the interface presents a list of user -commands or options, such as a menu, a prominent item in the list meets this -criterion. - -1. Source Code. -The "source code" for a work means the preferred form of the work for making -modifications to it. "Object code" means any non-source form of a work. - -A "Standard Interface" means an interface that either is an official standard -defined by a recognized standards body, or, in the case of interfaces -specified for a particular programming language, one that is widely used -among developers working in that language. - -The "System Libraries" of an executable work include anything, other than the -work as a whole, that (a) is included in the normal form of packaging a Major -Component, but which is not part of that Major Component, and (b) serves only -to enable use of the work with that Major Component, or to implement a -Standard Interface for which an implementation is available to the public in -source code form. 
A "Major Component", in this context, means a major -essential component (kernel, window system, and so on) of the specific -operating system (if any) on which the executable work runs, or a compiler -used to produce the work, or an object code interpreter used to run it. - -The "Corresponding Source" for a work in object code form means all the -source code needed to generate, install, and (for an executable work) run the -object code and to modify the work, including scripts to control those -activities. However, it does not include the work's System Libraries, or -general-purpose tools or generally available free programs which are used -unmodified in performing those activities but which are not part of the work. -For example, Corresponding Source includes interface definition files -associated with source files for the work, and the source code for shared -libraries and dynamically linked subprograms that the work is specifically -designed to require, such as by intimate data communication or control flow -between those subprograms and other parts of the work. - -The Corresponding Source need not include anything that users can regenerate -automatically from other parts of the Corresponding Source. - -The Corresponding Source for a work in source code form is that same work. - -2. Basic Permissions. -All rights granted under this License are granted for the term of copyright -on the Program, and are irrevocable provided the stated conditions are met. -This License explicitly affirms your unlimited permission to run the -unmodified Program. The output from running a covered work is covered by this -License only if the output, given its content, constitutes a covered work. -This License acknowledges your rights of fair use or other equivalent, as -provided by copyright law. - -You may make, run and propagate covered works that you do not convey, without -conditions so long as your license otherwise remains in force. 
You may convey -covered works to others for the sole purpose of having them make -modifications exclusively for you, or provide you with facilities for running -those works, provided that you comply with the terms of this License in -conveying all material for which you do not control copyright. Those thus -making or running the covered works for you must do so exclusively on your -behalf, under your direction and control, on terms that prohibit them from -making any copies of your copyrighted material outside their relationship -with you. - -Conveying under any other circumstances is permitted solely under the -conditions stated below. Sublicensing is not allowed; section 10 makes it -unnecessary. - -3. Protecting Users' Legal Rights From Anti-Circumvention Law. -No covered work shall be deemed part of an effective technological measure -under any applicable law fulfilling obligations under article 11 of the WIPO -copyright treaty adopted on 20 December 1996, or similar laws prohibiting or -restricting circumvention of such measures. - -When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention is -effected by exercising rights under this License with respect to the covered -work, and you disclaim any intention to limit operation or modification of -the work as a means of enforcing, against the work's users, your or third -parties' legal rights to forbid circumvention of technological measures. - -4. Conveying Verbatim Copies. -You may convey verbatim copies of the Program's source code as you receive -it, in any medium, provided that you conspicuously and appropriately publish -on each copy an appropriate copyright notice; keep intact all notices stating -that this License and any non-permissive terms added in accord with section 7 -apply to the code; keep intact all notices of the absence of any warranty; -and give all recipients a copy of this License along with the Program. 
- -You may charge any price or no price for each copy that you convey, and you -may offer support or warranty protection for a fee. - -5. Conveying Modified Source Versions. -You may convey a work based on the Program, or the modifications to produce -it from the Program, in the form of source code under the terms of section 4, -provided that you also meet all of these conditions: - -a) The work must carry prominent notices stating that you modified it, and -giving a relevant date. -b) The work must carry prominent notices stating that it is released under -this License and any conditions added under section 7. This requirement -modifies the requirement in section 4 to "keep intact all notices". -c) You must license the entire work, as a whole, under this License to anyone -who comes into possession of a copy. This License will therefore apply, along -with any applicable section 7 additional terms, to the whole of the work, and -all its parts, regardless of how they are packaged. This License gives no -permission to license the work in any other way, but it does not invalidate -such permission if you have separately received it. -d) If the work has interactive user interfaces, each must display Appropriate -Legal Notices; however, if the Program has interactive interfaces that do not -display Appropriate Legal Notices, your work need not make them do so. -A compilation of a covered work with other separate and independent works, -which are not by their nature extensions of the covered work, and which are -not combined with it such as to form a larger program, in or on a volume of a -storage or distribution medium, is called an "aggregate" if the compilation -and its resulting copyright are not used to limit the access or legal rights -of the compilation's users beyond what the individual works permit. Inclusion -of a covered work in an aggregate does not cause this License to apply to the -other parts of the aggregate. - -6. Conveying Non-Source Forms. 
-You may convey a covered work in object code form under the terms of sections -4 and 5, provided that you also convey the machine-readable Corresponding -Source under the terms of this License, in one of these ways: - -a) Convey the object code in, or embodied in, a physical product (including a -physical distribution medium), accompanied by the Corresponding Source fixed -on a durable physical medium customarily used for software interchange. -b) Convey the object code in, or embodied in, a physical product (including a -physical distribution medium), accompanied by a written offer, valid for at -least three years and valid for as long as you offer spare parts or customer -support for that product model, to give anyone who possesses the object code -either (1) a copy of the Corresponding Source for all the software in the -product that is covered by this License, on a durable physical medium -customarily used for software interchange, for a price no more than your -reasonable cost of physically performing this conveying of source, or (2) -access to copy the Corresponding Source from a network server at no charge. -c) Convey individual copies of the object code with a copy of the written -offer to provide the Corresponding Source. This alternative is allowed only -occasionally and noncommercially, and only if you received the object code -with such an offer, in accord with subsection 6b. -d) Convey the object code by offering access from a designated place (gratis -or for a charge), and offer equivalent access to the Corresponding Source in -the same way through the same place at no further charge. You need not -require recipients to copy the Corresponding Source along with the object -code. 
If the place to copy the object code is a network server, the -Corresponding Source may be on a different server (operated by you or a third -party) that supports equivalent copying facilities, provided you maintain -clear directions next to the object code saying where to find the -Corresponding Source. Regardless of what server hosts the Corresponding -Source, you remain obligated to ensure that it is available for as long as -needed to satisfy these requirements. -e) Convey the object code using peer-to-peer transmission, provided you -inform other peers where the object code and Corresponding Source of the work -are being offered to the general public at no charge under subsection 6d. -A separable portion of the object code, whose source code is excluded from -the Corresponding Source as a System Library, need not be included in -conveying the object code work. - -A "User Product" is either (1) a "consumer product", which means any tangible -personal property which is normally used for personal, family, or household -purposes, or (2) anything designed or sold for incorporation into a dwelling. -In determining whether a product is a consumer product, doubtful cases shall -be resolved in favor of coverage. For a particular product received by a -particular user, "normally used" refers to a typical or common use of that -class of product, regardless of the status of the particular user or of the -way in which the particular user actually uses, or expects or is expected to -use, the product. A product is a consumer product regardless of whether the -product has substantial commercial, industrial or non-consumer uses, unless -such uses represent the only significant mode of use of the product. - -"Installation Information" for a User Product means any methods, procedures, -authorization keys, or other information required to install and execute -modified versions of a covered work in that User Product from a modified -version of its Corresponding Source. 
The information must suffice to ensure -that the continued functioning of the modified object code is in no case -prevented or interfered with solely because modification has been made. - -If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as part of -a transaction in which the right of possession and use of the User Product is -transferred to the recipient in perpetuity or for a fixed term (regardless of -how the transaction is characterized), the Corresponding Source conveyed -under this section must be accompanied by the Installation Information. But -this requirement does not apply if neither you nor any third party retains -the ability to install modified object code on the User Product (for example, -the work has been installed in ROM). - -The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates for -a work that has been modified or installed by the recipient, or for the User -Product in which it has been modified or installed. Access to a network may -be denied when the modification itself materially and adversely affects the -operation of the network or violates the rules and protocols for -communication across the network. - -Corresponding Source conveyed, and Installation Information provided, in -accord with this section must be in a format that is publicly documented (and -with an implementation available to the public in source code form), and must -require no special password or key for unpacking, reading or copying. - -7. Additional Terms. -"Additional permissions" are terms that supplement the terms of this License -by making exceptions from one or more of its conditions. Additional -permissions that are applicable to the entire Program shall be treated as -though they were included in this License, to the extent that they are valid -under applicable law. 
If additional permissions apply only to part of the -Program, that part may be used separately under those permissions, but the -entire Program remains governed by this License without regard to the -additional permissions. - -When you convey a copy of a covered work, you may at your option remove any -additional permissions from that copy, or from any part of it. (Additional -permissions may be written to require their own removal in certain cases when -you modify the work.) You may place additional permissions on material, added -by you to a covered work, for which you have or can give appropriate -copyright permission. - -Notwithstanding any other provision of this License, for material you add to -a covered work, you may (if authorized by the copyright holders of that -material) supplement the terms of this License with terms: - -a) Disclaiming warranty or limiting liability differently from the terms of -sections 15 and 16 of this License; or -b) Requiring preservation of specified reasonable legal notices or author -attributions in that material or in the Appropriate Legal Notices displayed -by works containing it; or -c) Prohibiting misrepresentation of the origin of that material, or requiring -that modified versions of such material be marked in reasonable ways as -different from the original version; or -d) Limiting the use for publicity purposes of names of licensors or authors -of the material; or -e) Declining to grant rights under trademark law for use of some trade names, -trademarks, or service marks; or -f) Requiring indemnification of licensors and authors of that material by -anyone who conveys the material (or modified versions of it) with contractual -assumptions of liability to the recipient, for any liability that these -contractual assumptions directly impose on those licensors and authors. -All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is governed -by this License along with a term that is a further restriction, you may -remove that term. If a license document contains a further restriction but -permits relicensing or conveying under this License, you may add to a covered -work material governed by the terms of that license document, provided that -the further restriction does not survive such relicensing or conveying. - -If you add terms to a covered work in accord with this section, you must -place, in the relevant source files, a statement of the additional terms that -apply to those files, or a notice indicating where to find the applicable -terms. - -Additional terms, permissive or non-permissive, may be stated in the form of -a separately written license, or stated as exceptions; the above requirements -apply either way. - -8. Termination. -You may not propagate or modify a covered work except as expressly provided -under this License. Any attempt otherwise to propagate or modify it is void, -and will automatically terminate your rights under this License (including -any patent licenses granted under the third paragraph of section 11). - -However, if you cease all violation of this License, then your license from a -particular copyright holder is reinstated (a) provisionally, unless and until -the copyright holder explicitly and finally terminates your license, and (b) -permanently, if the copyright holder fails to notify you of the violation by -some reasonable means prior to 60 days after the cessation. - -Moreover, your license from a particular copyright holder is reinstated -permanently if the copyright holder notifies you of the violation by some -reasonable means, this is the first time you have received notice of -violation of this License (for any work) from that copyright holder, and you -cure the violation prior to 30 days after your receipt of the notice. 
- -Termination of your rights under this section does not terminate the licenses -of parties who have received copies or rights from you under this License. If -your rights have been terminated and not permanently reinstated, you do not -qualify to receive new licenses for the same material under section 10. - -9. Acceptance Not Required for Having Copies. -You are not required to accept this License in order to receive or run a copy -of the Program. Ancillary propagation of a covered work occurring solely as a -consequence of using peer-to-peer transmission to receive a copy likewise -does not require acceptance. However, nothing other than this License grants -you permission to propagate or modify any covered work. These actions -infringe copyright if you do not accept this License. Therefore, by modifying -or propagating a covered work, you indicate your acceptance of this License -to do so. - -10. Automatic Licensing of Downstream Recipients. -Each time you convey a covered work, the recipient automatically receives a -license from the original licensors, to run, modify and propagate that work, -subject to this License. You are not responsible for enforcing compliance by -third parties with this License. - -An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered work -results from an entity transaction, each party to that transaction who -receives a copy of the work also receives whatever licenses to the work the -party's predecessor in interest had or could give under the previous -paragraph, plus a right to possession of the Corresponding Source of the work -from the predecessor in interest, if the predecessor has it or can get it -with reasonable efforts. - -You may not impose any further restrictions on the exercise of the rights -granted or affirmed under this License. 
For example, you may not impose a -license fee, royalty, or other charge for exercise of rights granted under -this License, and you may not initiate litigation (including a cross-claim or -counterclaim in a lawsuit) alleging that any patent claim is infringed by -making, using, selling, offering for sale, or importing the Program or any -portion of it. - -11. Patents. -A "contributor" is a copyright holder who authorizes use under this License -of the Program or a work on which the Program is based. The work thus -licensed is called the contributor's "contributor version". - -A contributor's "essential patent claims" are all patent claims owned or -controlled by the contributor, whether already acquired or hereafter -acquired, that would be infringed by some manner, permitted by this License, -of making, using, or selling its contributor version, but do not include -claims that would be infringed only as a consequence of further modification -of the contributor version. For purposes of this definition, "control" -includes the right to grant patent sublicenses in a manner consistent with -the requirements of this License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free patent -license under the contributor's essential patent claims, to make, use, sell, -offer for sale, import and otherwise run, modify and propagate the contents -of its contributor version. - -In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent (such -as an express permission to practice a patent or covenant not to sue for -patent infringement). To "grant" such a patent license to a party means to -make such an agreement or commitment not to enforce a patent against the -party. 
- -If you convey a covered work, knowingly relying on a patent license, and the -Corresponding Source of the work is not available for anyone to copy, free of -charge and under the terms of this License, through a publicly available -network server or other readily accessible means, then you must either (1) -cause the Corresponding Source to be so available, or (2) arrange to deprive -yourself of the benefit of the patent license for this particular work, or -(3) arrange, in a manner consistent with the requirements of this License, to -extend the patent license to downstream recipients. "Knowingly relying" means -you have actual knowledge that, but for the patent license, your conveying -the covered work in a country, or your recipient's use of the covered work in -a country, would infringe one or more identifiable patents in that country -that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or arrangement, -you convey, or propagate by procuring conveyance of, a covered work, and -grant a patent license to some of the parties receiving the covered work -authorizing them to use, propagate, modify or convey a specific copy of the -covered work, then the patent license you grant is automatically extended to -all recipients of the covered work and works based on it. - -A patent license is "discriminatory" if it does not include within the scope -of its coverage, prohibits the exercise of, or is conditioned on the -non-exercise of one or more of the rights that are specifically granted under -this License. 
You may not convey a covered work if you are a party to an -arrangement with a third party that is in the business of distributing -software, under which you make payment to the third party based on the extent -of your activity of conveying the work, and under which the third party -grants, to any of the parties who would receive the covered work from you, a -discriminatory patent license (a) in connection with copies of the covered -work conveyed by you (or copies made from those copies), or (b) primarily for -and in connection with specific products or compilations that contain the -covered work, unless you entered into that arrangement, or that patent -license was granted, prior to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting any -implied license or other defenses to infringement that may otherwise be -available to you under applicable patent law. - -12. No Surrender of Others' Freedom. -If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not excuse -you from the conditions of this License. If you cannot convey a covered work -so as to satisfy simultaneously your obligations under this License and any -other pertinent obligations, then as a consequence you may not convey it at -all. For example, if you agree to terms that obligate you to collect a -royalty for further conveying from those to whom you convey the Program, the -only way you could satisfy both those terms and this License would be to -refrain entirely from conveying the Program. - -13. Remote Network Interaction; Use with the GNU General Public License. 
-Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users interacting -with it remotely through a computer network (if your version supports such -interaction) an opportunity to receive the Corresponding Source of your -version by providing access to the Corresponding Source from a network server -at no charge, through some standard or customary means of facilitating -copying of software. This Corresponding Source shall include the -Corresponding Source for any work covered by version 3 of the GNU General -Public License that is incorporated pursuant to the following paragraph. - -Notwithstanding any other provision of this License, you have permission to -link or combine any covered work with a work licensed under version 3 of the -GNU General Public License into a single combined work, and to convey the -resulting work. The terms of this License will continue to apply to the part -which is the covered work, but the work with which it is combined will remain -governed by version 3 of the GNU General Public License. - -14. Revised Versions of this License. -The Free Software Foundation may publish revised and/or new versions of the -GNU Affero General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies that a certain numbered version of the GNU Affero General Public -License "or any later version" applies to it, you have the option of -following the terms and conditions either of that numbered version or of any -later version published by the Free Software Foundation. If the Program does -not specify a version number of the GNU Affero General Public License, you -may choose any version ever published by the Free Software Foundation. 
- -If the Program specifies that a proxy can decide which future versions of the -GNU Affero General Public License can be used, that proxy's public statement -of acceptance of a version permanently authorizes you to choose that version -for the Program. - -Later license versions may give you additional or different permissions. -However, no additional obligations are imposed on any author or copyright -holder as a result of your choosing to follow a later version. - -15. Disclaimer of Warranty. -THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE -LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, -EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE -ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. -SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY -SERVICING, REPAIR OR CORRECTION. - -16. Limitation of Liability. -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL -ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE -PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE -OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR -DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR -A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH -HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -17. Interpretation of Sections 15 and 16. 
-If the disclaimer of warranty and limitation of liability provided above -cannot be given local legal effect according to their terms, reviewing courts -shall apply local law that most closely approximates an absolute waiver of -all civil liability in connection with the Program, unless a warranty or -assumption of liability accompanies a copy of the Program in return for a -fee. - -END OF TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs -If you develop a new program, and you want it to be of the greatest possible -use to the public, the best way to achieve this is to make it free software -which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to attach -them to the start of each source file to most effectively state the exclusion -of warranty; and each file should have at least the "copyright" line and a -pointer to where the full notice is found. - -SpacetimeDB: A database which replaces your server. -Copyright (C) 2023 Clockwork Laboratories, Inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <https://www.gnu.org/licenses/>. -Also add information on how to contact you by electronic and paper mail. - -If your software can interact with users remotely through a computer network, -you should also make sure that it provides a way for users to get its source. 
-For example, if your program is a web application, its interface could -display a "Source" link that leads users to an archive of the code. There are -many ways you could offer source, and different solutions will be better for -different programs; see section 13 for the specific requirements. - -You should also get your employer (if you work as a programmer) or school, if -any, to sign a "copyright disclaimer" for the program, if necessary. For more -information on this, and how to apply and follow the GNU AGPL, see -. diff --git a/crates/runtime/LICENSE b/crates/runtime/LICENSE new file mode 120000 index 00000000000..8540cf8a991 --- /dev/null +++ b/crates/runtime/LICENSE @@ -0,0 +1 @@ +../../licenses/BSL.txt \ No newline at end of file diff --git a/crates/runtime/src/sim/executor/mod.rs b/crates/runtime/src/sim/executor/mod.rs index 0b874be8afe..5575cc08617 100644 --- a/crates/runtime/src/sim/executor/mod.rs +++ b/crates/runtime/src/sim/executor/mod.rs @@ -13,8 +13,8 @@ use spin::Mutex; use crate::sim::{time::TimeHandle, Rng}; mod task; -pub use task::{AbortHandle, JoinError, JoinHandle}; use task::Abortable; +pub use task::{AbortHandle, JoinError, JoinHandle}; type Runnable = async_task::Runnable; diff --git a/crates/runtime/src/sim/executor/task.rs b/crates/runtime/src/sim/executor/task.rs index d98ad3d8348..bf03a8293d3 100644 --- a/crates/runtime/src/sim/executor/task.rs +++ b/crates/runtime/src/sim/executor/task.rs @@ -38,10 +38,7 @@ impl JoinHandle { } /// Poll the underlying async_task::Task for its output. - pub(crate) fn poll_join( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { + pub(crate) fn poll_join(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { // async_task::Task implements Future. Polling it drives the wrapped // Abortable future inside the executor. 
Pin::new(&mut self.task).poll(cx) diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index f8bf3571cf2..56f707201be 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -1,7 +1,13 @@ mod sleep; use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; -use core::{fmt, future::Future, pin::pin, task::{Poll, Waker}, time::Duration}; +use core::{ + fmt, + future::Future, + pin::pin, + task::{Poll, Waker}, + time::Duration, +}; use sleep::wake_all; use spin::Mutex; From 78765999bf20abdd05d1d09e0d18a36f83c3f937 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Thu, 14 May 2026 23:11:41 +0530 Subject: [PATCH 38/40] lint --- crates/runtime/src/lib.rs | 2 ++ crates/runtime/src/sim/executor/mod.rs | 2 +- crates/runtime/src/sim/rng.rs | 16 ++++++++-------- crates/runtime/src/sim_std.rs | 2 ++ crates/runtime/tests/sim_e2e.rs | 1 + 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 5611d5db3ed..7df6fe2de53 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -276,7 +276,9 @@ impl Handle { #[cfg(test)] mod tests { + #[allow(unused_imports)] use super::*; + #[cfg(any(feature = "tokio", feature = "simulation"))] use std::sync::{ atomic::{AtomicBool, Ordering}, Arc, diff --git a/crates/runtime/src/sim/executor/mod.rs b/crates/runtime/src/sim/executor/mod.rs index 5575cc08617..ff75cba0aef 100644 --- a/crates/runtime/src/sim/executor/mod.rs +++ b/crates/runtime/src/sim/executor/mod.rs @@ -489,7 +489,7 @@ impl Executor { self.run_all_ready(); if task.is_finished() { let waker = Waker::noop(); - return match Pin::new(&mut task).poll(&mut Context::from_waker(&waker)) { + return match Pin::new(&mut task).poll(&mut Context::from_waker(waker)) { Poll::Ready(output) => output, Poll::Pending => unreachable!("task.is_finished() was true"), }; diff --git a/crates/runtime/src/sim/rng.rs b/crates/runtime/src/sim/rng.rs index 
b39219290dd..c210ff8b781 100644 --- a/crates/runtime/src/sim/rng.rs +++ b/crates/runtime/src/sim/rng.rs @@ -169,14 +169,14 @@ impl GlobalRng { #[allow(dead_code)] pub(crate) fn finish_determinism_check(&self) -> Result<(), String> { let inner = self.inner.lock(); - if let Some((log, consumed)) = &inner.check { - if *consumed != log.len() { - return Err(format!( - "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", - inner.seed, - log.len() - )); - } + if let Some((log, consumed)) = &inner.check + && *consumed != log.len() + { + return Err(format!( + "non-determinism detected for seed {}: consumed {consumed} of {} checkpoints", + inner.seed, + log.len() + )); } Ok(()) } diff --git a/crates/runtime/src/sim_std.rs b/crates/runtime/src/sim_std.rs index 08f82b9495e..5305c6ea166 100644 --- a/crates/runtime/src/sim_std.rs +++ b/crates/runtime/src/sim_std.rs @@ -5,6 +5,8 @@ //! running, checks determinism by replaying a seed in fresh OS threads, and //! intercepts a few libc calls so std code cannot silently escape determinism. 
+#![allow(clippy::disallowed_macros)] + use alloc::boxed::Box; use core::{cell::Cell, future::Future}; use std::sync::OnceLock; diff --git a/crates/runtime/tests/sim_e2e.rs b/crates/runtime/tests/sim_e2e.rs index 1f505696801..53c218da729 100644 --- a/crates/runtime/tests/sim_e2e.rs +++ b/crates/runtime/tests/sim_e2e.rs @@ -1,4 +1,5 @@ #![cfg(feature = "simulation")] +#![allow(clippy::disallowed_macros)] use std::{sync::Arc, time::Duration}; From 0b2c53c2463a637536249aab6b9af3d6d30c82c9 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Fri, 15 May 2026 13:13:43 +0530 Subject: [PATCH 39/40] unused import lint --- crates/runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 7df6fe2de53..6fdeeb81dfb 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -278,7 +278,7 @@ impl Handle { mod tests { #[allow(unused_imports)] use super::*; - #[cfg(any(feature = "tokio", feature = "simulation"))] + #[allow(unused_imports)] use std::sync::{ atomic::{AtomicBool, Ordering}, Arc, From 740170ad6c3cfa4acf5d80fd25546ad68c4cfff0 Mon Sep 17 00:00:00 2001 From: Shubham Mishra Date: Tue, 19 May 2026 17:05:54 +0530 Subject: [PATCH 40/40] review commentary --- crates/runtime/src/lib.rs | 12 ++++++++++++ crates/runtime/src/sim/time/mod.rs | 6 ++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 6fdeeb81dfb..eaed2f35f46 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -39,6 +39,18 @@ enum JoinHandleInner { Tokio(tokio::task::JoinHandle), #[cfg(feature = "simulation")] Simulation(sim::JoinHandle), + // Placeholder variant left behind whenever the real backend handle needs + // to be extracted from this enum while keeping the `JoinHandle` alive. + // + // This happens in two cases: + // + // 1. 
After the task output has been yielded — the backend handle no longer + // owns `T`, so we swap it out for a neutral placeholder rather than + // leave a semantically-invalid variant in place. + // 2. In `Drop`, so we can call `detach()` on the simulation handle (which + // keeps the task alive) while tokio handles can just be dropped. + // + // `PhantomData` is here only to keep the enum covariant in `T`. Detached(PhantomData), } diff --git a/crates/runtime/src/sim/time/mod.rs b/crates/runtime/src/sim/time/mod.rs index 56f707201be..33a6659028f 100644 --- a/crates/runtime/src/sim/time/mod.rs +++ b/crates/runtime/src/sim/time/mod.rs @@ -15,8 +15,10 @@ pub use sleep::Sleep; /// Shared virtual clock and timer registry for one simulation runtime. /// -/// Virtual clock that only advances when explicitly driven — no wall-clock -/// progression, like Tokio's time-pause mode. +/// This is conceptually similar to Tokio's paused-time: +/// simulated time does not track wall clock time, and the +/// executor may jump directly to the next pending timer when no runnable work +/// remains. /// /// All cloned handles observe the same virtual `now`, pending timers, and /// timer-id sequence. The executor uses this handle both for explicit