diff --git a/Cargo.lock b/Cargo.lock index 812c63f88ba..84097cd7086 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8194,6 +8194,28 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "spacetimedb-dst" +version = "2.2.0" +dependencies = [ + "anyhow", + "clap 4.5.50", + "futures-util", + "spacetimedb-commitlog", + "spacetimedb-core", + "spacetimedb-datastore", + "spacetimedb-durability", + "spacetimedb-lib 2.2.0", + "spacetimedb-primitives 2.2.0", + "spacetimedb-runtime", + "spacetimedb-sats 2.2.0", + "spacetimedb-schema", + "spacetimedb-snapshot", + "spacetimedb-table", + "tracing", + "tracing-subscriber", +] + [[package]] name = "spacetimedb-durability" version = "2.2.0" diff --git a/Cargo.toml b/Cargo.toml index f4f74204ea3..094ad0d6b01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "crates/commitlog", "crates/core", "crates/data-structures", + "crates/dst", "crates/datastore", "crates/durability", "crates/execution", diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index abc8729c978..7123b7e3bb6 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -156,7 +156,7 @@ impl Options { /// The canonical commitlog API over a repository backend `R`. /// /// The default backend is the on-disk filesystem repository -/// [`repo::Fs`], but tests may supply another [`Repo`] +/// [`repo::Fs`], but tests and simulators may supply another [`Repo`] /// implementation. /// /// Records in the log are of type `T`, which canonically is instantiated to @@ -203,7 +203,7 @@ where { /// Open the log in `repo` with [`Options`]. /// - /// This is useful for tests which provide a repository + /// This is useful for tests and simulators which provide a repository /// implementation other than [`repo::Fs`]. 
pub fn open_with_repo(repo: R, opts: Options) -> io::Result { let inner = commitlog::Generic::open(repo, opts)?; diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 3d79f7f1e28..76c5d2e365b 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -161,6 +161,8 @@ pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +impl RepoWithoutLockFile for Fs {} + #[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 0e202229dea..f194cb60a48 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader}; +use tokio::io::{AsyncRead, BufReader, ReadBuf}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index f749f72850a..07ec4d356c3 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -44,6 
+44,7 @@ pub(super) fn spawn_close(durability: Arc, runtime: &Handle, databas info!("{label} durability shut down at tx offset: {offset:?}"); } } + log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index ce3ef5d6841..c54a287bec2 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,10 +4,10 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::DynSnapshotRepo; +use spacetimedb_runtime::Handle; +use spacetimedb_snapshot::{DynSnapshotRepo, SnapshotStore}; use crate::{messages::control_db::Database, util::asyncify}; -use spacetimedb_runtime::Handle; use super::{ relational_db::{self, Txdata}, @@ -36,6 +36,8 @@ pub struct Persistence { /// Currently the expectation is that the reported size is the commitlog /// size only. pub disk_size: DiskSizeFn, + /// Optional snapshot store used during database restore. + pub snapshot_store: Option>, /// An optional [SnapshotWorker]. /// /// The current expectation is that snapshots are only enabled for @@ -63,9 +65,11 @@ impl Persistence { snapshots: Option, runtime: Handle, ) -> Self { + let snapshot_store = snapshots.as_ref().map(SnapshotWorker::snapshot_store); Self { durability: Arc::new(durability), disk_size: Arc::new(disk_size), + snapshot_store, snapshots, runtime, } @@ -76,6 +80,13 @@ impl Persistence { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } + /// If snapshot restore is enabled, get the [SnapshotStore] to read from. + pub fn snapshot_store(&self) -> Option> { + self.snapshot_store + .clone() + .or_else(|| self.snapshots.as_ref().map(SnapshotWorker::snapshot_store)) + } + /// Get the [TxOffset] reported as durable by the [Durability] impl. 
/// /// Returns `Ok(None)` if no offset is durable yet, and `Err(DurabilityExited)` @@ -107,6 +118,7 @@ impl Persistence { |Self { durability, disk_size, + snapshot_store: _, snapshots, runtime, }| (Some(durability), Some(disk_size), snapshots, Some(runtime)), @@ -173,6 +185,7 @@ impl PersistenceProvider for LocalPersistenceProvider { Ok(Persistence { durability, disk_size, + snapshot_store: Some(snapshot_worker.snapshot_store()), snapshots: Some(snapshot_worker), runtime, }) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 57230e8866b..75efb0ad5ee 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -52,7 +52,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository, SnapshotStore}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -279,10 +279,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); - let snapshot_repo = persistence.as_ref().and_then(|p| p.snapshot_repo()); + let snapshot_store = persistence.as_ref().and_then(|p| p.snapshot_store()); let inner = Self::restore_from_snapshot_or_bootstrap( database_identity, - snapshot_repo.as_deref(), + snapshot_store.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -473,7 +473,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_repo: Option<&DynSnapshotRepo>, + snapshot_store: Option<&dyn SnapshotStore>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -481,14 +481,14 @@ impl RelationalDB { // Try to 
load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &DynSnapshotRepo, + snapshot_store: &dyn SnapshotStore, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { log::info!("[{database_identity}] DATABASE: restoring snapshot of tx_offset {snapshot_offset}"); let start = std::time::Instant::now(); - let snapshot = snapshot_repo + let snapshot = snapshot_store .read_snapshot(snapshot_offset, page_pool) .map_err(Box::new)?; @@ -554,11 +554,11 @@ impl RelationalDB { } } - if let Some((snapshot_repo, durable_tx_offset)) = snapshot_repo.zip(durable_tx_offset) { + if let Some((snapshot_store, durable_tx_offset)) = snapshot_store.zip(durable_tx_offset) { // Mark any newer snapshots as invalid, as the history past // `durable_tx_offset` may have been reset and thus diverge from // any snapshots taken earlier. - snapshot_repo + snapshot_store .invalidate_newer_snapshots(durable_tx_offset) .map_err(|e| RestoreSnapshotError::Invalidate { offset: durable_tx_offset, @@ -569,7 +569,7 @@ impl RelationalDB { // range `(min_commitlog_offset + 1)..=durable_tx_offset`. let mut upper_bound = durable_tx_offset; loop { - let Some(snapshot_offset) = snapshot_repo + let Some(snapshot_offset) = snapshot_store .latest_snapshot_older_than(upper_bound) .map_err(Box::new)? else { @@ -579,7 +579,7 @@ impl RelationalDB { log::debug!("snapshot_offset={snapshot_offset} min_commitlog_offset={min_commitlog_offset}"); break; } - match try_load_snapshot(&database_identity, snapshot_repo, snapshot_offset, &page_pool) { + match try_load_snapshot(&database_identity, snapshot_store, snapshot_offset, &page_pool) { Ok(snapshot) if snapshot.database_identity != database_identity => { return Err(RestoreSnapshotError::IdentityMismatch { expected: database_identity, @@ -595,7 +595,7 @@ impl RelationalDB { // Newly created snapshots should not depend on it. 
if !is_transient_error(&e) { log::info!("invalidating bad snapshot at {snapshot_offset}"); - snapshot_repo.invalidate_snapshot(snapshot_offset).map_err(|e| { + snapshot_store.invalidate_snapshot(snapshot_offset).map_err(|e| { RestoreSnapshotError::Invalidate { offset: snapshot_offset, source: Box::new(e), @@ -1964,6 +1964,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_store: snapshots.as_ref().map(SnapshotWorker::snapshot_store), snapshots, runtime, }; @@ -2090,6 +2091,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_store: snapshots.as_ref().map(SnapshotWorker::snapshot_store), snapshots, runtime, }; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 178bbda3d72..ac792ee0293 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, CompressionStats, DynSnapshotRepo, SnapshotRepo, SnapshotStore}; use tokio::sync::watch; use crate::worker_metrics::WORKER_METRICS; @@ -62,6 +62,7 @@ pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, snapshot_repository: Arc, + snapshot_store: Arc, } impl SnapshotWorker { @@ -70,20 +71,25 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression, rt: Handle) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repo: Arc, compression: Compression, rt: Handle) -> Self + where + R: SnapshotRepo + 'static, + { + let snapshot_store: Arc = snapshot_repo.clone(); + let snapshot_repo: Arc = snapshot_repo; + let database = snapshot_repo.database_identity(); + let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), rt: rt.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), rt: rt.clone(), @@ -94,7 +100,8 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository, + snapshot_repository: snapshot_repo, + snapshot_store, } } @@ -113,6 +120,11 @@ impl SnapshotWorker { self.snapshot_repository.clone() } + /// Get the snapshot store this worker is operating on. + pub fn snapshot_store(&self) -> Arc { + self.snapshot_store.clone() + } + /// Request a snapshot to be taken. /// /// The snapshot will be taken at some point in the future. 
diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 4c94df74ab8..742e2eddf83 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2102,6 +2102,7 @@ mod tests { Some(Persistence { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), + snapshot_store: None, snapshots: None, runtime: spacetimedb_runtime::Handle::tokio(rt), }), diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index e9d67103b16..254f44c4e01 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -38,7 +38,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot, SnapshotStore}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -259,6 +259,28 @@ impl Locking { Ok(Some((tx_offset, unflushed_snapshot))) } + pub fn take_snapshot_store_internal( + committed_state: &RwLock, + store: &dyn SnapshotStore, + ) -> Result> { + let mut committed_state = committed_state.write(); + let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { + return Ok(None); + }; + + log::info!( + "Capturing snapshot of database {:?} at TX offset {}", + store.database_identity(), + tx_offset, + ); + + let (mut tables, blob_store) = committed_state.persistent_tables_and_blob_store(); + store + .capture_snapshot(&mut tables, blob_store, tx_offset) + .map(Some) + .map_err(Into::into) + } + /// Returns a list over all the currently connected clients, /// reading from the `st_clients` system table. 
pub fn connected_clients<'a>( @@ -2824,6 +2846,38 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_try_begin_mut_tx_reports_writer_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_mut_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.rollback_mut_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after rollback"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + + #[test] + fn test_try_begin_mut_tx_reports_read_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.release_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after read release"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + #[test] fn test_scheduled_table_insert_and_update() -> ResultTest<()> { // Build the minimal schema that is a valid scheduler table. 
diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml new file mode 100644 index 00000000000..c3e2b3ea519 --- /dev/null +++ b/crates/dst/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "spacetimedb-dst" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Deterministic simulation testing utilities for SpacetimeDB crates" +rust-version.workspace = true + +[lints] +workspace = true + +[[bin]] +name = "spacetimedb-dst" +path = "src/main.rs" +bench = false + +[dependencies] +anyhow.workspace = true +clap.workspace = true +futures-util.workspace = true +spacetimedb-datastore = { workspace = true, features = ["test"] } +spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0", features = ["test"] } +spacetimedb-commitlog = { workspace = true, features = ["test"] } +spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } +spacetimedb-lib.workspace = true +spacetimedb-snapshot.workspace = true +spacetimedb-primitives.workspace = true +spacetimedb-runtime = { workspace = true, features = ["simulation"] } +spacetimedb-sats.workspace = true +spacetimedb-schema = { workspace = true, features = ["test"] } +spacetimedb-table.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md new file mode 100644 index 00000000000..e9c756a5646 --- /dev/null +++ b/crates/dst/README.md @@ -0,0 +1,227 @@ +# `spacetimedb-dst` + +Deterministic simulation testing for SpacetimeDB components. + +DST is not a generic random fuzzer. It is a seed-replayable framework for +generating meaningful SpacetimeDB histories, executing them against real +implementation paths, and checking semantic properties while the run is still +in progress. + +## First Principles + +- A failing run must be reproducible from target, scenario, seed, run budget, + and fault profile. 
Use `--max-interactions` for exact replay; `--duration` is + a wall-clock soak budget and may stop at a different step count on another + machine or runtime. +- Workloads describe legal but stressful user behavior. They should not depend + on target internals. +- Targets execute interactions against real SpacetimeDB code. +- Properties check externally observable behavior, preferably against a simple + model or a replayed durable history. +- Generation, execution, and property checking stay separate so failures are + diagnosable as workload bugs, target bugs, or weak assertions. +- Runs stream interactions instead of materializing a full plan by default. +- Fault injection is explicit, configurable, and summarized in the outcome. +- Shared probability and weighting logic belongs in `workload::strategy`, not + ad hoc scenario code. + +## Current Architecture + +The CLI selects a target, scenario, seed, budget, and fault profile. The shared +runner pulls one interaction at a time from a source, sends it to the target, +and asks the property runtime to observe the result. + +```text +CLI -> TargetDescriptor -> WorkloadSource -> TargetEngine -> Observation + \-> StreamingProperties -> Outcome +``` + +The core contracts are: + +- `WorkloadSource`: deterministic pull-based interaction stream. +- `TargetEngine`: target-specific execution and outcome collection. +- `StreamingProperties`: reusable property checks over observations and target + accessors. + +## Client Model + +DST workloads use shared logical client IDs rather than target-owned ad hoc +connection numbers. A `ClientId` is a stable actor in the generated history; a +`SessionId` is one live connection/session for that actor. A single client can +own multiple active sessions, which matters for reconnect, multi-tab, and future +replication traffic. Targets translate those IDs into their own handles: + +- `relational-db-commitlog` maps `SessionId` to direct write/read transaction + slots. 
+- future replication targets can map `SessionId` plus endpoint/node IDs to a + client connection routed through the simulated network. + +Concrete handles stay target-owned. Shared workloads should carry logical +identity and lifecycle intent, not `RelTx`, websocket handles, or target-specific +connection objects. + +## Workload Composition + +DST workloads use three building blocks: + +- **Source:** emits a deterministic stream of interactions. +- **Profile:** configures weights, schema shape, and generation policy. +- **Layer:** wraps a source and adds lifecycle, fault, or cross-cutting + interactions. + +`table_ops` is the base table-transaction workload. `commitlog_ops` composes it +and injects durability lifecycle operations such as sync, close/reopen, dynamic +table create/migrate/drop, and replay checks. + +Use this rule of thumb: + +- Add a new profile when the interaction language is unchanged and only weights + or schema shape differ. +- Add a new layer when you are adding lifecycle behavior around an existing + source. +- Add a new workload family only when the interaction vocabulary is genuinely + different. + +## Table Operation Semantics + +The table workload keeps the executable operation language small. Similar +cases converge into physical operations such as `InsertRows`, `DeleteRows`, and +`BeginTx`; the generated interaction also carries a case label for coverage and +debug output. + +Correctness does not come from that label. 
The property runtime asks its model +what the physical operation should do: + +- inserting fresh rows should mutate the table +- inserting an exact visible row should be an idempotent no-op +- inserting an existing primary id with a different payload should report a + unique-key error +- deleting visible rows should mutate the table +- deleting absent rows should report a missing-row error +- beginning or writing behind another writer should report a write conflict +- query operations (`PointLookup`, `PredicateCount`, `RangeScan`, `FullScan`) + should match the model-visible state + +The case label still matters for summaries. It lets a run report that it hit +`ExactDuplicateInsert` or `UniqueKeyConflictInsert`, without teaching the target +or properties to trust generator-provided expectations. + +## Current Targets + +- `relational-db-commitlog`: runs table and commitlog lifecycle interactions + against `RelationalDB`, local durability, dynamic schema operations, + close/reopen, and replay-from-history checks. + +## Properties + +Properties live in `src/properties.rs` and are selected by target. +Table-oriented properties use `TargetPropertyAccess` so the property runtime can +ask a target for rows, counts, lookups, and range scans without knowing target +storage internals. + +Current property families include: + +- insert/select and delete/select checks +- observed error vs model-predicted error matching +- model-predicted no-op checks +- point lookup, predicate count, range scan, and full scan vs the table oracle +- NoREC-style optimizer-vs-direct checks +- TLP-style true/false/null partition checks +- index range exclusion checks +- banking mirror-table invariants +- dynamic migration auto-increment checks +- durable replay state vs the oracle committed model + +## Fault Injection + +`relational-db-commitlog` can wrap the in-memory commitlog repo in +`BuggifiedRepo`. Fault decisions are deterministic from the run seed and +summarized in the final outcome. 
+ +Profiles: + +- `off`: no injected disk behavior. +- `light`: latency and occasional short I/O. +- `default`: stronger latency and short I/O pressure. +- `aggressive`: higher latency and short I/O rates. I/O error hooks exist but + are currently disabled in profile-driven runs because local durability does + not yet classify those errors as recoverable target outcomes. + +## Running + +Fast local run: + +```bash +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 200 +``` + +Scenario examples: + +```bash +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m +``` + +Run with commitlog faults: + +```bash +cargo run -p spacetimedb-dst -- run \ + --target relational-db-commitlog \ + --seed 42 \ + --max-interactions 400 \ + --commitlog-fault-profile default +``` + +Trace every interaction: + +```bash +RUST_LOG=trace cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --duration 5m +``` + +## Run Budgets + +Prefer `--max-interactions` when reporting or replaying a failure. It is the +deterministic interaction budget, so target, scenario, seed, interaction count, +and fault profile are enough to rerun the same generated stream. + +Use `--duration` for local soaks. It is intentionally wall-clock based, so it +can stop after a different number of interactions if host speed, logging, or +runtime behavior changes. + +## Reading The Code + +Start here: + +- `src/core/mod.rs`: source, engine, property, and runner traits. +- `src/workload/table_ops`: table interaction language, generation model, and + scenarios. +- `src/workload/commitlog_ops`: lifecycle layer over table workloads. +- `src/sim/`: local executor and deterministic-decision shim. +- `src/properties.rs`: property catalog and oracle/model checks. 
+- `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, + commitlog durability, fault injection, close/reopen, and replay. +- `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. + +## Adding A New Target + +1. Add a target engine in `src/targets/<target>.rs`. +2. Reuse an existing workload family or add `src/workload/<family>/`. +3. Return observations that are rich enough for properties to validate behavior. +4. Plug target-specific properties through `PropertyRuntime`. +5. Add a `TargetDescriptor` in `src/targets/descriptor.rs`. +6. Register the target in CLI `TargetKind`. + +## Current Gaps + +- No structured trace/replay format yet. +- No shrinker yet; seed replay is the current reproduction mechanism. +- Sometimes-property reporting is still outcome-counter based, not a stable + property-event catalog. +- The local `sim` shim is not a real simulator yet. It owns executor setup and + deterministic fault decisions so future simulator work has one boundary. +- The current `RelationalDB` target drives open read snapshots to release before + starting writes, because beginning a write behind an open read snapshot can + block in this target shape. Interleaved read/write snapshot histories should + come back once the target models that lock behavior explicitly. +- Runtime-boundary work for scheduler, time, network, filesystem, and lower + randomness sources is still future work. diff --git a/crates/dst/src/client.rs b/crates/dst/src/client.rs new file mode 100644 index 00000000000..84b215a7198 --- /dev/null +++ b/crates/dst/src/client.rs @@ -0,0 +1,70 @@ +//! Logical client and topology identifiers shared by DST workloads and targets. +//! +//! These IDs are part of the generated workload language. Targets translate +//! them into concrete handles such as direct database transaction slots, +//! `ClientConnection`s, websocket sessions, or simulated-node connections. + +use std::fmt; + +/// Stable logical client identity within one DST run. 
+/// +/// A `ClientId` is an actor/user identity, not a live network connection. One +/// client may own zero, one, or many [`SessionId`]s at the same time. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ClientId(u32); + +impl ClientId { + pub const ZERO: Self = Self(0); + + pub const fn new(raw: u32) -> Self { + Self(raw) + } +} + +impl fmt::Display for ClientId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "client{}", self.0) + } +} + +/// Logical live connection/session for a client. +/// +/// Current single-process targets use `SessionId` anywhere old DST code said +/// "connection": transaction slots, read snapshots, reducer-call handles, and +/// property observations. A target translates this logical session into its +/// concrete handle, such as a `RelTx` slot or `ClientConnection`. +/// +/// The `generation` field is the per-client session ordinal. Workloads can keep +/// several generations active concurrently to model one client with multiple +/// open connections, or allocate a later generation after a reconnect. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct SessionId { + pub client: ClientId, + pub generation: u32, +} + +impl SessionId { + pub const ZERO: Self = Self::new(ClientId::ZERO, 0); + + pub const fn new(client: ClientId, generation: u32) -> Self { + Self { client, generation } + } + + /// Compatibility helper for today's fixed-size session pools. + /// + /// A run with `N` connections starts as one logical client with `N` + /// sessions: `client0/session0`, `client0/session1`, ... 
+ pub(crate) const fn from_index(index: usize) -> Self { + Self::new(ClientId::ZERO, index as u32) + } + + pub(crate) const fn as_index(self) -> usize { + self.generation as usize + } +} + +impl fmt::Display for SessionId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.session{}", self.client, self.generation) + } +} diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs new file mode 100644 index 00000000000..1f37e217fb8 --- /dev/null +++ b/crates/dst/src/config.rs @@ -0,0 +1,98 @@ +//! Shared run-budget configuration for DST targets. + +use std::time::{Duration, Instant}; + +/// Storage fault-injection profile for commitlog and snapshot wrappers. +/// +/// These are not CLI options yet; they are programmatic knobs for targeted +/// fault-injection tests. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) enum CommitlogFaultProfile { + /// No faults injected regardless of buggify state. + Off, + /// Low probability latency and short I/O only. + Light, + /// Moderate-latency and short I/O only. + #[default] + Default, + /// Heavy-latency and short I/O only. + Aggressive, +} + +/// Common stop conditions for generated DST runs. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct RunConfig { + /// Hard cap on generated interactions. `None` means no interaction budget. + /// + /// This is the preferred budget for exact seed replay: the same target, + /// scenario, seed, max-interactions value, and fault profile should produce + /// the same generated interaction stream. + pub max_interactions: Option, + /// Wall-clock duration budget in milliseconds. `None` means no time budget. + /// + /// Duration runs are useful as local soaks, but the exact stop step can vary + /// with host speed and runtime behavior. Use `max_interactions` when a + /// failure needs precise replay. 
+ pub max_duration_ms: Option, +} + +impl Default for RunConfig { + fn default() -> Self { + Self { + max_interactions: None, + max_duration_ms: None, + } + } +} + +impl RunConfig { + pub fn with_max_interactions(max_interactions: usize) -> Self { + Self { + max_interactions: Some(max_interactions), + max_duration_ms: None, + } + } + + pub fn with_duration_spec(duration: &str) -> anyhow::Result { + Ok(Self { + max_interactions: None, + max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), + }) + } + + /// Return the wall-clock deadline for duration-budgeted runs. + /// + /// This intentionally uses `std::time::Instant`, not simulated time. DST + /// duration budgets are a harness stop condition rather than part of the + /// simulated system under test. + pub fn deadline(&self) -> Option { + self.max_duration_ms + .map(Duration::from_millis) + .map(|duration| Instant::now() + duration) + } + + pub fn max_interactions_or_default(&self, default: usize) -> usize { + self.max_interactions.unwrap_or(default) + } +} + +pub fn parse_duration_spec(spec: &str) -> anyhow::Result { + let spec = spec.trim(); + if spec.is_empty() { + anyhow::bail!("duration spec cannot be empty"); + } + + let split_at = spec + .find(|ch: char| !ch.is_ascii_digit()) + .ok_or_else(|| anyhow::anyhow!("duration spec missing unit: {spec}"))?; + let (digits, unit) = spec.split_at(split_at); + let value: u64 = digits.parse()?; + + match unit { + "ms" => Ok(Duration::from_millis(value)), + "s" => Ok(Duration::from_secs(value)), + "m" => Ok(Duration::from_secs(value.saturating_mul(60))), + "h" => Ok(Duration::from_secs(value.saturating_mul(60 * 60))), + _ => anyhow::bail!("unsupported duration unit: {unit}"), + } +} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs new file mode 100644 index 00000000000..400c132a35f --- /dev/null +++ b/crates/dst/src/core/mod.rs @@ -0,0 +1,264 @@ +//! Core abstractions for pluggable DST workloads, engines, and properties. 
+
+use std::{
+    any::Any,
+    fmt::Debug,
+    future::Future,
+    panic::{self, AssertUnwindSafe},
+};
+
+use crate::config::RunConfig;
+use futures_util::FutureExt;
+
+/// Pull-based deterministic interaction source.
+pub trait WorkloadSource {
+    /// Interaction type produced by this source.
+    type Interaction;
+
+    /// Produce the next interaction, or `None` to end the run.
+    fn next_interaction(&mut self) -> Option<Self::Interaction>;
+    /// Ask the source to stop. The shared runner calls this once its
+    /// wall-clock deadline has passed, before pulling the next interaction.
+    fn request_finish(&mut self);
+}
+
+/// Target execution contract over a workload interaction stream.
+pub trait TargetEngine<I> {
+    /// Value handed to the property runtime after each executed interaction.
+    type Observation;
+    /// Final result returned by the shared runner.
+    type Outcome;
+    /// Error reported when an interaction cannot be executed.
+    type Error;
+
+    /// Execute one interaction against the target.
+    fn execute_interaction<'a>(
+        &'a mut self,
+        interaction: &'a I,
+    ) -> impl Future<Output = Result<Self::Observation, Self::Error>> + 'a;
+    /// Called once by the shared runner after the interaction stream ends.
+    fn finish(&mut self);
+    /// Produce the final outcome; called by the shared runner after `finish`.
+    fn collect_outcome<'a>(&'a mut self) -> impl Future<Output = anyhow::Result<Self::Outcome>> + 'a;
+}
+
+/// Property runtime contract for the shared streaming runner.
+pub trait StreamingProperties<I, O, E>
+where
+    E: TargetEngine<I>,
+{
+    /// Check one interaction and its observation; `Err` aborts the run.
+    fn observe(&mut self, engine: &E, interaction: &I, observation: &O) -> Result<(), String>;
+    /// Check the final outcome; `Err` fails the run.
+    fn finish(&mut self, engine: &E, outcome: &E::Outcome) -> Result<(), String>;
+}
+
+/// Shared streaming runner with property orchestration.
+///
+/// Pulls interactions from `source` one at a time, executes each on `engine`,
+/// and feeds the resulting observation to `properties`. Once the wall-clock
+/// deadline from `cfg` has passed, the source is asked to finish. Panics
+/// raised by the engine are caught and reported as `[NotCrash]` errors.
+///
+/// # Errors
+///
+/// Fails on the first engine panic, interaction execution error, property
+/// violation, or outcome collection error.
+pub async fn run_streaming<I, S, E, P>(
+    mut source: S,
+    mut engine: E,
+    mut properties: P,
+    cfg: RunConfig,
+) -> anyhow::Result<E::Outcome>
+where
+    I: Clone + Debug,
+    S: WorkloadSource<Interaction = I>,
+    E: TargetEngine<I>,
+    E::Error: std::fmt::Display,
+    P: StreamingProperties<I, E::Observation, E>,
+{
+    let deadline = cfg.deadline();
+    let mut step = 0usize;
+    loop {
+        // The deadline is a harness stop condition measured on the host clock.
+        if deadline.is_some_and(|d| std::time::Instant::now() >= d) {
+            source.request_finish();
+        }
+        let Some(interaction) = source.next_interaction() else {
+            break;
+        };
+        let execution = guard_target("execute_interaction", step, Some(&interaction), || {
+            engine.execute_interaction(&interaction)
+        })
+        .await
+        .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?;
+        let observation = execution.map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?;
+        properties
+            .observe(&engine, &interaction, &observation)
+            .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?;
+        step = step.saturating_add(1);
+    }
+    guard_target("finish", step, Option::<&I>::None, || async {
+        engine.finish();
+    })
+    .await
+    .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?;
+    // Outer `?` surfaces a caught panic, inner `?` the engine's own error.
+    let outcome = guard_target("collect_outcome", step, Option::<&I>::None, || engine.collect_outcome())
+        .await
+        .map_err(|e| anyhow::anyhow!("property violation while collecting outcome: {e}"))??;
+    properties
+        .finish(&engine, &outcome)
+        .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?;
+    Ok(outcome)
+}
+
+/// Run a target call and convert any panic into a `[NotCrash]` error string.
+///
+/// Panics are caught both while `make_future` builds the future and while the
+/// future is being polled.
+async fn guard_target<I, T, Fut>(
+    phase: &'static str,
+    step: usize,
+    interaction: Option<&I>,
+    make_future: impl FnOnce() -> Fut,
+) -> Result<T, String>
+where
+    I: Debug,
+    Fut: Future<Output = T>,
+{
+    let future = panic::catch_unwind(AssertUnwindSafe(make_future))
+        .map_err(|payload| not_crash_error(phase, step, interaction, &*payload))?;
+    AssertUnwindSafe(future)
+        .catch_unwind()
+        .await
+        .map_err(|payload| not_crash_error(phase, step, interaction, &*payload))
+}
+
+/// Format a caught panic as a `[NotCrash]` message naming the phase and step.
+fn not_crash_error<I: Debug>(
+    phase: &'static str,
+    step: usize,
+    interaction: Option<&I>,
+    payload: &(dyn Any + Send),
+) -> String {
+    let payload = panic_payload_to_string(payload);
+    match interaction {
+        Some(interaction) => {
+            format!("[NotCrash] target panicked during {phase} at step {step}: interaction={interaction:?}, payload={payload}")
+        }
+        None => format!("[NotCrash] target panicked during {phase} after step {step}: payload={payload}"),
+    }
+}
+
+/// Extract the message from a panic payload.
+///
+/// Handles the `&'static str` and `String` payload types; any other payload
+/// is reported with a fixed placeholder.
+fn panic_payload_to_string(payload: &(dyn Any + Send)) -> String {
+    if let Some(message) = payload.downcast_ref::<&'static str>() {
+        (*message).to_string()
+    } else if let Some(message) = payload.downcast_ref::<String>() {
+        message.clone()
+    } else {
+        "non-string panic payload".to_string()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Clone, Debug)]
+    struct TestInteraction;
+
+    /// Source that yields exactly one interaction.
+    struct SingleStepSource {
+        emitted: bool,
+    }
+
+    impl SingleStepSource {
+        fn new() -> Self {
+            Self { emitted: false }
+        }
+    }
+
+    impl WorkloadSource for SingleStepSource {
+        type Interaction = TestInteraction;
+
+        fn next_interaction(&mut self) -> Option<Self::Interaction> {
+            if self.emitted {
+                None
+            } else {
+                self.emitted = true;
+                Some(TestInteraction)
+            }
+        }
+
+        fn request_finish(&mut self) {}
+    }
+
+    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
+    enum PanicPhase {
+        Execute,
+        Finish,
+        CollectOutcome,
+    }
+
+    /// Engine that panics in exactly one configured phase.
+    struct PanicEngine {
+        phase: PanicPhase,
+    }
+
+    impl PanicEngine {
+        fn new(phase: PanicPhase) -> Self {
+            Self { phase }
+        }
+    }
+
+    impl TargetEngine<TestInteraction> for PanicEngine {
+        type Observation = ();
+        type Outcome = ();
+        type Error = String;
+
+        fn execute_interaction<'a>(
+            &'a mut self,
+            _interaction: &'a TestInteraction,
+        ) -> impl Future<Output = Result<Self::Observation, Self::Error>> + 'a {
+            async move {
+                if self.phase == PanicPhase::Execute {
+                    panic!("execute panic");
+                }
+                Ok(())
+            }
+        }
+
+        fn finish(&mut self) {
+            if self.phase == PanicPhase::Finish {
+                panic!("finish panic");
+            }
+        }
+
+        fn collect_outcome<'a>(&'a mut self) -> impl Future<Output = anyhow::Result<Self::Outcome>> + 'a {
+            async move {
+                if self.phase == PanicPhase::CollectOutcome {
+                    panic!("collect panic");
+                }
+                Ok(())
+            }
+        }
+    }
+
+    struct NoopProperties;
+
+    impl StreamingProperties<TestInteraction, (), PanicEngine> for NoopProperties {
+        fn observe(
+            &mut self,
+            _engine: &PanicEngine,
+            _interaction: &TestInteraction,
+            _observation: &(),
+        ) -> Result<(), String> {
+            Ok(())
+        }
+
+        fn finish(&mut self, _engine: &PanicEngine, _outcome: &()) -> Result<(), String> {
+            Ok(())
+        }
+    }
+
+    #[test]
+    fn not_crash_catches_execute_panic() {
+        assert_not_crash_error(PanicPhase::Execute, "execute_interaction", "execute panic");
+    }
+
+    #[test]
+    fn not_crash_catches_finish_panic() {
+        assert_not_crash_error(PanicPhase::Finish, "finish", "finish panic");
+    }
+
+    #[test]
+    fn not_crash_catches_collect_outcome_panic() {
+        assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic");
+    }
+
+    /// Run one step with an engine panicking in `phase` and check the error text.
+    fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) {
+        let mut runtime = crate::sim::Runtime::new(0).expect("runtime");
+        let err = runtime
+            .block_on(run_streaming(
+                SingleStepSource::new(),
+                PanicEngine::new(phase),
+                NoopProperties,
+                RunConfig::with_max_interactions(1),
+            ))
+            .unwrap_err()
+            .to_string();
+
+        assert!(err.contains("[NotCrash]"));
+        assert!(err.contains(expected_phase));
+        assert!(err.contains(expected_payload));
+    }
+}
diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs
new file mode 100644
index 00000000000..cfebd1a113d
--- /dev/null
+++ b/crates/dst/src/lib.rs
@@ -0,0 +1,47 @@
+//! Deterministic simulation testing utilities for SpacetimeDB crates.
+//!
+//! Public surface is intentionally narrow and centered on the CLI:
+//!
+//! - [`client`] for logical client/session identifiers,
+//! - [`config`] for run budgets,
+//! - [`crate::core`] for the workload/engine/property traits and the shared runner,
+//! - [`sim`] for the local executor and deterministic-decision shim,
+//! - [`workload`] for scenario identifiers,
+//! - [`targets`] for the executable relational-db adapter.
+//!
+//! The `properties` module (reusable semantic checks) is crate-internal.
+//!
+//! ## DST principles
+//!
+//! 1. Every generated choice comes from a simulator-provided deterministic
+//!    source. A failing run should be replayable from the printed seed and CLI
+//!    arguments. Use `--max-interactions` for exact replay; duration budgets are
+//!    wall-clock soak limits.
+//! 2. Workloads describe legal but stressful user behavior. Targets may add
+//!    faults and lifecycle disruption, but the generator should not depend on
+//!    target internals.
+//! 3. Oracles should check observable state, not merely absence of panics. When
+//!    possible, compare the target against a simple model.
+//! 4. Keep generation, execution, and property checking separate. This makes it
+//!    clear whether a failure came from an invalid workload, a target bug, or a
+//!    weak assertion.
+//! 5. Prefer streaming state machines over precomputed traces. DST runs should
+//!    scale by budget and duration without materializing the whole workload.
+//! 6. Fault injection must be explicit, configurable, and summarized in the run
+//!    output. Profiles should start with recoverable API-level behavior before
+//!    introducing crash or corruption semantics.
+//! 7. Shared randomness, weighting, and sampling helpers belong in the
+//!    workload strategy module, not in ad hoc target or scenario code.
+
+/// Logical client/session identifiers shared by workloads and targets.
+pub mod client;
+/// Shared run-budget configuration for DST targets.
+pub mod config;
+/// Core traits/runners for pluggable workloads and targets.
+pub mod core;
+/// Reusable semantic properties and oracle-model checks.
+pub(crate) mod properties;
+mod schema;
+/// Local executor and deterministic-decision shim.
+pub mod sim;
+/// Concrete simulator targets.
+pub mod targets;
+/// Shared workload generators reused by multiple targets.
+pub mod workload;
diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs
new file mode 100644
index 00000000000..b957c4fb0c4
--- /dev/null
+++ b/crates/dst/src/main.rs
@@ -0,0 +1,124 @@
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use clap::{Args, Parser, Subcommand};
+use spacetimedb_dst::{
+    config::RunConfig,
+    targets::descriptor::{RelationalDbCommitlogDescriptor, TargetDescriptor},
+    workload::table_ops::TableScenarioId,
+};
+
+/// Top-level command-line interface.
+#[derive(Parser, Debug)]
+#[command(name = "spacetimedb-dst")]
+#[command(about = "Run deterministic simulation targets")]
+struct Cli {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    Run(RunArgs),
+}
+
+/// Arguments of the `run` subcommand.
+#[derive(Args, Debug)]
+struct RunArgs {
+    #[arg(long, help = "Seed for generated choices. Defaults to wall-clock time.")]
+    seed: Option<u64>,
+    #[arg(
+        long,
+        help = "Wall-clock soak budget such as 500ms, 10s, 5m, or 1h. Use --max-interactions for exact replay."
+    )]
+    duration: Option<String>,
+    #[arg(long, help = "Deterministic interaction budget. Preferred for replayable failures.")]
+    max_interactions: Option<usize>,
+    #[arg(long, help = "Scenario to run [default: random-crud]")]
+    scenario: Option<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+    init_tracing();
+    match Cli::parse().command {
+        Command::Run(args) => run_command(args),
+    }
+}
+
+/// Install a compact `tracing` subscriber filtered by the default env filter,
+/// falling back to `info`. A failure to install is ignored.
+fn init_tracing() {
+    use tracing_subscriber::{fmt, EnvFilter};
+
+    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+    let _ = fmt()
+        .with_env_filter(filter)
+        .with_target(false)
+        .with_thread_ids(false)
+        .with_thread_names(false)
+        .compact()
+        .try_init();
+}
+
+/// Resolve the CLI arguments and run the relational-db commitlog target.
+fn run_command(args: RunArgs) -> anyhow::Result<()> {
+    let seed = resolve_seed(args.seed);
+    // Log the seed before running so that a failing run started without
+    // `--seed` can still be replayed.
+    tracing::info!(seed, "starting DST run");
+    let config = build_config(args.duration.as_deref(), args.max_interactions)?;
+    let scenario = resolve_scenario(args.scenario.as_deref());
+
+    run_prepared_target::<RelationalDbCommitlogDescriptor>(seed, scenario, config)
+}
+
+/// Prepare target `D`, then run it on a dedicated thread with its own
+/// simulation runtime. A panic on that thread is re-raised on the caller.
+fn run_prepared_target<D: TargetDescriptor>(
+    seed: u64,
+    scenario: D::Scenario,
+    config: RunConfig,
+) -> anyhow::Result<()>
+where
+    D: 'static,
+    D::Scenario: Send + 'static,
+{
+    D::prepare(seed, &scenario, &config)?;
+    std::thread::spawn(move || {
+        let mut runtime = spacetimedb_dst::sim::Runtime::new(seed)?;
+        runtime.block_on(run_target::<D>(seed, scenario, config))
+    })
+    .join()
+    .unwrap_or_else(|payload| std::panic::resume_unwind(payload))
+}
+
+/// Return the explicit seed, or derive one from the wall clock.
+fn resolve_seed(seed: Option<u64>) -> u64 {
+    seed.unwrap_or_else(|| {
+        // Nanoseconds since the epoch, truncated to 64 bits by the cast.
+        SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .expect("time went backwards")
+            .as_nanos() as u64
+    })
+}
+
+/// Map the `--scenario` value to a scenario id.
+///
+/// Unknown names are reported on stderr and fall back to `random-crud`.
+fn resolve_scenario(scenario: Option<&str>) -> TableScenarioId {
+    match scenario {
+        Some("random-crud") | None => TableScenarioId::RandomCrud,
+        Some(other) => {
+            eprintln!("unknown scenario: {other}, using random-crud");
+            TableScenarioId::RandomCrud
+        }
+    }
+}
+
+/// Combine the optional budgets into a [`RunConfig`].
+///
+/// With neither budget given, the run is capped at 1 000 interactions. With a
+/// duration given, the interaction budget is whatever the caller passed,
+/// possibly none.
+///
+/// # Errors
+///
+/// Fails when `duration` is not a valid duration spec.
+fn build_config(duration: Option<&str>, max_interactions: Option<usize>) -> anyhow::Result<RunConfig> {
+    const DEFAULT_MAX_INTERACTIONS: usize = 1_000;
+
+    let Some(duration) = duration else {
+        return Ok(RunConfig::with_max_interactions(
+            max_interactions.unwrap_or(DEFAULT_MAX_INTERACTIONS),
+        ));
+    };
+    // Reuse the library's spec-to-milliseconds conversion instead of repeating it.
+    Ok(RunConfig {
+        max_interactions,
+        ..RunConfig::with_duration_spec(duration)?
+    })
+}
+
+/// Run target `D` to completion and print its summary line to stdout.
+#[allow(clippy::disallowed_macros)]
+async fn run_target<D: TargetDescriptor>(
+    seed: u64,
+    scenario: D::Scenario,
+    config: RunConfig,
+) -> anyhow::Result<()> {
+    let line = D::run_streaming(seed, scenario, config).await?;
+    println!("{line}");
+    Ok(())
+}
diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs
new file mode 100644
index 00000000000..dbe227c2dd9
--- /dev/null
+++ b/crates/dst/src/properties.rs
@@ -0,0 +1,188 @@
+//! Reusable property runtime shared by DST targets.
+//!
+//! This module is the boundary between target execution and semantic checking.
+//! Targets emit observations and implement [`TargetPropertyAccess`]; property
+//! rules compare those observations against either the target's externally
+//! visible state, an oracle model, or durable replay state.
+//!
+//! ## Property Model
+//!
+//! A property is a named check over a run. It observes generated interactions,
+//! target observations, target-visible state, oracle models, and final
+//! outcomes. Failures should include a stable property name and enough context
+//! to replay the seed or trace.
+
+mod rules;
+mod runtime;
+
+use std::ops::Bound;
+
+use spacetimedb_sats::AlgebraicValue;
+
+use crate::{
+    client::SessionId,
+    schema::{SchemaPlan, SimRow},
+    workload::table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome},
+};
+
+pub(crate) use runtime::PropertyRuntime;
+
+/// Target adapter for property evaluation.
+pub(crate) trait TargetPropertyAccess {
+    /// Schema plan describing the target's tables.
+    fn schema_plan(&self) -> &SchemaPlan;
+    /// Look up the row with `id` in `table` as seen by session `conn`.
+    fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result<Option<SimRow>, String>;
+    /// Collect every row of `table` as seen by session `conn`.
+    fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result<Vec<SimRow>, String>;
+    /// Collect every row of `table` without naming a session.
+    fn collect_rows_for_table(&self, table: usize) -> Result<Vec<SimRow>, String>;
+    /// Count the rows of `table`.
+    fn count_rows(&self, table: usize) -> Result<usize, String>;
+    /// Count the rows of `table` whose column `col` equals `value`.
+    fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result<usize, String>;
+    /// Scan `table` over the key formed by `cols`, between `lower` and `upper`.
+    fn range_scan(
+        &self,
+        table: usize,
+        cols: &[u16],
+        lower: Bound<AlgebraicValue>,
+        upper: Bound<AlgebraicValue>,
+    ) -> Result<Vec<SimRow>, String>;
+}
+
+/// Canonical property IDs that can be selected by targets.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum PropertyKind {
+    /// Safety: target execution must not panic.
+    ///
+    /// Enforced by the shared streaming runner.
+    NotCrash,
+    /// Metamorphic: an inserted row is immediately visible to the inserting session.
+    InsertSelect,
+    /// Metamorphic: a deleted row disappears from the deleting session's view.
+    DeleteSelect,
+    /// Differential: optimized predicate counts agree with direct row projection.
+    SelectSelectOptimizer,
+    /// Metamorphic: boolean partitions preserve total cardinality.
+    WhereTrueFalseNull,
+    /// Metamorphic: composite index range scans implement excluded upper bounds correctly.
+    IndexRangeExcluded,
+    /// Safety: observed errors match the model-predicted error class.
+    ErrorMatchesOracle,
+    /// Safety: model-predicted no-op interactions do not mutate visible state.
+    NoMutationMatchesModel,
+    /// Model/oracle: point lookups match the oracle session-visible model.
+    PointLookupMatchesModel,
+    /// Model/oracle: predicate counts match the oracle session-visible model.
+    PredicateCountMatchesModel,
+    /// Model/oracle: range scans match the oracle session-visible model.
+    RangeScanMatchesModel,
+    /// Model/oracle: full scans match the oracle session-visible model.
+    FullScanMatchesModel,
+}
+
+/// A single row-level change reported by a target.
+#[derive(Clone, Debug)]
+pub(crate) enum TableMutation {
+    /// A row was inserted; carries both the requested and the returned row.
+    Inserted {
+        table: usize,
+        requested: SimRow,
+        returned: SimRow,
+    },
+    /// A row was deleted.
+    Deleted {
+        table: usize,
+        row: SimRow,
+    },
+}
+
+/// What a target reports after executing one table interaction.
+#[derive(Clone, Debug)]
+pub(crate) enum TableObservation {
+    Applied,
+    /// Row mutations performed by session `conn`; `in_tx` flags whether they
+    /// happened inside a transaction.
+    Mutated {
+        conn: SessionId,
+        mutations: Vec<TableMutation>,
+        in_tx: bool,
+    },
+    ObservedError(TableErrorKind),
+    PointLookup {
+        conn: SessionId,
+        table: usize,
+        id: u64,
+        actual: Option<SimRow>,
+    },
+    PredicateCount {
+        conn: SessionId,
+        table: usize,
+        col: u16,
+        value: AlgebraicValue,
+        actual: usize,
+    },
+    RangeScan {
+        conn: SessionId,
+        table: usize,
+        cols: Vec<u16>,
+        lower: Bound<AlgebraicValue>,
+        upper: Bound<AlgebraicValue>,
+        actual: Vec<SimRow>,
+    },
+    FullScan {
+        conn: SessionId,
+        table: usize,
+        actual: Vec<SimRow>,
+    },
+    CommitOrRollback,
+}
+
+/// Read-only view handed to every rule: target access plus the oracle models.
+struct PropertyContext<'a> {
+    access: &'a dyn TargetPropertyAccess,
+    models: &'a runtime::PropertyModels,
+}
+
+/// Borrowed event dispatched to property rules.
+#[derive(Clone, Debug)]
+enum PropertyEvent<'a> {
+    TableInteractionApplied,
+    RowInserted {
+        conn: SessionId,
+        table: usize,
+        returned: &'a SimRow,
+        in_tx: bool,
+    },
+    RowDeleted {
+        conn: SessionId,
+        table: usize,
+        row: &'a SimRow,
+        in_tx: bool,
+    },
+    /// An error the target reported, paired with the model-predicted error.
+    ObservedError {
+        observed: TableErrorKind,
+        predicted: TableErrorKind,
+        subject: Option<(SessionId, usize)>,
+        interaction: &'a TableWorkloadInteraction,
+    },
+    NoMutation {
+        subject: Option<(SessionId, usize)>,
+        interaction: &'a TableWorkloadInteraction,
+        observation: &'a TableObservation,
+    },
+    PointLookup {
+        conn: SessionId,
+        table: usize,
+        id: u64,
+        actual: &'a Option<SimRow>,
+    },
+    PredicateCount {
+        conn: SessionId,
+        table: usize,
+        col: u16,
+        value: &'a AlgebraicValue,
+        actual: usize,
+    },
+    RangeScan {
+        conn: SessionId,
+        table: usize,
+        cols: &'a [u16],
+        lower: &'a Bound<AlgebraicValue>,
+        upper: &'a Bound<AlgebraicValue>,
+        actual: &'a [SimRow],
+    },
+    FullScan {
+        conn: SessionId,
+        table: usize,
+        actual: &'a [SimRow],
+    },
+    CommitOrRollback,
+    /// The table workload finished with the given final outcome.
+    TableWorkloadFinished(&'a TableWorkloadOutcome),
+}
diff --git 
a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs new file mode 100644 index 00000000000..9d2552014c2 --- /dev/null +++ b/crates/dst/src/properties/rules.rs @@ -0,0 +1,483 @@ +use std::ops::Bound; + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; + +use crate::{ + client::SessionId, + schema::{SchemaPlan, SimRow}, + workload::table_ops::{TableOperation, TableScenario}, +}; + +use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation}; + +pub(crate) trait PropertyRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let _ = ctx; + let _ = event; + Ok(()) + } +} + +pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { + match kind { + PropertyKind::NotCrash => Box::::default(), + PropertyKind::InsertSelect => Box::::default(), + PropertyKind::DeleteSelect => Box::::default(), + PropertyKind::SelectSelectOptimizer => Box::::default(), + PropertyKind::WhereTrueFalseNull => Box::::default(), + PropertyKind::IndexRangeExcluded => Box::::default(), + PropertyKind::ErrorMatchesOracle => Box::::default(), + PropertyKind::NoMutationMatchesModel => Box::::default(), + PropertyKind::PointLookupMatchesModel => Box::::default(), + PropertyKind::PredicateCountMatchesModel => Box::::default(), + PropertyKind::RangeScanMatchesModel => Box::::default(), + PropertyKind::FullScanMatchesModel => Box::::default(), + } +} + +pub(crate) fn oracle_table_state_rule(scenario: S, schema: SchemaPlan) -> Box +where + S: TableScenario + 'static, +{ + Box::new(OracleTableStateRule::new(scenario, schema)) +} + +#[derive(Default)] +struct NotCrashRule; + +impl PropertyRule for NotCrashRule {} + +struct OracleTableStateRule { + scenario: S, + schema: SchemaPlan, +} + +impl OracleTableStateRule { + fn new(scenario: S, schema: SchemaPlan) -> Self { + Self { scenario, schema } + } +} + +impl PropertyRule for OracleTableStateRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, 
event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::TableWorkloadFinished(outcome) => { + let expected_rows = ctx.models.table().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "[OracleTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("[OracleTableState] scenario invariant failed: {err}")) + } + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct InsertSelectRule; + +impl PropertyRule for InsertSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowInserted { + conn, table, returned, .. + } = event + else { + return Ok(()); + }; + let id = returned.id().ok_or_else(|| "row missing id column".to_string())?; + let found = ctx.access.lookup_in_connection(conn, table, id)?; + if found != Some(returned.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={returned:?}, actual={found:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct DeleteSelectRule; + +impl PropertyRule for DeleteSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowDeleted { conn, table, row, .. } = event else { + return Ok(()); + }; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if ctx.access.lookup_in_connection(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) + } +} + +fn post_write_check_tables(ctx: &PropertyContext<'_>, event: &PropertyEvent<'_>) -> Option> { + match event { + PropertyEvent::RowInserted { + table, in_tx: false, .. + } + | PropertyEvent::RowDeleted { + table, in_tx: false, .. 
+ } => Some(vec![*table]), + PropertyEvent::CommitOrRollback => Some((0..ctx.access.schema_plan().tables.len()).collect()), + _ => None, + } +} + +#[derive(Default)] +struct NoRecRule; + +impl PropertyRule for NoRecRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + continue; + }; + let scanned_rows = ctx.access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + continue; + } + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => continue, + }; + let where_count = ctx.access.count_by_col_eq(table, col_idx, &predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct TlpRule; + +impl PropertyRule for TlpRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = 
table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + continue; + }; + let total = ctx.access.count_rows(table)?; + let true_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct IndexRangeExcludedRule; + +impl PropertyRule for IndexRangeExcludedRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; + + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = ctx.access.collect_rows_for_table(table)?; + if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { + continue; + } + + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut 
expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = ctx.access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } + } + } + + Ok(()) + } +} + +#[derive(Default)] +struct ErrorMatchesOracleRule; + +impl PropertyRule for ErrorMatchesOracleRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + } = event + else { + return Ok(()); + }; + if observed != predicted { + return Err(format!( + "[ErrorMatchesOracle] observed {observed:?}, but model predicted {predicted:?}: {interaction:?}", + )); + } + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[ErrorDoesNotMutate]", interaction)?; + } + Ok(()) + } +} + +#[derive(Default)] +struct NoMutationMatchesModelRule; + +impl PropertyRule for NoMutationMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::NoMutation { + interaction, + subject, + observation, + } = event + else { + return Ok(()); + }; + if let TableOperation::InsertRows { table, rows, .. } = &interaction.op + && let TableObservation::Mutated { mutations, .. 
} = observation + { + if mutations.len() != rows.len() { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned wrong mutation count: expected={}, actual={}; interaction={interaction:?}", + rows.len(), + mutations.len() + )); + } + for (row, mutation) in rows.iter().zip(mutations) { + let TableMutation::Inserted { + table: observed_table, + requested, + returned, + } = mutation + else { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned non-insert mutation: {mutation:?}; interaction={interaction:?}" + )); + }; + if observed_table != table || requested != row || returned != row { + return Err(format!( + "[NoMutationMatchesModel] no-op insert returned row mismatch: expected table={table}, row={row:?}; observed table={observed_table}, requested={requested:?}, returned={returned:?}; interaction={interaction:?}" + )); + } + } + } + + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[NoMutationMatchesModel]", interaction)?; + } + Ok(()) + } +} + +fn assert_visible_rows_match_model( + ctx: &PropertyContext<'_>, + conn: SessionId, + table: usize, + property: &str, + interaction: &crate::workload::table_ops::TableWorkloadInteraction, +) -> Result<(), String> { + let mut actual = ctx.access.collect_rows_in_connection(conn, table)?; + actual.sort_by_key(|row| row.id().unwrap_or_default()); + let expected = ctx.models.table().visible_rows(conn, table); + if actual != expected { + return Err(format!( + "{property} visible rows changed unexpectedly on conn={conn}, table={table}: expected={expected:?}, actual={actual:?}; interaction={interaction:?}" + )); + } + Ok(()) +} + +#[derive(Default)] +struct PointLookupMatchesModelRule; + +impl PropertyRule for PointLookupMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PointLookup { + conn, + table, + id, + actual, + } = event + else { + return Ok(()); + }; + let 
expected = ctx.models.table().lookup_by_id(conn, table, id); + if *actual != expected { + return Err(format!( + "[Model::PointLookup] mismatch conn={conn}, table={table}, id={id}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct PredicateCountMatchesModelRule; + +impl PropertyRule for PredicateCountMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().predicate_count(conn, table, col, value); + if actual != expected { + return Err(format!( + "[Model::PredicateCount] mismatch conn={conn}, table={table}, col={col}, value={value:?}: expected={expected}, actual={actual}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct RangeScanMatchesModelRule; + +impl PropertyRule for RangeScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().range_scan(conn, table, cols, lower, upper); + if actual != expected.as_slice() { + return Err(format!( + "[Model::RangeScan] mismatch conn={conn}, table={table}, cols={cols:?}, lower={lower:?}, upper={upper:?}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct FullScanMatchesModelRule; + +impl PropertyRule for FullScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::FullScan { conn, table, actual } = event else { + return Ok(()); + }; + let expected = ctx.models.table().full_scan(conn, table); + if actual != expected.as_slice() { + return Err(format!( + "[Model::FullScan] mismatch conn={conn}, 
table={table}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} diff --git a/crates/dst/src/properties/runtime.rs b/crates/dst/src/properties/runtime.rs new file mode 100644 index 00000000000..52951b10b17 --- /dev/null +++ b/crates/dst/src/properties/runtime.rs @@ -0,0 +1,427 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + core::{StreamingProperties, TargetEngine}, + schema::{SchemaPlan, SimRow}, + workload::table_ops::{PredictedOutcome, TableErrorKind, TableOracle, TableWorkloadInteraction, TableWorkloadOutcome}, +}; + +use super::{ + rules::{oracle_table_state_rule, rule_for_kind, PropertyRule}, + PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess, +}; + +#[derive(Clone, Debug)] +pub(super) struct PropertyModels { + table: TableModel, +} + +#[derive(Clone, Debug)] +pub(super) struct TableModel { + oracle: TableOracle, +} + +impl PropertyModels { + pub(super) fn new(table_count: usize, num_connections: usize) -> Self { + Self { + table: TableModel { + oracle: TableOracle::new(table_count, num_connections), + }, + } + } + + pub(super) fn table(&self) -> &TableModel { + &self.table + } + + fn predict(&self, interaction: &TableWorkloadInteraction) -> Result { + self.table.oracle.predict(&interaction.op) + } + + fn apply(&mut self, interaction: &TableWorkloadInteraction) { + self.table.oracle.apply(&interaction.op); + } +} + +impl TableModel { + pub(super) fn committed_rows(&self) -> Vec> { + self.oracle.clone().committed_rows() + } + + pub(super) fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { + self.oracle.lookup_by_id(conn, table, id) + } + + pub(super) fn predicate_count(&self, 
conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.oracle.predicate_count(conn, table, col, value) + } + + pub(super) fn range_scan( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + self.oracle.range_scan(conn, table, cols, lower, upper) + } + + pub(super) fn full_scan(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.oracle.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } + + pub(super) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.oracle.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } +} + +/// Mutable runtime holding selected property implementations. +pub(crate) struct PropertyRuntime { + rules: Vec, + models: PropertyModels, +} + +impl PropertyRuntime { + pub fn with_kinds(kinds: &[PropertyKind]) -> Self { + let rules = kinds.iter().copied().map(rule_for_kind).map(RuleEntry::new).collect(); + Self { + rules, + models: PropertyModels::new(0, 0), + } + } + + pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self + where + S: crate::workload::table_ops::TableScenario + 'static, + { + let mut runtime = Self { + models: PropertyModels::new(schema.tables.len(), num_connections), + ..Self::default() + }; + runtime + .rules + .push(RuleEntry::new(oracle_table_state_rule(scenario, schema))); + runtime + } + + fn observe_event(&mut self, access: &dyn TargetPropertyAccess, event: PropertyEvent<'_>) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, event.clone())?; + } + Ok(()) + } + + fn on_table_interaction( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.models.apply(interaction); + self.observe_event(access, 
PropertyEvent::TableInteractionApplied) + } + + fn on_mutations( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + mutations: &[TableMutation], + in_tx: bool, + ) -> Result<(), String> { + for mutation in mutations { + match mutation { + TableMutation::Inserted { + table, + requested: _, + returned, + } => self.observe_event( + access, + PropertyEvent::RowInserted { + conn, + table: *table, + returned, + in_tx, + }, + )?, + TableMutation::Deleted { table, row } => self.observe_event( + access, + PropertyEvent::RowDeleted { + conn, + table: *table, + row, + in_tx, + }, + )?, + } + } + Ok(()) + } + + fn on_observed_error( + &mut self, + access: &dyn TargetPropertyAccess, + observed: TableErrorKind, + predicted: TableErrorKind, + subject: Option<(SessionId, usize)>, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + }, + ) + } + + fn on_no_mutation( + &mut self, + access: &dyn TargetPropertyAccess, + subject: Option<(SessionId, usize)>, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::NoMutation { + subject, + interaction, + observation, + }, + ) + } + + fn on_point_lookup( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + id: u64, + actual: &Option, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::PointLookup { + conn, + table, + id, + actual, + }, + ) + } + + fn on_predicate_count( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + col: u16, + value: &AlgebraicValue, + actual: usize, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + }, + ) + } + + #[allow(clippy::too_many_arguments)] + fn on_range_scan( + &mut self, + 
access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + actual: &[SimRow], + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + }, + ) + } + + fn on_full_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + actual: &[SimRow], + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::FullScan { conn, table, actual }) + } + + fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + self.observe_event(access, PropertyEvent::CommitOrRollback) + } + + fn on_table_workload_finish( + &mut self, + access: &dyn TargetPropertyAccess, + outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::TableWorkloadFinished(outcome)) + } + + fn observe_table_observation( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + let prediction = self.models.predict(interaction)?; + match (&prediction, observed_error_kind(observation)) { + (PredictedOutcome::Error { kind, subject }, Some(observed)) => { + self.on_observed_error(access, observed, *kind, *subject, interaction)?; + return Ok(()); + } + (PredictedOutcome::Error { kind, .. 
}, None) => { + return Err(format!( + "[ErrorMatchesOracle] expected {kind:?}, observed successful result {observation:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, Some(observed)) => { + return Err(format!( + "[ErrorMatchesOracle] expected success, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, None) => self.on_table_interaction(access, interaction)?, + (PredictedOutcome::NoMutation { subject: _ }, Some(observed)) => { + return Err(format!( + "[NoMutationMatchesModel] expected no mutation, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::NoMutation { subject }, None) => { + self.on_no_mutation(access, *subject, interaction, observation)?; + } + } + + match observation { + TableObservation::Applied => {} + TableObservation::Mutated { conn, mutations, in_tx } => { + self.on_mutations(access, *conn, mutations, *in_tx)? + } + TableObservation::ObservedError(_) => {} + TableObservation::PointLookup { + conn, + table, + id, + actual, + } => self.on_point_lookup(access, *conn, *table, *id, actual)?, + TableObservation::PredicateCount { + conn, + table, + col, + value, + actual, + } => self.on_predicate_count(access, *conn, *table, *col, value, *actual)?, + TableObservation::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, + TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, + TableObservation::CommitOrRollback => {} + } + + if matches!(observation, TableObservation::CommitOrRollback) { + self.on_commit_or_rollback(access)?; + } + Ok(()) + } +} + +impl StreamingProperties for PropertyRuntime +where + E: TargetEngine< + TableWorkloadInteraction, + Observation = TableObservation, + Outcome = TableWorkloadOutcome, + Error = String, + > + TargetPropertyAccess, +{ + fn observe( + &mut self, + engine: &E, + interaction: &TableWorkloadInteraction, + 
observation: &TableObservation, + ) -> Result<(), String> { + self.observe_table_observation(engine, interaction, observation) + } + + fn finish(&mut self, engine: &E, outcome: &TableWorkloadOutcome) -> Result<(), String> { + self.on_table_workload_finish(engine, outcome) + } +} + +struct RuleEntry { + rule: Box, +} + +impl RuleEntry { + fn new(rule: Box) -> Self { + Self { rule } + } +} + +impl Default for PropertyRuntime { + fn default() -> Self { + Self::with_kinds(&[ + PropertyKind::NotCrash, + PropertyKind::InsertSelect, + PropertyKind::DeleteSelect, + PropertyKind::SelectSelectOptimizer, + PropertyKind::WhereTrueFalseNull, + PropertyKind::IndexRangeExcluded, + PropertyKind::ErrorMatchesOracle, + PropertyKind::NoMutationMatchesModel, + PropertyKind::PointLookupMatchesModel, + PropertyKind::PredicateCountMatchesModel, + PropertyKind::RangeScanMatchesModel, + PropertyKind::FullScanMatchesModel, + ]) + } +} + +fn observed_error_kind(observation: &TableObservation) -> Option { + match observation { + TableObservation::ObservedError(kind) => Some(*kind), + TableObservation::Applied + | TableObservation::Mutated { .. } + | TableObservation::PointLookup { .. } + | TableObservation::PredicateCount { .. } + | TableObservation::RangeScan { .. } + | TableObservation::FullScan { .. } + | TableObservation::CommitOrRollback => None, + } +} diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs new file mode 100644 index 00000000000..fdaaa627954 --- /dev/null +++ b/crates/dst/src/schema.rs @@ -0,0 +1,196 @@ +//! Shared schema and row model used by DST targets. + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; + +use crate::sim::Rng; + +/// Generated schema for one simulator case. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SchemaPlan { + /// User-visible tables installed before the workload starts. + pub tables: Vec, +} + +/// Table definition used by simulators. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct TablePlan { + /// Stable logical table name used in generated interactions and assertions. + pub name: String, + /// Ordered column definitions. Column 0 is treated as the primary id column. + pub columns: Vec, + /// Additional indexed column sets beyond the implicit primary id index. + /// + /// A value like `[1]` means a single-column secondary index on column 1. + /// A value like `[0, 1]` means a composite btree index over columns 0 and 1. + pub extra_indexes: Vec>, +} + +/// Column definition used by simulators. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ColumnPlan { + /// Column name installed into the target schema. + pub name: String, + /// Algebraic type for generated values in this column. + pub ty: AlgebraicType, +} + +/// Serializable row representation used by generated interactions. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SimRow { + /// Column values in schema order. + pub values: Vec, +} + +pub fn generate_supported_type(rng: &Rng) -> AlgebraicType { + match rng.index(12) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::I8, + 2 => AlgebraicType::U8, + 3 => AlgebraicType::I16, + 4 => AlgebraicType::U16, + 5 => AlgebraicType::I32, + 6 => AlgebraicType::U32, + 7 => AlgebraicType::I64, + 8 => AlgebraicType::U64, + 9 => AlgebraicType::I128, + 10 => AlgebraicType::U128, + _ => AlgebraicType::String, + } +} + +pub fn generate_value_for_type(rng: &Rng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + if rng.index(5) == 0 { + return edge_value_for_type(rng, ty, idx); + } + + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => AlgebraicValue::I8(((rng.next_u64() % 64) as i8) - 32), + AlgebraicType::U8 => AlgebraicValue::U8((rng.next_u64() % u8::MAX as u64) as u8), + AlgebraicType::I16 => AlgebraicValue::I16(((rng.next_u64() % 2048) as i16) - 1024), + AlgebraicType::U16 => AlgebraicValue::U16((rng.next_u64() % u16::MAX as u64) as 
u16), + AlgebraicType::I32 => AlgebraicValue::I32(((rng.next_u64() % 200_000) as i32) - 100_000), + AlgebraicType::U32 => AlgebraicValue::U32((rng.next_u64() % 1_000_000) as u32), + AlgebraicType::I64 => AlgebraicValue::I64(((rng.next_u64() % 2_000_000) as i64) - 1_000_000), + AlgebraicType::U64 => AlgebraicValue::U64((rng.next_u64() % 1000) + idx as u64), + AlgebraicType::I128 => { + let v = ((rng.next_u64() % 2_000_000) as i128) - 1_000_000; + AlgebraicValue::I128(v.into()) + } + AlgebraicType::U128 => { + let v = (rng.next_u64() % 2_000_000) as u128; + AlgebraicValue::U128(v.into()) + } + AlgebraicType::String => AlgebraicValue::String(format!("v{}_{}", idx, rng.next_u64() % 10_000).into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +pub fn default_value_for_type(ty: &AlgebraicType) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(false), + AlgebraicType::I8 => AlgebraicValue::I8(0), + AlgebraicType::U8 => AlgebraicValue::U8(0), + AlgebraicType::I16 => AlgebraicValue::I16(0), + AlgebraicType::U16 => AlgebraicValue::U16(0), + AlgebraicType::I32 => AlgebraicValue::I32(0), + AlgebraicType::U32 => AlgebraicValue::U32(0), + AlgebraicType::I64 => AlgebraicValue::I64(0), + AlgebraicType::U64 => AlgebraicValue::U64(0), + AlgebraicType::I128 => AlgebraicValue::I128(0.into()), + AlgebraicType::U128 => AlgebraicValue::U128(0.into()), + AlgebraicType::String => AlgebraicValue::String("".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +pub fn distinct_value_for_type(ty: &AlgebraicType, current: &AlgebraicValue) -> AlgebraicValue { + let default = default_value_for_type(ty); + if &default != current { + return default; + } + + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::I8 => AlgebraicValue::I8(1), + AlgebraicType::U8 => AlgebraicValue::U8(1), + AlgebraicType::I16 => AlgebraicValue::I16(1), + AlgebraicType::U16 => AlgebraicValue::U16(1), + 
AlgebraicType::I32 => AlgebraicValue::I32(1), + AlgebraicType::U32 => AlgebraicValue::U32(1), + AlgebraicType::I64 => AlgebraicValue::I64(1), + AlgebraicType::U64 => AlgebraicValue::U64(1), + AlgebraicType::I128 => AlgebraicValue::I128(1.into()), + AlgebraicType::U128 => AlgebraicValue::U128(1.into()), + AlgebraicType::String => AlgebraicValue::String("dst_unique_conflict".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +fn edge_value_for_type(rng: &Rng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => [i8::MIN, -1, 0, 1, i8::MAX][rng.index(5)].into(), + AlgebraicType::U8 => [0, 1, u8::MAX][rng.index(3)].into(), + AlgebraicType::I16 => [i16::MIN, -1, 0, 1, i16::MAX][rng.index(5)].into(), + AlgebraicType::U16 => [0, 1, u16::MAX][rng.index(3)].into(), + AlgebraicType::I32 => [i32::MIN, -1, 0, 1, i32::MAX][rng.index(5)].into(), + AlgebraicType::U32 => [0, 1, u32::MAX][rng.index(3)].into(), + AlgebraicType::I64 => [i64::MIN, -1, 0, 1, i64::MAX][rng.index(5)].into(), + AlgebraicType::U64 => [0, 1, u64::MAX.saturating_sub(idx as u64)][rng.index(3)].into(), + AlgebraicType::I128 => { + let value = [i128::MIN, -1, 0, 1, i128::MAX][rng.index(5)]; + AlgebraicValue::I128(value.into()) + } + AlgebraicType::U128 => { + let value = [0, 1, u128::MAX][rng.index(3)]; + AlgebraicValue::U128(value.into()) + } + AlgebraicType::String => match rng.index(5) { + 0 => AlgebraicValue::String("".into()), + 1 => AlgebraicValue::String("same".into()), + 2 => AlgebraicValue::String("x".repeat(512).into()), + 3 => AlgebraicValue::String(format!("edge_{}", char::from_u32(0x2603).expect("valid char")).into()), + _ => AlgebraicValue::String(format!("v{idx}_edge").into()), + }, + other => panic!("unsupported generated column type: {other:?}"), + } +} + +impl SimRow { + pub fn to_product_value(&self) -> ProductValue { + 
ProductValue::from_iter(self.values.iter().cloned()) + } + + pub fn to_bsatn(&self) -> anyhow::Result> { + Ok(spacetimedb_sats::bsatn::to_vec(&self.to_product_value())?) + } + + pub fn from_product_value(value: ProductValue) -> Self { + SimRow { + values: value.elements.to_vec(), + } + } + + pub fn project_key(&self, cols: &[u16]) -> Self { + let values = cols + .iter() + .map(|&col| self.values[col as usize].clone()) + .collect::>(); + SimRow { values } + } + + pub fn to_algebraic_value(&self) -> AlgebraicValue { + match self.values.as_slice() { + [value] => value.clone(), + _ => ProductValue::from_iter(self.values.iter().cloned()).into(), + } + } + + pub fn id(&self) -> Option { + match self.values.first() { + Some(AlgebraicValue::U64(value)) => Some(*value), + _ => None, + } + } +} diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs new file mode 100644 index 00000000000..7fdd83618fc --- /dev/null +++ b/crates/dst/src/sim/commitlog.rs @@ -0,0 +1,317 @@ +//! Commitlog storage fault-injection support for DST targets. + +use std::{ + fmt, + io::{self, BufRead, Read, Seek, Write}, +}; + +use spacetimedb_commitlog::{ + repo::{ + CompressOnce, CompressionStats, Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut, + }, + segment::{FileLike, Header}, +}; + +use crate::sim::storage_faults::{ + is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, + StorageFaultKind, StorageFaultSummary, +}; + +pub(crate) type CommitlogFaultConfig = StorageFaultConfig; +pub(crate) type CommitlogFaultSummary = StorageFaultSummary; + +/// Returns true if `text` contains an error created by this fault layer. +pub(crate) fn is_injected_disk_error_text(text: &str) -> bool { + is_injected_fault_text(StorageFaultDomain::Disk, text) +} + +/// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. 
+/// +/// Faults stay within normal file API semantics: calls may take deterministic simulated time, +/// reads/writes may complete partially, and configured calls may return transient I/O errors. +/// The wrapper deliberately avoids corruption or crash-style partial persistence; those need a +/// stronger durability model before we enable them. +#[derive(Clone)] +pub(crate) struct FaultableRepo { + inner: R, + faults: StorageFaultController, +} + +impl FaultableRepo { + pub(crate) fn new(inner: R, config: CommitlogFaultConfig) -> Self { + Self { + inner, + faults: StorageFaultController::new(config, StorageFaultDomain::Disk), + } + } + + pub(crate) fn enable_faults(&self) { + self.faults.enable(); + } + + pub(crate) fn fault_summary(&self) -> CommitlogFaultSummary { + self.faults.summary() + } + + pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { + self.faults.with_suspended(f) + } +} + +impl fmt::Display for FaultableRepo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}+faultable({:?})", self.inner, self.faults.summary().profile) + } +} + +impl Repo for FaultableRepo { + type SegmentWriter = FaultableSegment; + type SegmentReader = FaultableReader; + + fn create_segment(&self, offset: u64, header: Header) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .create_segment(offset, header) + .map(|inner| FaultableSegment::new(inner, self.faults.clone())) + } + + fn open_segment_reader(&self, offset: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .open_segment_reader(offset) + .map(|inner| FaultableReader::new(inner, self.faults.clone())) + } + + fn open_segment_writer(&self, offset: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + 
self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .open_segment_writer(offset) + .map(|inner| FaultableSegment::new(inner, self.faults.clone())) + } + + fn segment_file_path(&self, offset: u64) -> Option { + self.inner.segment_file_path(offset) + } + + fn remove_segment(&self, offset: u64) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.remove_segment(offset) + } + + fn compress_segment_with(&self, offset: u64, f: impl CompressOnce) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.compress_segment_with(offset, f) + } + + fn existing_offsets(&self) -> io::Result> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.existing_offsets() + } + + fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.create_offset_index(offset, cap) + } + + fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.remove_offset_index(offset) + } + + fn get_offset_index(&self, offset: TxOffset) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.get_offset_index(offset) + } +} + +impl RepoWithoutLockFile for FaultableRepo {} + +pub(crate) struct FaultableSegment { + inner: S, + faults: StorageFaultController, +} + +impl FaultableSegment { + fn new(inner: S, faults: 
StorageFaultController) -> Self { + Self { inner, faults } + } +} + +impl Read for FaultableSegment { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + self.inner.read(&mut buf[..len]) + } +} + +impl Write for FaultableSegment { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Write)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Write)?; + let is_partial = self.faults.sample_partial_failure(); + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Write); + let n = self.inner.write(&buf[..len])?; + if is_partial && n > 0 { + self.faults.arm_pending_error(); + } + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Flush)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Flush)?; + self.inner.flush() + } +} + +impl Seek for FaultableSegment { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.faults.maybe_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for FaultableSegment { + fn segment_len(&mut self) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.segment_len() + } +} + +impl FileLike for FaultableSegment { + fn fsync(&mut self) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Fsync)?; + self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Fsync)?; + self.inner.fsync() + } + + fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.check_pending_error(StorageFaultKind::Metadata)?; + 
self.faults.maybe_error(StorageFaultKind::NoSpace)?; + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.ftruncate(tx_offset, size) + } +} + +pub(crate) struct FaultableReader { + inner: S, + faults: StorageFaultController, +} + +impl FaultableReader { + fn new(inner: S, faults: StorageFaultController) -> Self { + Self { inner, faults } + } +} + +impl Read for FaultableReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + self.inner.read(&mut buf[..len]) + } +} + +impl BufRead for FaultableReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let buf = self.inner.fill_buf()?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + Ok(&buf[..len]) + } + + fn consume(&mut self, amount: usize) { + self.inner.consume(amount); + } +} + +impl Seek for FaultableReader { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.faults.maybe_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for FaultableReader { + fn segment_len(&mut self) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.segment_len() + } +} + +impl SegmentReader for FaultableReader { + fn sealed(&self) -> bool { + self.inner.sealed() + } +} + +#[cfg(test)] +mod tests { + use std::io::{BufRead, Cursor}; + + use crate::{config::CommitlogFaultProfile, sim}; + + use super::*; + + fn always_short_read_config() -> CommitlogFaultConfig { + CommitlogFaultConfig { + profile: CommitlogFaultProfile::Default, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 1.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + 
no_space_prob: 0.0, + partial_failure_prob: 0.0, + } + } + + #[test] + fn buf_read_path_applies_short_read_faults() { + let mut runtime = sim::Runtime::new(55).unwrap(); + let handle = runtime.handle(); + handle.enable_buggify(); + runtime.block_on(async { + let faults = StorageFaultController::new(always_short_read_config(), StorageFaultDomain::Disk); + let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); + + assert_eq!(reader.fill_buf().unwrap(), &[1, 2]); + assert_eq!(faults.summary().short_read, 1); + }); + } +} diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs new file mode 100644 index 00000000000..51cea430fc6 --- /dev/null +++ b/crates/dst/src/sim/mod.rs @@ -0,0 +1,118 @@ +//! Local simulation shim for the DST crate. +//! +//! This module is deliberately small, but its executor shape follows madsim's: +//! futures are scheduled as runnable tasks and the ready queue is sampled by a +//! deterministic RNG instead of being driven by a package-level async runtime. + +pub(crate) mod commitlog; +pub(crate) mod snapshot; +pub(crate) mod storage_faults; + +use std::{cell::RefCell, future::Future, time::Duration}; + +pub use spacetimedb_runtime::sim::{yield_now, Handle, JoinHandle, Node, NodeBuilder, NodeId, Rng}; + +thread_local! 
{ + static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; +} + +struct CurrentHandleGuard { + previous: Option, +} + +fn enter_current_handle(handle: Handle) -> CurrentHandleGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.replace(Some(handle))); + CurrentHandleGuard { previous } +} + +impl Drop for CurrentHandleGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + let _ = slot.replace(self.previous.take()); + }); + } +} + +pub(crate) fn current_handle() -> Option { + CURRENT_HANDLE.with(|slot| slot.borrow().clone()) +} + +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(GAMMA); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} + +pub(crate) fn fork_seed(seed: u64, discriminator: u64) -> u64 { + splitmix64(seed ^ discriminator.wrapping_mul(GAMMA)) +} + +/// DST-facing wrapper that keeps the top-level seed type local to this crate. 
+pub struct Runtime {
+    /// The wrapped simulation runtime that every method below forwards to.
+    inner: spacetimedb_runtime::sim::Runtime,
+}
+
+impl Runtime {
+    /// Creates a runtime by passing `seed` to `spacetimedb_runtime::sim::Runtime::new`.
+    ///
+    /// # Errors
+    ///
+    /// The current implementation always returns `Ok`; the `Result` return type
+    /// is kept so callers can keep using `?`.
+    pub fn new(seed: u64) -> anyhow::Result<Self> {
+        Ok(Self {
+            inner: spacetimedb_runtime::sim::Runtime::new(seed),
+        })
+    }
+
+    /// Drives `future` to completion via `spacetimedb_runtime::sim_std::block_on`.
+    ///
+    /// While the future runs, this runtime's handle is installed as the
+    /// thread-local current handle (see [`current_handle`]); the previous value
+    /// is put back when the guard is dropped on return.
+    pub fn block_on<F: Future>(&mut self, future: F) -> F::Output {
+        // Keep the guard alive for the whole call so `current_handle()` resolves
+        // to this runtime from inside `future`.
+        let _guard = enter_current_handle(self.inner.handle());
+        spacetimedb_runtime::sim_std::block_on(&mut self.inner, future)
+    }
+
+    /// Returns the wrapped runtime's `elapsed()` value.
+    pub fn elapsed(&self) -> Duration {
+        self.inner.elapsed()
+    }
+
+    /// Returns a [`Handle`] obtained from the wrapped runtime.
+    pub fn handle(&self) -> Handle {
+        self.inner.handle()
+    }
+
+    /// Returns a [`NodeBuilder`] obtained from the wrapped runtime.
+    pub fn create_node(&self) -> NodeBuilder {
+        self.inner.create_node()
+    }
+
+    /// Forwards to the wrapped runtime's `pause` for `node`.
+    pub fn pause(&self, node: NodeId) {
+        self.inner.pause(node);
+    }
+
+    /// Forwards to the wrapped runtime's `resume` for `node`.
+    pub fn resume(&self, node: NodeId) {
+        self.inner.resume(node);
+    }
+
+    /// Spawns `future` on `node` through the wrapped runtime and returns its
+    /// [`JoinHandle`].
+    pub fn spawn_on<F>(&self, node: NodeId, future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + Send + 'static,
+        F::Output: Send + 'static,
+    {
+        self.inner.spawn_on(node, future)
+    }
+
+    /// Forwards `seed` and the `make_future` function pointer to
+    /// `spacetimedb_runtime::sim_std::check_determinism` and returns its result.
+    pub fn check_determinism<F>(seed: u64, make_future: fn() -> F) -> F::Output
+    where
+        F: Future + 'static,
+        F::Output: Send + 'static,
+    {
+        spacetimedb_runtime::sim_std::check_determinism(seed, make_future)
+    }
+
+    /// Same as [`Runtime::check_determinism`], but accepts any cloneable closure
+    /// as the future factory instead of a plain function pointer.
+    pub fn check_determinism_with<M, F>(seed: u64, make_future: M) -> F::Output
+    where
+        M: Fn() -> F + Clone + Send + 'static,
+        F: Future + 'static,
+        F::Output: Send + 'static,
+    {
+        // NOTE(review): this forwards to `sim_std::check_determinism`, the same
+        // entry point used by `check_determinism` above -- confirm that is the
+        // intended target for the closure-based variant.
+        spacetimedb_runtime::sim_std::check_determinism(seed, make_future)
+    }
+}
+
+/// Builds a fresh [`Rng`] from `seed`.
+// NOTE(review): `dead_code` is allowed presumably because no target calls this
+// yet -- confirm, or drop the attribute once it has a caller.
+#[allow(dead_code)]
+pub(crate) fn decision_source(seed: u64) -> Rng {
+    Rng::new(seed)
+}
diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs
new file mode 100644
index 00000000000..13c0e3a43c3
--- /dev/null
+++ b/crates/dst/src/sim/snapshot.rs
@@ -0,0 +1,287 @@
+//! In-memory snapshot storage with deterministic fault injection.
+//!
+//! This is intentionally a semantic snapshot seam, not a filesystem facade. It
+//! keeps DST snapshot bytes inside controlled memory storage, while still using
+//! the same snapshot capture/restore shape as production.
+ +use std::{ops::Range, sync::Arc}; + +use spacetimedb_durability::TxOffset; +use spacetimedb_lib::Identity; +use spacetimedb_snapshot::{ + BoxedPendingSnapshot, CompressionStats, MemorySnapshotRepository, PendingSnapshot, ReconstructedSnapshot, + SnapshotError, SnapshotRepo, SnapshotStore, +}; +use spacetimedb_table::{blob_store::BlobStore, page_pool::PagePool, table::Table}; + +use crate::sim::storage_faults::{ + is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, + StorageFaultSummary, +}; + +pub(crate) type SnapshotFaultConfig = StorageFaultConfig; + +/// Returns true if `text` contains an error created by this snapshot fault layer. +pub(crate) fn is_injected_snapshot_error_text(text: &str) -> bool { + is_injected_fault_text(StorageFaultDomain::Snapshot, text) +} + +pub(crate) struct SnapshotRestoreRepo { + pub(crate) store: Option>, + pub(crate) restored_snapshot_offset: Option, + pub(crate) latest_snapshot_offset: Option, +} + +/// In-memory snapshot repository wrapped with deterministic operation-level faults. +/// +/// The bytes/pages are written and read by `spacetimedb-snapshot`; this wrapper +/// only decides whether a DST operation reaches that repository. That keeps +/// restore semantics aligned with production without requiring the +/// Tokio-backed `SnapshotWorker` or the host filesystem inside the simulator. +/// +/// This is the intended boundary for the current DST target. It exercises +/// capture/restore behavior, retry classification, and replay correctness. It +/// does not model torn snapshot pages or byte-level corruption. 
+#[derive(Clone)]
+pub(crate) struct BuggifiedSnapshotRepo {
+    repo: Arc, // inner in-memory repository that actually stores and serves snapshots
+    faults: StorageFaultController, // decides when an operation is delayed or fails
+}
+
+impl BuggifiedSnapshotRepo {
+    pub(crate) fn new(config: SnapshotFaultConfig) -> anyhow::Result { // the visible body only ever returns `Ok`
+        Ok(Self {
+            repo: Arc::new(MemorySnapshotRepository::new(Identity::ZERO, 0)),
+            faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot),
+        })
+    }
+    /// Turns on fault injection through the underlying fault controller.
+    pub(crate) fn enable_faults(&self) {
+        self.faults.enable();
+    }
+    /// Returns the fault controller's summary of the faults injected so far.
+    pub(crate) fn fault_summary(&self) -> StorageFaultSummary {
+        self.faults.summary()
+    }
+    /// Runs `f` while the fault controller is suspended and returns its result.
+    pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T {
+        self.faults.with_suspended(f)
+    }
+    /// Calls `latest_snapshot()` on the inner repo, bypassing this wrapper's fault hooks.
+    pub(crate) fn latest_snapshot_unfaulted(&self) -> Result, String> {
+        self.with_faults_suspended(|| {
+            self.repo
+                .latest_snapshot()
+                .map_err(|err| format!("snapshot metadata read failed: {err}"))
+        })
+    }
+    /// Probes for a snapshot to restore from, injecting metadata, open and read faults on the way.
+    pub(crate) fn repo_for_restore(&self, durable_offset: Option) -> Result {
+        let latest_snapshot_offset = self.latest_snapshot_unfaulted()?; // never subject to injected errors
+        self.faults.maybe_latency();
+        self.inject(StorageFaultKind::Metadata)?;
+        let Some(durable_offset) = durable_offset else {
+            return Ok(SnapshotRestoreRepo { // no durable offset: no store is handed out
+                store: None,
+                restored_snapshot_offset: None,
+                latest_snapshot_offset,
+            });
+        };
+        let restored_snapshot_offset = self
+            .repo
+            .latest_snapshot_older_than(durable_offset) // queried on the inner repo, not via the faulted trait impl
+            .map_err(|err| format!("snapshot metadata before restore failed: {err}"))?;
+        if restored_snapshot_offset.is_none() {
+            return Ok(SnapshotRestoreRepo { // no matching snapshot: no store is handed out
+                store: None,
+                restored_snapshot_offset,
+                latest_snapshot_offset,
+            });
+        }
+        // A snapshot matched: give open and read faults a chance before handing out the inner repo.
+        self.inject(StorageFaultKind::Open)?;
+        self.inject(StorageFaultKind::Read)?;
+        Ok(SnapshotRestoreRepo {
+            store: Some(self.repo.clone()),
+            restored_snapshot_offset,
+            latest_snapshot_offset,
+        })
+    }
+    /// Asks the fault controller for an error of `kind` and converts it to its `String` message.
+    fn inject(&self, kind: StorageFaultKind) -> Result<(), String> {
+        self.faults.maybe_error(kind).map_err(|err| err.to_string())
+    }
+}
+
+impl SnapshotStore for BuggifiedSnapshotRepo {
+    fn database_identity(&self) -> Identity { // plain delegation, no fault hooks
+        self.repo.database_identity()
+    }
+    /// Captures in the inner repo unless a no-space, open, metadata, write or fsync fault fires first.
+    fn capture_snapshot<'db>(
+        &self,
+        tables: &mut dyn Iterator,
+        blobs: &'db dyn BlobStore,
+        tx_offset: TxOffset,
+    ) -> Result {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::NoSpace)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Open)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Metadata)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Write)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Fsync)
+            .map_err(SnapshotError::Io)?;
+        self.repo.capture_snapshot(tables, blobs, tx_offset)
+    }
+    /// Reads from the inner repo unless an open or read fault fires first.
+    fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::Open)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Read)
+            .map_err(SnapshotError::Io)?;
+        self.repo.read_snapshot(tx_offset, page_pool)
+    }
+    /// Delegates to the inner repo unless a no-space or metadata fault fires first.
+    fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::NoSpace)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Metadata)
+            .map_err(SnapshotError::Io)?;
+        self.repo.latest_snapshot_older_than(upper_bound)
+    }
+    /// Delegates to the inner repo unless a no-space or metadata fault fires first.
+    fn latest_snapshot(&self) -> Result, SnapshotError> {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::NoSpace)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Metadata)
+            .map_err(SnapshotError::Io)?;
+        self.repo.latest_snapshot()
+    }
+    /// Delegates to the inner repo unless a no-space or metadata fault fires first.
+    fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::NoSpace)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Metadata)
+            .map_err(SnapshotError::Io)?;
+        self.repo.invalidate_newer_snapshots(upper_bound)
+    }
+    /// Delegates to the inner repo unless a no-space or metadata fault fires first.
+    fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> {
+        self.faults.maybe_latency();
+        self.faults
+            .maybe_error(StorageFaultKind::NoSpace)
+            .map_err(SnapshotError::Io)?;
+        self.faults
+            .maybe_error(StorageFaultKind::Metadata)
+            .map_err(SnapshotError::Io)?;
+        self.repo.invalidate_snapshot(tx_offset)
+    }
+}
+/// Pending-snapshot handle that only remembers the offset it was created for.
+struct BuggifiedPendingSnapshot {
+    tx_offset: TxOffset,
+}
+
+impl PendingSnapshot for BuggifiedPendingSnapshot {
+    fn sync_all(self: Box) -> Result { // no work left here: `create_snapshot` captured before building this
+        Ok(self.tx_offset)
+    }
+}
+
+impl SnapshotRepo for BuggifiedSnapshotRepo {
+    type Pending = BoxedPendingSnapshot;
+    /// Captures eagerly via `capture_snapshot`, then returns a pending handle carrying `tx_offset`.
+    fn create_snapshot<'db>(
+        &self,
+        tables: &mut dyn Iterator,
+        blobs: &'db dyn BlobStore,
+        tx_offset: TxOffset,
+    ) -> Result {
+        self.capture_snapshot(tables, blobs, tx_offset)?;
+        Ok(Box::new(BuggifiedPendingSnapshot { tx_offset }))
+    }
+    /// No-op: both arguments are ignored and `Ok(())` is returned.
+    fn compress_snapshots(&self, _stats: &mut CompressionStats, _range: Range) -> Result<(), SnapshotError> {
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{config::CommitlogFaultProfile, sim};
+
+    use super::*;
+    /// Fault config for the `Off` profile.
+    fn no_faults() -> SnapshotFaultConfig {
+        SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off)
+    }
+    /// `Default` profile with the metadata error probability forced to 1.0.
+    fn always_metadata_error() -> SnapshotFaultConfig {
+        SnapshotFaultConfig {
+            metadata_error_prob: 1.0,
+            ..SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Default)
+        }
+    }
+
+    #[test]
+    fn repo_without_snapshots_is_not_used_for_restore() {
+        let mut runtime = sim::Runtime::new(42).unwrap();
+        runtime.block_on(async {
+            let repo = BuggifiedSnapshotRepo::new(no_faults()).unwrap();
+
+            assert!(repo.repo_for_restore(Some(0)).unwrap().store.is_none());
+        })
+    }
+
+    #[test]
+    fn injected_metadata_error_is_counted_and_recognizable() {
+        let mut runtime = sim::Runtime::new(42).unwrap();
+        runtime.block_on(async {
+            let repo = BuggifiedSnapshotRepo::new(always_metadata_error()).unwrap();
+            repo.enable_faults();
+
+            let err = match repo.repo_for_restore(Some(0)) {
+                Ok(_) => panic!("expected injected snapshot metadata error"),
+                Err(err) => err,
+            };
+
+            assert!(is_injected_snapshot_error_text(&err));
+            assert_eq!(repo.fault_summary().metadata_error, 1);
+        })
+    }
+
+    #[test]
+    fn suspended_faults_allow_restore_probe() {
+        let mut runtime = sim::Runtime::new(42).unwrap();
+        runtime.block_on(async {
+            let repo = BuggifiedSnapshotRepo::new(always_metadata_error()).unwrap();
+            repo.enable_faults();
+
+            let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0)));
+
+            assert!(restore.unwrap().store.is_none());
+            assert_eq!(repo.fault_summary().metadata_error, 0);
+        })
+    }
+}
diff --git a/crates/dst/src/sim/storage_faults.rs b/crates/dst/src/sim/storage_faults.rs
new file mode 100644
index 00000000000..a1c59e5ca71
--- /dev/null
+++ b/crates/dst/src/sim/storage_faults.rs
@@ -0,0 +1,372 @@
+//! Shared storage fault-injection primitives for DST simulation helpers.
+//!
+//! Fault decisions use [`spacetimedb_runtime::sim::Handle::buggify_with_prob`]
+//! so they are gated by the runtime's centralized buggify flag.
+
+use std::{
+    io,
+    sync::{
+        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+        Arc,
+    },
+    time::Duration,
+};
+
+use crate::config::CommitlogFaultProfile;
+/// Prefix shared by every message built in `StorageFaultKind::message`.
+const INJECTED_ERROR_PREFIX: &str = "dst injected ";
+/// Returns true if `text` contains the injected-error prefix followed by `domain`'s label.
+pub(crate) fn is_injected_fault_text(domain: StorageFaultDomain, text: &str) -> bool {
+    text.contains(&format!("{INJECTED_ERROR_PREFIX}{} ", domain.label()))
+}
+
+/// API-level storage fault profile for DST-only storage wrappers.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct StorageFaultConfig {
+    pub(crate) profile: CommitlogFaultProfile, // profile this config was built for
+    pub(crate) latency_prob: f64, // chance that `maybe_latency` injects a delay
+    pub(crate) long_latency_prob: f64, // given a delay, chance that it is the long (25 ms) one
+    pub(crate) short_io_prob: f64, // chance that `maybe_short_len` shortens an I/O
+    pub(crate) read_error_prob: f64,
+    pub(crate) write_error_prob: f64,
+    pub(crate) flush_error_prob: f64,
+    pub(crate) fsync_error_prob: f64,
+    pub(crate) open_error_prob: f64,
+    pub(crate) metadata_error_prob: f64,
+    pub(crate) max_short_io_divisor: usize, // a shortened I/O gets `len / divisor`; values below 2 are raised to 2
+    pub(crate) no_space_prob: f64,
+    pub(crate) partial_failure_prob: f64, // chance used by `sample_partial_failure`
+}
+
+impl StorageFaultConfig {
+    pub(crate) fn for_profile(profile: CommitlogFaultProfile) -> Self { // preset probabilities for each profile
+        match profile {
+            CommitlogFaultProfile::Off => Self { // every probability is zero
+                profile,
+                latency_prob: 0.0,
+                long_latency_prob: 0.0,
+                short_io_prob: 0.0,
+                read_error_prob: 0.0,
+                write_error_prob: 0.0,
+                flush_error_prob: 0.0,
+                fsync_error_prob: 0.0,
+                open_error_prob: 0.0,
+                metadata_error_prob: 0.0,
+                max_short_io_divisor: 2,
+                no_space_prob: 0.0,
+                partial_failure_prob: 0.0,
+            },
+            // Realistic rare faults: ~1 in 1000 latency, ~1 in 10000 short I/O / errors.
+            CommitlogFaultProfile::Light => Self {
+                profile,
+                latency_prob: 0.001,
+                long_latency_prob: 0.0001,
+                short_io_prob: 0.0001,
+                read_error_prob: 0.0001,
+                write_error_prob: 0.0001,
+                flush_error_prob: 0.0001,
+                fsync_error_prob: 0.0001,
+                open_error_prob: 0.0001,
+                metadata_error_prob: 0.0001,
+                max_short_io_divisor: 2,
+                no_space_prob: 0.0001,
+                partial_failure_prob: 0.0001,
+            },
+            // Moderate rare faults: ~1 in 500 latency, ~1 in 5000 short I/O / errors.
+            CommitlogFaultProfile::Default => Self {
+                profile,
+                latency_prob: 0.002,
+                long_latency_prob: 0.0002,
+                short_io_prob: 0.0002,
+                read_error_prob: 0.0002,
+                write_error_prob: 0.0002,
+                flush_error_prob: 0.0002,
+                fsync_error_prob: 0.0002,
+                open_error_prob: 0.0002,
+                metadata_error_prob: 0.0002,
+                max_short_io_divisor: 2,
+                no_space_prob: 0.0002,
+                partial_failure_prob: 0.0002,
+            },
+            // Stress test: ~1 in 10 latency, ~1 in 50 long latency / short I/O, ~1 in 100 per error kind.
+            CommitlogFaultProfile::Aggressive => Self {
+                profile,
+                latency_prob: 0.10,
+                long_latency_prob: 0.02,
+                short_io_prob: 0.02,
+                read_error_prob: 0.01,
+                write_error_prob: 0.01,
+                flush_error_prob: 0.01,
+                fsync_error_prob: 0.01,
+                open_error_prob: 0.01,
+                metadata_error_prob: 0.01,
+                max_short_io_divisor: 2,
+                no_space_prob: 0.01,
+                partial_failure_prob: 0.01,
+            },
+        }
+    }
+}
+/// Point-in-time copy of the fault counters, as built by `StorageFaultController::summary`.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub(crate) struct StorageFaultSummary {
+    pub(crate) profile: CommitlogFaultProfile,
+    pub(crate) latency: usize,
+    pub(crate) short_read: usize,
+    pub(crate) short_write: usize,
+    pub(crate) read_error: usize,
+    pub(crate) write_error: usize,
+    pub(crate) flush_error: usize,
+    pub(crate) fsync_error: usize,
+    pub(crate) open_error: usize,
+    pub(crate) metadata_error: usize,
+    pub(crate) no_space: usize,
+    pub(crate) partial_failure: usize,
+}
+/// Storage layer a controller injects faults for; selects the label used in error messages.
+#[derive(Clone, Copy, Debug)]
+pub(crate) enum StorageFaultDomain {
+    Disk,
+    Snapshot,
+}
+
+impl StorageFaultDomain {
+    fn label(self) -> &'static str { // word embedded in injected messages and matched by `is_injected_fault_text`
+        match self {
+            Self::Disk => "disk",
+            Self::Snapshot => "snapshot",
+        }
+    }
+}
+/// Samples, counts and reports injected faults for one storage domain.
+#[derive(Clone)]
+pub(crate) struct StorageFaultController {
+    config: StorageFaultConfig,
+    domain: StorageFaultDomain,
+    counters: Arc,
+    handle: Option, // when `None`, no sample ever fires and no latency is applied
+    suspended: Arc, // number of `with_suspended` calls currently in progress
+}
+
+impl StorageFaultController {
+    pub(crate) fn new(config: StorageFaultConfig, domain: StorageFaultDomain) -> Self { // captures the current sim handle, if any
+        Self {
+            config,
+            domain,
+            counters: Arc::default(),
+            handle: crate::sim::current_handle(),
+            suspended: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+    /// Enables buggify on the sim handle, if there is one.
+    pub(crate) fn enable(&self) {
+        if let Some(handle) = &self.handle {
+            handle.enable_buggify();
+        }
+    }
+    /// Runs `f` with error and short-I/O sampling disabled (latency is still sampled); calls may nest.
+    pub(crate) fn with_suspended(&self, f: impl FnOnce() -> T) -> T {
+        self.suspended.fetch_add(1, Ordering::Relaxed);
+        let _guard = SuspendFaultsGuard { // decrements the count again when dropped
+            suspended: self.suspended.clone(),
+        };
+        f()
+    }
+    /// When sampled, counts a latency fault and advances the sim handle by 1 ms, or 25 ms if long.
+    pub(crate) fn maybe_latency(&self) {
+        if self.sample_latency(self.config.latency_prob) {
+            self.counters.latency.fetch_add(1, Ordering::Relaxed);
+            let latency = if self.sample_latency(self.config.long_latency_prob) {
+                Duration::from_millis(25)
+            } else {
+                Duration::from_millis(1)
+            };
+            if let Some(handle) = &self.handle {
+                handle.advance(latency);
+            }
+        }
+    }
+    /// When sampled, bumps the counter for `kind` and returns the matching injected `io::Error`.
+    pub(crate) fn maybe_error(&self, kind: StorageFaultKind) -> io::Result<()> {
+        let prob = kind.probability(&self.config);
+        if self.sample(prob) {
+            kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed);
+            return Err(io::Error::new(kind.error_kind(), kind.message(self.domain)));
+        }
+        Ok(())
+    }
+    /// Consumes an armed pending error, if any, counting it under `kind` and as a partial failure.
+    pub(crate) fn check_pending_error(&self, kind: StorageFaultKind) -> io::Result<()> {
+        if self.counters.pending_error.swap(false, Ordering::Relaxed) {
+            kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed);
+            self.counters.partial_failure.fetch_add(1, Ordering::Relaxed);
+            return Err(io::Error::new(kind.error_kind(), kind.message(self.domain)));
+        }
+        Ok(())
+    }
+    /// Arms the pending-error flag that the next `check_pending_error` call consumes.
+    pub(crate) fn arm_pending_error(&self) {
+        self.counters.pending_error.store(true, Ordering::Relaxed);
+    }
+    /// Samples `partial_failure_prob` unless suspended; does not touch any counter itself.
+    pub(crate) fn sample_partial_failure(&self) -> bool {
+        if !self.active() || self.config.partial_failure_prob <= 0.0 {
+            return false;
+        }
+        match &self.handle {
+            Some(handle) => handle.buggify_with_prob(self.config.partial_failure_prob),
+            None => false,
+        }
+    }
+    /// When sampled, counts a short I/O under `kind` and returns `len / divisor`, but at least 1.
+    pub(crate) fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize {
+        if len <= 1 { // lengths of 0 or 1 are never shortened
+            return len;
+        }
+        if !self.sample(self.config.short_io_prob) {
+            return len;
+        }
+        kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed);
+        let divisor = self.config.max_short_io_divisor.max(2);
+        (len / divisor).max(1)
+    }
+    /// Copies the current counter values, plus the configured profile, into a summary.
+    pub(crate) fn summary(&self) -> StorageFaultSummary {
+        StorageFaultSummary {
+            profile: self.config.profile,
+            latency: self.counters.latency.load(Ordering::Relaxed) as usize,
+            short_read: self.counters.short_read.load(Ordering::Relaxed) as usize,
+            short_write: self.counters.short_write.load(Ordering::Relaxed) as usize,
+            read_error: self.counters.read_error.load(Ordering::Relaxed) as usize,
+            write_error: self.counters.write_error.load(Ordering::Relaxed) as usize,
+            flush_error: self.counters.flush_error.load(Ordering::Relaxed) as usize,
+            fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize,
+            open_error: self.counters.open_error.load(Ordering::Relaxed) as usize,
+            metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize,
+            no_space: self.counters.no_space.load(Ordering::Relaxed) as usize,
+            partial_failure: self.counters.partial_failure.load(Ordering::Relaxed) as usize,
+        }
+    }
+    /// True while no `with_suspended` call is in progress.
+    fn active(&self) -> bool {
+        self.suspended.load(Ordering::Relaxed) == 0
+    }
+    /// Draws a buggify sample; always false when suspended, `probability <= 0`, or there is no handle.
+    fn sample(&self, probability: f64) -> bool {
+        if probability <= 0.0 || !self.active() {
+            return false;
+        }
+        match &self.handle {
+            Some(handle) => handle.buggify_with_prob(probability),
+            None => false,
+        }
+    }
+    /// Same as `sample` minus the suspension check. NOTE(review): confirm that this is intended.
+    fn sample_latency(&self, probability: f64) -> bool {
+        if probability <= 0.0 {
+            return false;
+        }
+        match &self.handle {
+            Some(handle) => handle.buggify_with_prob(probability),
+            None => false,
+        }
+    }
+}
+/// Undoes one `with_suspended` increment when dropped.
+struct SuspendFaultsGuard {
+    suspended: Arc,
+}
+
+impl Drop for SuspendFaultsGuard {
+    fn drop(&mut self) {
+        self.suspended.fetch_sub(1, Ordering::Relaxed);
+    }
+}
+/// Atomic counters, one per fault category, plus the pending-error flag.
+#[derive(Debug, Default)]
+struct FaultCounters {
+    latency: AtomicU64,
+    short_read: AtomicU64,
+    short_write: AtomicU64,
+    read_error: AtomicU64,
+    write_error: AtomicU64,
+    flush_error: AtomicU64,
+    fsync_error: AtomicU64,
+    open_error: AtomicU64,
+    metadata_error: AtomicU64,
+    no_space: AtomicU64,
+    partial_failure: AtomicU64,
+    pending_error: AtomicBool, // set by `arm_pending_error`, cleared by `check_pending_error`
+}
+/// Direction of a shortened I/O; selects which counter is bumped.
+#[derive(Clone, Copy)]
+pub(crate) enum ShortIoKind {
+    Read,
+    Write,
+}
+
+impl ShortIoKind {
+    fn counter(self, counters: &FaultCounters) -> &AtomicU64 {
+        match self {
+            Self::Read => &counters.short_read,
+            Self::Write => &counters.short_write,
+        }
+    }
+}
+/// Kinds of error that `StorageFaultController::maybe_error` can inject.
+#[derive(Clone, Copy)]
+pub(crate) enum StorageFaultKind {
+    Read,
+    Write,
+    Flush,
+    Fsync,
+    Open,
+    Metadata,
+    NoSpace,
+}
+
+impl StorageFaultKind {
+    fn probability(self, config: &StorageFaultConfig) -> f64 { // configured probability for this kind
+        match self {
+            Self::Read => config.read_error_prob,
+            Self::Write => config.write_error_prob,
+            Self::Flush => config.flush_error_prob,
+            Self::Fsync => config.fsync_error_prob,
+            Self::Open => config.open_error_prob,
+            Self::Metadata => config.metadata_error_prob,
+            Self::NoSpace => config.no_space_prob,
+        }
+    }
+    /// Counter that records injected errors of this kind.
+    fn counter(self, counters: &FaultCounters) -> &AtomicU64 {
+        match self {
+            Self::Read => &counters.read_error,
+            Self::Write => &counters.write_error,
+            Self::Flush => &counters.flush_error,
+            Self::Fsync => &counters.fsync_error,
+            Self::Open => &counters.open_error,
+            Self::Metadata => &counters.metadata_error,
+            Self::NoSpace => &counters.no_space,
+        }
+    }
+    /// `StorageFull` for `NoSpace`, `Other` for every other kind.
+    fn error_kind(self) -> io::ErrorKind {
+        match self {
+            Self::NoSpace => io::ErrorKind::StorageFull,
+            _ => io::ErrorKind::Other,
+        }
+    }
+    /// Message for the injected error: the shared prefix, the domain label, then the failure text.
+    fn message(self, domain: StorageFaultDomain) -> String {
+        let label = domain.label();
+        match self {
+            Self::Read => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::Write => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::Flush => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::Fsync => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::Open => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::Metadata => format!("{INJECTED_ERROR_PREFIX}{label} input/output error"),
+            Self::NoSpace => format!("{INJECTED_ERROR_PREFIX}{label} no space left on device"),
+        }
+    }
+}
diff --git a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs
new file mode 100644
index 00000000000..bdeae0fbb58
--- /dev/null
+++ b/crates/dst/src/sim/time.rs
@@ -0,0 +1,123 @@
+//! Virtual time for the local DST simulator.
+
+use std::time::Duration;
+
+pub use spacetimedb_runtime::sim::time::TimeoutElapsed;
+pub use spacetimedb_runtime::sim::Handle as TimeHandle;
+/// Returns the current sim handle; panics if there is none.
+fn current_handle() -> TimeHandle {
+    super::current_handle().expect("sim::time used outside Runtime::block_on")
+}
+/// Non-panicking lookup of the current sim handle.
+pub fn try_current_handle() -> Option {
+    super::current_handle()
+}
+/// Current virtual time as reported by the sim handle; panics if there is no handle.
+pub fn now() -> Duration {
+    current_handle().now()
+}
+/// Sleeps for `duration` on the sim handle's clock; panics if there is no handle.
+pub async fn sleep(duration: Duration) {
+    current_handle().sleep(duration).await
+}
+/// Runs `future` under the sim handle's `timeout` with the given `duration`; panics if there is no handle.
+pub async fn timeout(duration: Duration, future: impl core::future::Future) -> Result {
+    current_handle().timeout(duration, future).await
+}
+/// Advances virtual time by `duration`; panics if there is no handle.
+pub fn advance(duration: Duration) {
+    current_handle().advance(duration);
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        sync::{Arc, Mutex},
+        time::Duration,
+    };
+
+    use crate::sim;
+
+    #[test]
+    fn sleep_fast_forwards_virtual_time() {
+        let mut runtime = sim::Runtime::new(101).unwrap();
+
+        runtime.block_on(async {
+            assert_eq!(super::now(), Duration::ZERO);
+            super::sleep(Duration::from_millis(5)).await;
+            assert_eq!(super::now(), Duration::from_millis(5));
+        });
+    }
+
+    #[test]
+    fn shorter_timer_wakes_first() {
+        let mut runtime = sim::Runtime::new(102).unwrap();
+        let handle = runtime.handle();
+        let order = Arc::new(Mutex::new(Vec::new()));
+
+        runtime.block_on({
+            let order = Arc::clone(&order);
+            async move {
+                let slow_order = Arc::clone(&order);
+                let slow = handle.spawn_on(sim::NodeId::MAIN, async move {
+                    super::sleep(Duration::from_millis(10)).await;
+                    slow_order.lock().expect("order poisoned").push(10);
+                });
+
+                let fast_order = Arc::clone(&order);
+                let fast = handle.spawn_on(sim::NodeId::MAIN, async move {
+                    super::sleep(Duration::from_millis(3)).await;
+                    fast_order.lock().expect("order poisoned").push(3);
+                });
+
+                fast.await.expect("fast timer task should complete");
+                slow.await.expect("slow timer task should complete");
+            }
+        });
+
+        assert_eq!(*order.lock().expect("order poisoned"), vec![3, 10]);
+        assert_eq!(runtime.elapsed(), Duration::from_millis(10));
+    }
+
+    #[test]
+    fn explicit_advance_moves_virtual_time() {
+        let mut runtime = sim::Runtime::new(103).unwrap();
+
+        runtime.block_on(async {
+            super::advance(Duration::from_millis(7));
+            assert_eq!(super::now(), Duration::from_millis(7));
+        });
+    }
+
+    #[test]
+    fn timeout_returns_future_output_before_deadline() {
+        let mut runtime = sim::Runtime::new(104).unwrap();
+
+        let output = runtime.block_on(async {
+            super::timeout(Duration::from_millis(10), async {
+                super::sleep(Duration::from_millis(3)).await;
+                9
+            })
+            .await
+        });
+
+        assert_eq!(output, Ok(9));
+        assert_eq!(runtime.elapsed(), Duration::from_millis(3));
+    }
+
+    #[test]
+    fn timeout_expires_at_virtual_deadline() {
+        let mut runtime = sim::Runtime::new(105).unwrap();
+
+        let output = runtime.block_on(async {
+            super::timeout(Duration::from_millis(4), async {
+                super::sleep(Duration::from_millis(20)).await;
+                9
+            })
+            .await
+        });
+
+        assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4));
+        assert_eq!(runtime.elapsed(), Duration::from_millis(4));
+    }
+}
diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs
new file mode 100644
index 00000000000..1a00c77a937
--- /dev/null
+++ b/crates/dst/src/targets/descriptor.rs
@@ -0,0 +1,40 @@
+//! Target descriptor layer used by the CLI.
+
+use std::{future::Future, pin::Pin};
+
+use crate::{config::RunConfig, workload::table_ops::TableScenarioId};
+
+/// Descriptor contract: CLI talks to this, not per-target ad hoc handlers.
+pub trait TargetDescriptor {
+    const NAME: &'static str; // target name; the implementation below prints it in its result line
+    type Scenario; // scenario selector passed to `prepare` and `run_streaming`
+    /// Optional pre-run hook; the default does nothing and returns `Ok(())`.
+    fn prepare(_seed: u64, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> {
+        Ok(())
+    }
+    /// Starts a run for `seed`, `scenario` and `config`, returning the future that drives it.
+    fn run_streaming(seed: u64, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture;
+}
+/// Pinned future type returned by `TargetDescriptor::run_streaming`.
+pub type TargetRunFuture = Pin>>>;
+/// Descriptor for the `relational-db-commitlog` target.
+pub struct RelationalDbCommitlogDescriptor;
+
+impl TargetDescriptor for RelationalDbCommitlogDescriptor {
+    const NAME: &'static str = "relational-db-commitlog";
+    type Scenario = TableScenarioId;
+    /// Runs the generated table workload and formats a one-line `ok ...` summary on success.
+    fn run_streaming(seed: u64, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture {
+        Box::pin(async move {
+            let outcome =
+                crate::targets::relational_db_commitlog::run_generated_with_config_and_scenario(seed, scenario, config)
+                    .await?;
+            Ok(format!(
+                "ok target={} seed={} steps={}",
+                Self::NAME,
+                seed,
+                outcome.final_row_counts.iter().sum::(), // NOTE(review): `steps=` prints summed final row counts, not a step count
+            ))
+        })
+    }
+}
diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs
new file mode 100644
index 00000000000..51a483d73a2
--- /dev/null
+++ b/crates/dst/src/targets/mod.rs
@@ -0,0 +1,4 @@
+//! Concrete simulation targets.
+
+pub mod descriptor;
+pub mod relational_db_commitlog;
diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs
new file mode 100644
index 00000000000..5a116a6e3aa
--- /dev/null
+++ b/crates/dst/src/targets/relational_db_commitlog.rs
@@ -0,0 +1,848 @@
+//! Simple RelationalDB DST target — table operations only.
+ +use std::ops::Bound; +use std::sync::Arc; + +use spacetimedb_commitlog::repo::mem::Memory; +use spacetimedb_core::{ + db::persistence::{DiskSizeFn, Persistence}, + db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + error::DBError, + messages::control_db::HostType, +}; +use spacetimedb_datastore::{execution_context::Workload, traits::IsolationLevel}; +use spacetimedb_durability::local::Options as DurabilityOpts; +use spacetimedb_durability::Local as DurabilityLocal; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_runtime::Handle as RuntimeHandle; +use spacetimedb_sats::AlgebraicValue; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_snapshot::SnapshotStore; +use spacetimedb_table::page_pool::PagePool; +use tracing::{info, trace}; + +use crate::{ + client::SessionId, + config::{CommitlogFaultProfile, RunConfig}, + core::{self, TargetEngine}, + properties::{ + PropertyRuntime, TableMutation, TableObservation, TargetPropertyAccess, + }, + schema::{SchemaPlan, SimRow}, + sim::{ + commitlog::{CommitlogFaultConfig, FaultableRepo}, + fork_seed, + snapshot::BuggifiedSnapshotRepo, + storage_faults::StorageFaultConfig, + Rng, + }, + workload::table_ops::{ + ConnectionWriteState, TableErrorKind, TableOperation, TableScenario, TableScenarioId, TableWorkloadInteraction, + TableWorkloadOutcome, TableWorkloadSource, + }, +}; + +pub type RelationalDbTableOutcome = TableWorkloadOutcome; + +pub async fn run_generated_with_config_and_scenario( + seed: u64, + scenario: TableScenarioId, + config: RunConfig, +) -> anyhow::Result { + let num_connections = { + let rng = Rng::new(fork_seed(seed, 121)); + rng.index(3) + 1 + }; + let schema_rng = Rng::new(fork_seed(seed, 122)); + let schema = scenario.generate_schema(&schema_rng); + let source = TableWorkloadSource::new( + 
seed, + scenario, + schema.clone(), + num_connections, + config.max_interactions_or_default(usize::MAX), + ); + + let sim_handle = crate::sim::current_handle().expect("must run inside sim Runtime::block_on"); + let rt_handle = RuntimeHandle::simulation(sim_handle.clone()); + + // Build faulty commitlog + persistence + let clog_repo = FaultableRepo::new( + Memory::unlimited(), + CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Default), + ); + let local = DurabilityLocal::open_with_repo(clog_repo, rt_handle.clone(), DurabilityOpts::default())?; + let history = local.as_history(); + let durability = Arc::new(local); + + // Build faulty snapshot store + let snap_repo = Arc::new(BuggifiedSnapshotRepo::new( + StorageFaultConfig::for_profile(CommitlogFaultProfile::Default), + )?) as Arc; + + // Enable buggify after setup so initial replay is fault-free + sim_handle.enable_buggify(); + + let persistence = Persistence { + durability, + disk_size: { + use std::io; + use spacetimedb_commitlog::repo::SizeOnDisk; + Arc::new(|| io::Result::Ok(SizeOnDisk { total_bytes: 0, total_blocks: 0 })) as DiskSizeFn + }, + snapshot_store: Some(snap_repo), + snapshots: None, + runtime: rt_handle, + }; + + let engine = RelationalDbEngine::new(seed, &schema, num_connections, history, Some(persistence))?; + let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections); + let outcome = core::run_streaming(source, engine, properties, config).await?; + info!( + applied_steps = outcome.final_row_counts.iter().sum::(), + "relational_db_table complete" + ); + Ok(outcome) +} + +struct RelationalDbEngine { + db: Option, + execution: ConnectionWriteState, + read_tx_by_connection: Vec>, + base_schema: SchemaPlan, + base_table_ids: Vec, + step: usize, +} + +impl RelationalDbEngine { + fn new>( + _seed: u64, schema: &SchemaPlan, num_connections: usize, + history: H, persistence: Option, + ) -> anyhow::Result { + let (db, connected_clients) = RelationalDB::open( + 
Identity::ZERO, + Identity::ZERO, + history, + persistence, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) + })?; + + let mut engine = Self { + db: Some(db), + execution: ConnectionWriteState::new(num_connections), + read_tx_by_connection: (0..num_connections).map(|_| None).collect(), + base_schema: schema.clone(), + base_table_ids: Vec::with_capacity(schema.tables.len()), + step: 0, + }; + engine.install_base_schema().map_err(anyhow::Error::msg)?; + Ok(engine) + } + + fn db(&self) -> Result<&RelationalDB, String> { + self.db.as_ref().ok_or_else(|| "relational db not initialized".to_string()) + } + + fn install_base_schema(&mut self) -> Result<(), String> { + let mut tx = self + .db()? + .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + for table in &self.base_schema.tables { + let columns = table + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) + .collect::>(); + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", table.name), + BTreeAlgorithm::from(0), + )]; + for cols in &table.extra_indexes { + let cols_name = cols.iter().map(|col| format!("c{col}")).collect::>().join("_"); + indexes.push(IndexSchema::for_test( + format!("{}_{}_idx", table.name, cols_name), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + )); + } + let constraints = vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", table.name), + 0, + )]; + let table_id = self + .db()? 
+ .create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&table.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + ) + .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; + self.base_table_ids.push(table_id); + } + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("install base schema commit failed: {err}"))?; + Ok(()) + } + + fn execute(&mut self, interaction: &TableWorkloadInteraction) -> Result { + self.step = self.step.saturating_add(1); + self.execute_table_op(interaction) + } + + fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { + trace!(step = self.step, op = ?interaction.op, "table interaction"); + let observation = self.execute_table_op_inner(&interaction.op)?; + Ok(observation) + } + + fn execute_table_op_inner(&mut self, op: &TableOperation) -> Result { + match op { + TableOperation::BeginTx { conn } => self.begin_write_tx(*conn), + TableOperation::BeginReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open write transaction")); + } + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + let tx = self.db()?.begin_tx(Workload::ForTests); + self.read_tx_by_connection[conn.as_index()] = Some(tx); + Ok(TableObservation::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + let tx = self.read_tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no read transaction to release"))?; + let _ = self.db()?.release_tx(tx); + Ok(TableObservation::Applied) + } + TableOperation::CommitTx { conn } => { + self.execution.ensure_writer_owner(*conn, 
"commit")?; + let tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("commit interaction failed: {err}"))?; + self.execution.active_writer = None; + Ok(TableObservation::CommitOrRollback) + } + TableOperation::RollbackTx { conn } => { + self.execution.ensure_writer_owner(*conn, "rollback")?; + let tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + Ok(TableObservation::CommitOrRollback) + } + TableOperation::InsertRows { conn, table, rows } => self.execute_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.execute_delete_rows(*conn, *table, rows), + TableOperation::AddColumn { + conn, + table, + column, + default, + } => { + let table_id = self.table_id_for_index(*table)?; + let column_idx = self.base_schema.tables[*table].columns.len() as u16; + let mut columns = self.base_schema.tables[*table] + .columns + .iter() + .enumerate() + .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, existing.ty.clone())) + .collect::>(); + columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); + self.with_mut_tx(*conn, |engine, tx| { + let new_table_id = engine + .db()? 
+ .add_columns_to_table(tx, table_id, columns.clone(), vec![default.clone()]) + .map_err(|err| format!("add column failed: {err}"))?; + Ok(new_table_id) + })?; + Ok(TableObservation::Applied) + } + TableOperation::AddIndex { conn, table, cols } => { + let table_id = self.table_id_for_index(*table)?; + self.with_mut_tx(*conn, |engine, tx| { + let mut schema = IndexSchema::for_test( + format!( + "{}_dst_added_{}_idx", + engine.base_schema.tables[*table].name, + engine.base_schema.tables[*table].extra_indexes.len() + ), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + ); + schema.table_id = table_id; + engine + .db()? + .create_index(tx, schema, false) + .map_err(|err| format!("add index failed: {err}"))?; + Ok(()) + })?; + if !self.base_schema.tables[*table].extra_indexes.contains(cols) { + self.base_schema.tables[*table].extra_indexes.push(cols.clone()); + } + Ok(TableObservation::Applied) + } + TableOperation::PointLookup { conn, table, id } => { + let actual = self.lookup_base_row(*conn, *table, *id)?; + Ok(TableObservation::PointLookup { + conn: *conn, + table: *table, + id: *id, + actual, + }) + } + TableOperation::PredicateCount { + conn, + table, + col, + value, + } => { + let actual = self.count_by_col_eq_in_connection(*conn, *table, *col, value)?; + Ok(TableObservation::PredicateCount { + conn: *conn, + table: *table, + col: *col, + value: value.clone(), + actual, + }) + } + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + } => { + let actual = self.range_scan_in_connection(*conn, *table, cols, lower.clone(), upper.clone())?; + Ok(TableObservation::RangeScan { + conn: *conn, + table: *table, + cols: cols.clone(), + lower: lower.clone(), + upper: upper.clone(), + actual, + }) + } + TableOperation::FullScan { conn, table } => { + let actual = self.collect_rows_in_connection(*conn, *table)?; + Ok(TableObservation::FullScan { + conn: *conn, + table: *table, + actual, + }) + } + } + } + + fn begin_write_tx(&mut self, conn: 
SessionId) -> Result { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + match self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + { + Some(tx) => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + let _ = self.db()?.rollback_mut_tx(tx); + return Err(format!( + "connection {conn} unexpectedly acquired write lock while conflicting transaction was open" + )); + } + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + self.execution.active_writer = Some(conn); + Ok(TableObservation::Applied) + } + None => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + Ok(TableObservation::ObservedError( + TableErrorKind::WriteConflict, + )) + } else { + Err(format!( + "connection {conn} failed to begin write transaction without an open conflicting lock" + )) + } + } + } + } + + fn execute_insert_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.try_insert_base_row(tx, table, row)? 
{ + Ok(returned) => mutations.push(TableMutation::Inserted { + table, + requested: row.clone(), + returned, + }), + Err(err) if is_unique_constraint_violation(&err) => { + return Ok(Err(TableErrorKind::UniqueConstraintViolation)); + } + Err(err) => return Err(format!("insert failed: {err}")), + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn execute_delete_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.delete_base_row_count(tx, table, row)? { + 0 => return Ok(Err(TableErrorKind::MissingRow)), + 1 => mutations.push(TableMutation::Deleted { + table, + row: row.clone(), + }), + deleted => { + return Err(format!("delete for row={row:?} affected {deleted} rows")); + } + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn mutation_observation( + &mut self, + conn: SessionId, + in_tx: bool, + outcome: Result, TableErrorKind>, String>, + ) -> Result { + match outcome { + Ok(Ok(mutations)) => Ok(TableObservation::Mutated { conn, mutations, in_tx }), + Ok(Err(kind)) => Ok(TableObservation::ObservedError(kind)), + Err(err) if is_write_conflict_error(&err) => { + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + } + Err(err) => Err(err), + } + } + + fn with_mut_tx_observed( + &mut self, + conn: SessionId, + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, String>, + ) -> Result, String> { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = self.execution.tx_by_connection[conn.as_index()] + .take() + 
.ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + let result = f(self, &mut tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + return result; + } + + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + return Ok(Err(TableErrorKind::WriteConflict)); + } + + let mut tx = self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; + self.execution.active_writer = Some(conn); + let value = match f(self, &mut tx) { + Ok(Ok(value)) => value, + Ok(Err(kind)) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Ok(Err(kind)); + } + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("auto-commit write failed: {err}"))?; + self.execution.active_writer = None; + Ok(Ok(value)) + } + + fn with_mut_tx( + &mut self, + conn: SessionId, + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, + ) -> Result { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + let result = f(self, &mut tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + return result; + } + + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + return Err(format!( + "connection {conn} cannot auto-commit write while a conflicting lock is open" + )); + } + + let mut tx = self + .db()? 
+ .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; + self.execution.active_writer = Some(conn); + let value = match f(self, &mut tx) { + Ok(value) => value, + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let _ = self + .db()? + .commit_tx(tx) + .map_err(|err| format!("auto-commit write failed: {err}"))?; + self.execution.active_writer = None; + Ok(value) + } + + fn try_insert_base_row( + &self, + tx: &mut RelMutTx, + table: usize, + row: &SimRow, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + Ok(match self.db()?.insert(tx, table_id, &bsatn) { + Ok((_, row_ref, _)) => Ok(SimRow::from_product_value(row_ref.to_product_value())), + Err(err) => Err(err), + }) + } + + fn delete_base_row_count(&self, tx: &mut RelMutTx, table: usize, row: &SimRow) -> Result { + let table_id = self.table_id_for_index(table)?; + Ok(self.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) + } + + fn any_open_read_tx(&self) -> bool { + self.read_tx_by_connection.iter().any(Option::is_some) + } + + fn is_in_write_tx(&self, conn: SessionId) -> bool { + self.execution + .tx_by_connection + .get(conn.as_index()) + .is_some_and(Option::is_some) + } + + fn table_id_for_index(&self, table: usize) -> Result { + self.base_table_ids + .get(table) + .copied() + .ok_or_else(|| format!("table {table} out of range")) + } + + fn with_fresh_read_tx(&self, f: impl FnOnce(&RelationalDB, &RelTx) -> Result) -> Result { + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + let result = f(db, &tx); + let _ = db.release_tx(tx); + result + } + + fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { + self.with_fresh_read_tx(|db, tx| { + let mut rows = db + .iter(tx, table_id) + .map_err(|err| format!("scan 
failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + }) + } + + fn lookup_base_row(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("read-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + }) + } + } + + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter_mut(tx, table_id) + .map_err(|err| format!("in-tx scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter(tx, table_id) + .map_err(|err| format!("read-tx scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else { + self.collect_rows_by_id(table_id) + } + } + + fn count_by_col_eq_in_connection( + &self, + conn: SessionId, + table: usize, + col: u16, + value: &AlgebraicValue, + ) -> Result { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq_mut(tx, table_id, col, value) + .map_err(|err| format!("in-tx predicate query failed: {err}"))? + .count()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("read-tx predicate query failed: {err}"))? + .count()) + } else { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("predicate query failed: {err}"))? + .count()) + }) + } + } + + fn range_scan_in_connection( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let cols_list = cols.iter().copied().collect::(); + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter_by_col_range_mut(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("in-tx range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("read-tx range scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else { + self.with_fresh_read_tx(|db, tx| { + let mut rows = db + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + }) + } + } +} + +impl TargetEngine for RelationalDbEngine { + type Observation = TableObservation; + type Outcome = TableWorkloadOutcome; + type Error = String; + + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a TableWorkloadInteraction, + ) -> impl std::future::Future> + 'a { + async move { self.execute(interaction) } + } + + fn finish(&mut self) {} + + fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { + async move { + let mut final_rows = Vec::with_capacity(self.base_schema.tables.len()); + let mut final_row_counts = Vec::with_capacity(self.base_schema.tables.len()); + for table in 0..self.base_schema.tables.len() { + let table_id = self.table_id_for_index(table).map_err(anyhow::Error::msg)?; + let rows = self.collect_rows_by_id(table_id).map_err(anyhow::Error::msg)?; + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + Ok(TableWorkloadOutcome { + final_row_counts, + final_rows, + }) + } + } +} + +impl TargetPropertyAccess for RelationalDbEngine { + fn schema_plan(&self) -> &SchemaPlan { + &self.base_schema + } + + fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { + self.lookup_base_row(conn, table, id) + } + + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + self.collect_rows_in_connection(conn, table) + } + + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + 
self.collect_rows_by_id(table_id) + } + + fn count_rows(&self, table: usize) -> Result { + let table_id = self.table_id_for_index(table)?; + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("count rows failed: {err}"))? + .count()) + }) + } + + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + let table_id = self.table_id_for_index(table)?; + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("count by col eq failed: {err}"))? + .count()) + }) + } + + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let cols_list = cols.iter().copied().collect::(); + self.with_fresh_read_tx(|db, tx| { + let mut rows = db + .iter_by_col_range(tx, table_id, cols_list, (lower, upper)) + .map_err(|err| format!("range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + }) + } +} + +fn is_unique_constraint_violation(err: &DBError) -> bool { + err.to_string().contains("Unique") || err.to_string().contains("unique") +} + +fn is_write_conflict_error(err: &str) -> bool { + err.contains("WriteConflict") || err.contains("write conflict") || err.contains("Serialization failure") +} diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs new file mode 100644 index 00000000000..faf3c04b5f2 --- /dev/null +++ b/crates/dst/src/workload/mod.rs @@ -0,0 +1,4 @@ +//! Shared workload generators reused by multiple DST targets. + +pub mod table_ops; +pub(crate) mod strategy; diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs new file mode 100644 index 00000000000..6c70ebb9e94 --- /dev/null +++ b/crates/dst/src/workload/strategy.rs @@ -0,0 +1,112 @@ +//! 
Small proptest-inspired strategy primitives for deterministic DST generation. +//! +//! This is intentionally minimal: we keep DST's streaming execution model and +//! use strategies only for typed, composable input generation. + +use crate::sim::Rng; + +/// Typed strategy that can sample values from the shared deterministic RNG. +pub(crate) trait Strategy: Sized { + fn sample(&self, rng: &Rng) -> T; +} + +/// Picks a value in `[0, upper)`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Index { + upper: usize, +} + +impl Index { + pub(crate) fn new(upper: usize) -> Self { + assert!(upper > 0, "index upper bound must be non-zero"); + Self { upper } + } +} + +impl Strategy for Index { + fn sample(&self, rng: &Rng) -> usize { + rng.index(self.upper) + } +} + +/// Bernoulli-style strategy from an integer percentage in `[0, 100]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Percent { + percent: usize, +} + +impl Percent { + pub(crate) fn new(percent: usize) -> Self { + assert!(percent <= 100, "percent must be in 0..=100, got {percent}"); + Self { percent } + } +} + +impl Strategy for Percent { + fn sample(&self, rng: &Rng) -> bool { + Index::new(100).sample(rng) < self.percent + } +} + +/// Weighted discrete choice over cloneable values. 
+#[derive(Clone, Debug)] +pub(crate) struct Weighted { + options: Vec<(usize, T)>, + total_weight: usize, +} + +impl Weighted { + pub(crate) fn new(options: Vec<(usize, T)>) -> Self { + let total_weight = options.iter().map(|(weight, _)| *weight).sum(); + assert!(total_weight > 0, "weighted strategy requires positive total weight"); + Self { options, total_weight } + } +} + +impl Strategy for Weighted { + fn sample(&self, rng: &Rng) -> T { + let mut pick = Index::new(self.total_weight).sample(rng); + for (weight, value) in &self.options { + if pick < *weight { + return value.clone(); + } + pick -= *weight; + } + self.options + .last() + .map(|(_, value)| value.clone()) + .expect("weighted strategy has at least one option") + } +} + +#[cfg(test)] +mod tests { + use crate::sim::Rng; + + use super::{Index, Percent, Strategy, Weighted}; + + #[test] + fn weighted_is_deterministic_for_seed() { + let strategy = Weighted::new(vec![(1, 10usize), (2, 20usize), (3, 30usize)]); + let rng_a = Rng::new(7); + let rng_b = Rng::new(7); + let a = (0..16).map(|_| strategy.sample(&rng_a)).collect::>(); + let b = (0..16).map(|_| strategy.sample(&rng_b)).collect::>(); + assert_eq!(a, b); + } + + #[test] + fn index_strategy_respects_bounds() { + let rng = Rng::new(123); + for _ in 0..64 { + let idx = Index::new(5).sample(&rng); + assert!(idx < 5); + } + } + + #[test] + #[should_panic(expected = "percent must be in 0..=100")] + fn percent_rejects_out_of_range_values() { + let _ = Percent::new(101); + } +} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs new file mode 100644 index 00000000000..b6050fd8e18 --- /dev/null +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -0,0 +1,288 @@ +use std::collections::VecDeque; + +use crate::{ + client::SessionId, + core::WorkloadSource, + schema::{ColumnPlan, SchemaPlan, TablePlan}, + sim::{fork_seed, Rng}, + workload::strategy::{Index, Percent, Strategy}, +}; + +use super::{ + 
model::GenerationModel, + strategies::{ConnectionChoice, TableChoice, TxControlAction, TxControlChoice}, + TableScenario, TableWorkloadInteraction, +}; + +/// Streaming planner for table-oriented workloads. +/// +/// The stream keeps only generator state plus a small pending queue, so long +/// duration runs do not need to materialize the full interaction list in +/// memory up front. +#[derive(Clone, Debug)] +pub struct TableWorkloadSource { + rng: Rng, + scenario: S, + model: GenerationModel, + num_connections: usize, + target_interactions: usize, + emitted: usize, + finalize_conn: usize, + pending: VecDeque, + finished: bool, +} + +/// Narrow helper passed to scenario code so scenario-specific planning can +/// inspect the current model and enqueue interactions without owning the whole +/// stream state machine. +pub struct ScenarioPlanner<'a> { + rng: &'a Rng, + model: &'a mut GenerationModel, + pending: &'a mut VecDeque, +} + +impl<'a> ScenarioPlanner<'a> { + pub fn choose_index(&mut self, len: usize) -> usize { + Index::new(len).sample(self.rng) + } + + pub fn choose_table(&mut self) -> usize { + TableChoice { + table_count: self.model.schema.tables.len(), + } + .sample(self.rng) + } + + pub fn roll_percent(&mut self, percent: usize) -> bool { + Percent::new(percent).sample(self.rng) + } + + pub fn active_writer(&self) -> Option { + self.model.active_writer() + } + + pub fn has_read_tx(&self, conn: SessionId) -> bool { + self.model.has_read_tx(conn) + } + + pub fn any_read_tx(&self) -> bool { + self.model.any_read_tx() + } + + pub fn begin_read_tx(&mut self, conn: SessionId) { + self.model.begin_read_tx(conn); + } + + pub fn release_read_tx(&mut self, conn: SessionId) { + self.model.release_read_tx(conn); + } + + pub fn begin_tx(&mut self, conn: SessionId) { + self.model.begin_tx(conn); + } + + pub fn commit_tx(&mut self, conn: SessionId) { + self.model.commit(conn); + } + + pub fn rollback_tx(&mut self, conn: SessionId) { + self.model.rollback(conn); + } + + 
pub fn maybe_control_tx( + &mut self, + conn: SessionId, + begin_pct: usize, + commit_pct: usize, + rollback_pct: usize, + ) -> bool { + match (TxControlChoice { + begin_pct, + commit_pct, + rollback_pct, + }) + .sample(self.rng) + { + TxControlAction::Begin + if !self.model.connections[conn.as_index()].in_tx && !self.model.has_read_tx(conn) => + { + if self.model.active_writer().is_none() && !self.model.any_read_tx() { + self.model.begin_tx(conn); + self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); + } else { + self.pending + .push_back(TableWorkloadInteraction::begin_tx_conflict(conn)); + } + true + } + TxControlAction::Commit if self.model.connections[conn.as_index()].in_tx => { + self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); + true + } + TxControlAction::Rollback if self.model.connections[conn.as_index()].in_tx => { + self.model.rollback(conn); + self.pending.push_back(TableWorkloadInteraction::rollback_tx(conn)); + true + } + _ => false, + } + } + + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + self.model.visible_rows(conn, table) + } + + pub fn table_plan(&self, table: usize) -> &TablePlan { + &self.model.schema.tables[table] + } + + pub fn make_row(&mut self, table: usize) -> crate::schema::SimRow { + self.model.make_row(self.rng, table) + } + + pub fn insert(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { + self.model.insert(conn, table, row); + } + + pub fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_insert(conn, table, rows); + } + + pub fn delete(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { + self.model.delete(conn, table, row); + } + + pub fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_delete(conn, table, rows); + } + + pub fn add_column(&mut self, table: usize, column: ColumnPlan, default: 
spacetimedb_sats::AlgebraicValue) { + self.model.add_column(table, column, default); + } + + pub fn add_index(&mut self, table: usize, cols: Vec) { + self.model.add_index(table, cols); + } + + pub fn absent_row(&mut self, conn: SessionId, table: usize) -> crate::schema::SimRow { + self.model.absent_row(self.rng, conn, table) + } + + pub fn unique_key_conflict_row( + &mut self, + table: usize, + source: &crate::schema::SimRow, + ) -> Option { + self.model.unique_key_conflict_row(self.rng, table, source) + } + + pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { + self.pending.push_back(interaction); + } +} + +impl TableWorkloadSource { + pub fn new( + seed: u64, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + ) -> Self { + Self { + rng: Rng::new(fork_seed(seed, 17)), + scenario, + model: GenerationModel::new(&schema, num_connections, seed), + num_connections, + target_interactions, + emitted: 0, + finalize_conn: 0, + pending: VecDeque::new(), + finished: false, + } + } + + pub fn request_finish(&mut self) { + self.target_interactions = self.emitted; + } + + #[allow(dead_code)] + pub fn has_open_read_tx(&self) -> bool { + self.model.any_read_tx() + } + + #[allow(dead_code)] + pub fn has_open_write_tx(&self) -> bool { + self.model.active_writer().is_some() + } + + fn fill_pending(&mut self) { + if self.emitted >= self.target_interactions { + while self.finalize_conn < self.num_connections { + let conn = SessionId::from_index(self.finalize_conn); + self.finalize_conn += 1; + if self.model.connections[conn.as_index()].in_tx { + self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); + return; + } + if self.model.has_read_tx(conn) { + self.model.release_read_tx(conn); + self.pending.push_back(TableWorkloadInteraction::release_read_tx(conn)); + return; + } + } + self.finished = true; + return; + } + + let conn = ConnectionChoice { + connection_count: 
self.num_connections, + } + .sample(&self.rng); + let mut planner = ScenarioPlanner { + rng: &self.rng, + model: &mut self.model, + pending: &mut self.pending, + }; + self.scenario.fill_pending(&mut planner, conn); + } +} + +impl TableWorkloadSource { + pub fn pull_next_interaction(&mut self) -> Option { + loop { + if let Some(interaction) = self.pending.pop_front() { + self.emitted += 1; + return Some(interaction); + } + + if self.finished { + return None; + } + + self.fill_pending(); + } + } +} + +impl WorkloadSource for TableWorkloadSource { + type Interaction = TableWorkloadInteraction; + + fn next_interaction(&mut self) -> Option { + self.pull_next_interaction() + } + + fn request_finish(&mut self) { + Self::request_finish(self); + } +} + +impl Iterator for TableWorkloadSource { + type Item = TableWorkloadInteraction; + + fn next(&mut self) -> Option { + self.pull_next_interaction() + } +} diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs new file mode 100644 index 00000000000..facf8a92734 --- /dev/null +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -0,0 +1,13 @@ +//! Shared transactional table workload used by table-oriented targets. 
+ +mod generation; +mod model; +mod scenarios; +pub(crate) mod strategies; +mod types; + +pub(crate) use generation::TableWorkloadSource; +pub(crate) use model::{PredictedOutcome, TableOracle}; +pub use scenarios::TableScenarioId; +pub(crate) use types::{ConnectionWriteState, TableScenario}; +pub use types::{TableErrorKind, TableInteractionCase, TableOperation, TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs new file mode 100644 index 00000000000..f56b1db5a25 --- /dev/null +++ b/crates/dst/src/workload/table_ops/model.rs @@ -0,0 +1,709 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, + sim::{fork_seed, Rng}, +}; + +use super::{TableErrorKind, TableOperation}; + +/// Generator-side model of committed rows plus per-connection pending writes. +/// +/// This model is used only while producing interactions. It lets the planner +/// pick valid deletes, synthesize visibility checks, and enforce the +/// single-writer discipline before the real target executes anything. 
+#[derive(Clone, Debug)] +pub(crate) struct GenerationModel { + pub(crate) schema: SchemaPlan, + pub(crate) connections: Vec, + committed: Vec>, + next_ids: Vec, + active_writer: Option, +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct PendingConnection { + pub(crate) in_tx: bool, + read_snapshot: Option>>, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl GenerationModel { + pub(crate) fn new(schema: &SchemaPlan, num_connections: usize, seed: u64) -> Self { + Self { + schema: schema.clone(), + connections: vec![PendingConnection::default(); num_connections], + committed: vec![Vec::new(); schema.tables.len()], + next_ids: (0..schema.tables.len()) + .map(|idx| fork_seed(seed, idx as u64 + 100)) + .collect(), + active_writer: None, + } + } + + pub(crate) fn make_row(&mut self, rng: &Rng, table: usize) -> SimRow { + let table_plan = &self.schema.tables[table]; + let id = self.next_ids[table]; + self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); + let mut values = vec![AlgebraicValue::U64(id)]; + for (idx, col) in table_plan.columns.iter().enumerate().skip(1) { + values.push(generate_value_for_type(rng, &col.ty, idx)); + } + SimRow { values } + } + + pub(crate) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { + return snapshot[table].clone(); + } + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn_idx]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } + } + rows + } + + pub(crate) fn absent_row(&mut self, rng: &Rng, conn: SessionId, table: usize) -> SimRow { + let mut row = self.make_row(rng, table); + while self.visible_rows(conn, 
table).iter().any(|candidate| candidate == &row) { + row = self.make_row(rng, table); + } + row + } + + pub(crate) fn unique_key_conflict_row(&self, rng: &Rng, table: usize, source: &SimRow) -> Option { + let table_plan = &self.schema.tables[table]; + let value_count = source.values.len().min(table_plan.columns.len()); + if value_count <= 1 { + return None; + } + + let col_idx = 1 + rng.index(value_count - 1); + let mut row = source.clone(); + row.values[col_idx] = distinct_value_for_type(&table_plan.columns[col_idx].ty, &row.values[col_idx]); + Some(row) + } + + pub(crate) fn active_writer(&self) -> Option { + self.active_writer + } + + pub(crate) fn has_read_tx(&self, conn: SessionId) -> bool { + self.connections[conn.as_index()].read_snapshot.is_some() + } + + pub(crate) fn any_read_tx(&self) -> bool { + self.connections + .iter() + .any(|connection| connection.read_snapshot.is_some()) + } + + pub(crate) fn begin_read_tx(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + assert!(!pending.in_tx, "connection already has write transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read transaction" + ); + pending.read_snapshot = Some(self.committed.clone()); + } + + pub(crate) fn release_read_tx(&mut self, conn: SessionId) { + assert!( + self.connections[conn.as_index()].read_snapshot.take().is_some(), + "connection has no read transaction" + ); + } + + pub(crate) fn begin_tx(&mut self, conn: SessionId) { + assert!(self.active_writer.is_none(), "single writer already active"); + let pending = &mut self.connections[conn.as_index()]; + assert!(!pending.in_tx, "connection already in transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read transaction" + ); + pending.in_tx = true; + self.active_writer = Some(conn); + } + + pub(crate) fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; + if 
pending.in_tx { + pending.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row); + } + } + + pub(crate) fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.insert(conn, table, row.clone()); + } + } + + pub(crate) fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; + if pending.in_tx { + pending + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + pending.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + pub(crate) fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.delete(conn, table, row.clone()); + } + } + + pub(crate) fn commit(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + let inserts = std::mem::take(&mut pending.staged_inserts); + let deletes = std::mem::take(&mut pending.staged_deletes); + pending.in_tx = false; + self.active_writer = None; + + for (table, row) in &deletes { + self.committed[*table].retain(|candidate| candidate != row); + } + for (table, row) in &inserts { + self.committed[*table].push(row.clone()); + } + } + + pub(crate) fn rollback(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + pending.staged_inserts.clear(); + pending.staged_deletes.clear(); + pending.in_tx = false; + self.active_writer = None; + } + + pub(crate) fn add_column(&mut self, table: usize, column: ColumnPlan, default: AlgebraicValue) { + self.schema.tables[table].columns.push(column); + for row in &mut self.committed[table] { + row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + 
row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } + + pub(crate) fn add_index(&mut self, table: usize, cols: Vec) { + let indexes = &mut self.schema.tables[table].extra_indexes; + if !indexes.contains(&cols) { + indexes.push(cols); + } + } +} + +/// Replay model used as the oracle for table workload properties. +/// +/// Target property runtimes apply every table interaction here in parallel with +/// real target execution, then compare the collected target outcome against this +/// model at the end of the run. +#[derive(Clone, Debug)] +pub struct TableOracle { + committed: Vec>, + connections: Vec, + active_writer: Option, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum PredictedOutcome { + Applied, + NoMutation { + subject: Option<(SessionId, usize)>, + }, + Error { + kind: TableErrorKind, + subject: Option<(SessionId, usize)>, + }, +} + +#[derive(Clone, Debug, Default)] +struct ExpectedConnection { + in_tx: bool, + read_snapshot: Option>>, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl TableOracle { + pub fn new(table_count: usize, connection_count: usize) -> Self { + Self { + committed: vec![Vec::new(); table_count], + connections: vec![ExpectedConnection::default(); connection_count], + active_writer: None, + } + } + + pub fn predict(&self, op: &TableOperation) -> Result { + match op { + TableOperation::BeginTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin write tx with open read tx")); + } + if self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} already has open write tx")); + } + if self.active_writer.is_some() + || self + .connections + .iter() + .any(|connection| connection.read_snapshot.is_some()) + { + return 
Ok(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + }); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::BeginReadTx { conn } => { + self.ensure_connection(*conn)?; + let state = &self.connections[conn.as_index()]; + if state.in_tx || state.read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin read tx in current state")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_none() { + return Err(format!("connection {conn} has no read tx to release")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::CommitTx { conn } | TableOperation::RollbackTx { conn } => { + self.ensure_connection(*conn)?; + if self.active_writer != Some(*conn) || !self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} does not own an open write tx")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::InsertRows { conn, table, rows } => self.predict_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.predict_delete_rows(*conn, *table, rows), + TableOperation::AddColumn { .. } | TableOperation::AddIndex { .. } => Ok(PredictedOutcome::Applied), + TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. 
} => Ok(PredictedOutcome::NoMutation { subject: None }), + } + } + + pub fn apply(&mut self, op: &TableOperation) { + match op { + TableOperation::BeginTx { conn } => { + assert!( + self.active_writer.is_none(), + "multiple concurrent writers in table oracle" + ); + self.connections[conn.as_index()].in_tx = true; + self.active_writer = Some(*conn); + } + TableOperation::BeginReadTx { conn } => { + let state = &mut self.connections[conn.as_index()]; + assert!(!state.in_tx, "read tx started while write tx is open"); + assert!(state.read_snapshot.is_none(), "nested read tx in table oracle"); + state.read_snapshot = Some(self.committed.clone()); + } + TableOperation::ReleaseReadTx { conn } => { + assert!( + self.connections[conn.as_index()].read_snapshot.take().is_some(), + "release read tx without open read tx" + ); + } + TableOperation::CommitTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in table oracle"); + let state = &mut self.connections[conn.as_index()]; + for (table, row) in state.staged_deletes.drain(..) { + self.committed[table].retain(|candidate| *candidate != row); + } + for (table, row) in state.staged_inserts.drain(..) { + self.committed[table].push(row); + } + state.in_tx = false; + self.active_writer = None; + } + TableOperation::RollbackTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "rollback by non-owner in table oracle"); + let state = &mut self.connections[conn.as_index()]; + state.staged_inserts.clear(); + state.staged_deletes.clear(); + state.in_tx = false; + self.active_writer = None; + } + TableOperation::InsertRows { conn, table, rows } => self.insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.delete_rows(*conn, *table, rows), + TableOperation::AddColumn { + table, + column: _, + default, + .. + } => { + self.add_column(*table, default.clone()); + } + TableOperation::AddIndex { .. } => {} + TableOperation::PointLookup { .. 
} + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. } => {} + } + } + + fn predict_insert_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? { + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + let mut mutates = false; + for row in rows { + let Some(id) = row.id() else { + return Err(format!("insert row for table {table} is missing primary id: {row:?}")); + }; + match visible.iter().find(|candidate| candidate.id() == Some(id)) { + Some(existing) if existing == row => {} + Some(_) => { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, table)), + }); + } + None => { + mutates = true; + visible.push(row.clone()); + } + } + } + + if mutates { + Ok(PredictedOutcome::Applied) + } else { + Ok(PredictedOutcome::NoMutation { + subject: Some((conn, table)), + }) + } + } + + fn predict_delete_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? 
{ + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + for row in rows { + let Some(idx) = visible.iter().position(|candidate| candidate == row) else { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::MissingRow, + subject: Some((conn, table)), + }); + }; + visible.remove(idx); + } + + Ok(PredictedOutcome::Applied) + } + + fn predict_write_access(&self, conn: SessionId, table: usize) -> Result, String> { + self.ensure_connection(conn)?; + self.ensure_table(table)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot write while read tx is open")); + } + if let Some(owner) = self.active_writer + && owner != conn + { + return Ok(Some(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + })); + } + Ok(None) + } + + fn ensure_connection(&self, conn: SessionId) -> Result<(), String> { + self.connections + .get(conn.as_index()) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + fn ensure_table(&self, table: usize) -> Result<(), String> { + self.committed + .get(table) + .map(|_| ()) + .ok_or_else(|| format!("table {table} out of range")) + } + + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { + return snapshot[table].clone(); + } + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn_idx]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } + } + rows + } + + pub fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { + self.visible_rows(conn, table) + .into_iter() + .find(|row| row.id() == Some(id)) + } + + pub fn predicate_count(&self, 
conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.visible_rows(conn, table) + .into_iter() + .filter(|row| row.values.get(col as usize) == Some(value)) + .count() + } + + pub fn range_scan( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + let mut rows = self + .visible_rows(conn, table) + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + bound_contains_lower(lower, &key) && bound_contains_upper(upper, &key) + }) + .collect::>(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + rows + } + + pub fn committed_rows(mut self) -> Vec> { + for table_rows in &mut self.committed { + table_rows.sort_by_key(|row| row.id().unwrap_or_default()); + } + self.committed + } + + fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut self.connections[conn.as_index()]; + if state.in_tx { + state.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row); + } + } + + fn insert_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + if self + .visible_rows(conn, table) + .into_iter() + .any(|candidate| candidate == *row) + { + continue; + } + self.insert(conn, table, row.clone()); + } + } + + fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut self.connections[conn.as_index()]; + if state.in_tx { + state + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + state.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + fn delete_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.delete(conn, table, row.clone()); + } + } + + fn add_column(&mut self, 
table: usize, default: AlgebraicValue) { + for row in &mut self.committed[table] { + row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } +} + +fn bound_contains_lower(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key >= value, + Bound::Excluded(value) => key > value, + Bound::Unbounded => true, + } +} + +fn bound_contains_upper(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key <= value, + Bound::Excluded(value) => key < value, + Bound::Unbounded => true, + } +} + +#[cfg(test)] +mod tests { + use spacetimedb_sats::AlgebraicValue; + + use crate::{client::SessionId, schema::SimRow}; + + use super::{PredictedOutcome, TableErrorKind, TableOperation, TableOracle}; + + fn row(id: u64) -> SimRow { + SimRow { + values: vec![AlgebraicValue::U64(id)], + } + } + + #[test] + fn write_conflict_prediction_does_not_request_blocking_visibility_check() { + let owner = SessionId::from_index(0); + let contender = SessionId::from_index(1); + let mut oracle = TableOracle::new(1, 2); + oracle.apply(&TableOperation::BeginTx { conn: owner }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn: contender, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + } + ); + } + + #[test] + fn exact_duplicate_insert_is_predicted_as_no_mutation() { + let conn = SessionId::from_index(0); + let mut oracle = TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: 
vec![row(1)], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::NoMutation { + subject: Some((conn, 0)), + } + ); + } + + #[test] + fn same_id_different_row_is_predicted_as_unique_constraint_violation() { + let conn = SessionId::from_index(0); + let mut oracle = TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(10)], + }], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(11)], + }], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, 0)), + } + ); + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs new file mode 100644 index 00000000000..4619473dc36 --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -0,0 +1,48 @@ +mod random_crud; + +use crate::{client::SessionId, schema::SchemaPlan, sim::Rng}; + +use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct RandomCrudScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum TableScenarioId { + #[default] + RandomCrud, +} + +impl TableScenario for RandomCrudScenario { + fn generate_schema(&self, rng: &Rng) -> SchemaPlan { + random_crud::generate_schema(rng) + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + random_crud::validate_outcome(schema, outcome) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + random_crud::fill_pending(planner, conn); 
+ } +} + +impl TableScenario for TableScenarioId { + fn generate_schema(&self, rng: &Rng) -> SchemaPlan { + match self { + Self::RandomCrud => RandomCrudScenario.generate_schema(rng), + } + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + match self { + Self::RandomCrud => RandomCrudScenario.validate_outcome(schema, outcome), + } + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + match self { + Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), + } + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs new file mode 100644 index 00000000000..5864592e0e6 --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -0,0 +1,457 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicType; + +use crate::{ + client::SessionId, + schema::{default_value_for_type, generate_supported_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, + sim::Rng, + workload::strategy::{Index, Percent, Strategy}, +}; + +use super::super::{generation::ScenarioPlanner, TableInteractionCase, TableWorkloadInteraction, TableWorkloadOutcome}; + +#[derive(Clone, Copy)] +struct TableWorkloadProfile { + min_tables: usize, + table_count_choices: usize, + min_extra_cols: usize, + extra_col_choices: usize, + preferred_range_cols: usize, + prefer_range_compatible_pct: usize, + prefer_u64_pct: usize, + single_index_pct: usize, + composite2_index_pct: usize, + composite3_index_pct: usize, + insert_pct: usize, + begin_tx_pct: usize, + commit_tx_pct: usize, + rollback_tx_pct: usize, + begin_read_tx_pct: usize, + release_read_tx_pct: usize, + empty_tx_pct: usize, + exact_duplicate_insert_pct: usize, + unique_key_conflict_insert_pct: usize, + add_column_pct: usize, + add_index_pct: usize, +} + +const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { + min_tables: 2, + 
table_count_choices: 3, + min_extra_cols: 1, + extra_col_choices: 4, + preferred_range_cols: 2, + prefer_range_compatible_pct: 65, + prefer_u64_pct: 75, + single_index_pct: 70, + composite2_index_pct: 65, + composite3_index_pct: 30, + insert_pct: 65, + begin_tx_pct: 20, + commit_tx_pct: 15, + rollback_tx_pct: 10, + begin_read_tx_pct: 4, + release_read_tx_pct: 35, + empty_tx_pct: 2, + exact_duplicate_insert_pct: 4, + unique_key_conflict_insert_pct: 4, + add_column_pct: 1, + add_index_pct: 2, +}; + +pub fn generate_schema(rng: &Rng) -> SchemaPlan { + generate_schema_with_profile(rng, RANDOM_CRUD_PROFILE) +} + +fn generate_schema_with_profile(rng: &Rng, profile: TableWorkloadProfile) -> SchemaPlan { + let table_count = profile.min_tables + Index::new(profile.table_count_choices).sample(rng); + let mut tables = Vec::with_capacity(table_count); + + for table_idx in 0..table_count { + let extra_cols = profile.min_extra_cols + Index::new(profile.extra_col_choices).sample(rng); + let mut columns = vec![ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }]; + for col_idx in 0..extra_cols { + let ty = if col_idx < profile.preferred_range_cols + && Percent::new(profile.prefer_range_compatible_pct).sample(rng) + { + if Percent::new(profile.prefer_u64_pct).sample(rng) { + AlgebraicType::U64 + } else { + AlgebraicType::Bool + } + } else { + generate_supported_type(rng) + }; + columns.push(ColumnPlan { + name: format!("c{table_idx}_{col_idx}"), + ty, + }); + } + let mut extra_indexes = Vec::new(); + let non_primary_range_cols = columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, col)| is_range_compatible(&col.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + if let Some(&col) = non_primary_range_cols.first() + && Percent::new(profile.single_index_pct).sample(rng) + { + extra_indexes.push(vec![col]); + } + if non_primary_range_cols.len() >= 2 && Percent::new(profile.composite2_index_pct).sample(rng) { + 
extra_indexes.push(non_primary_range_cols[..2].to_vec()); + } + if non_primary_range_cols.len() >= 3 && Percent::new(profile.composite3_index_pct).sample(rng) { + extra_indexes.push(non_primary_range_cols[..3].to_vec()); + } + extra_indexes.sort(); + extra_indexes.dedup(); + tables.push(TablePlan { + name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 10_000), + columns, + extra_indexes, + }); + } + + SchemaPlan { tables } +} + +pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + Ok(()) +} + +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + fill_pending_with_profile(planner, conn, RANDOM_CRUD_PROFILE); +} + +fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, profile: TableWorkloadProfile) { + if planner.has_read_tx(conn) { + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if planner.roll_percent(profile.release_read_tx_pct) { + planner.release_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::release_read_tx(conn)); + } else if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + + if planner.active_writer().is_none() { + if planner.roll_percent(profile.empty_tx_pct) { + let rollback = planner.roll_percent(50); + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + if rollback { + planner.rollback_tx(conn); + planner.push_interaction(TableWorkloadInteraction::rollback_tx(conn)); + } else { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } + return; + } + + if planner.roll_percent(profile.begin_read_tx_pct) { + planner.begin_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_read_tx(conn)); + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, 
table); + if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + } + + if planner.maybe_control_tx( + conn, + profile.begin_tx_pct, + profile.commit_tx_pct, + profile.rollback_tx_pct, + ) { + return; + } + + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if planner.active_writer().is_none() + && !planner.any_read_tx() + && !visible_rows.is_empty() + && planner.roll_percent(profile.add_column_pct) + && emit_add_column(planner, conn, table) + { + return; + } + if planner.active_writer().is_none() + && !planner.any_read_tx() + && visible_rows.len() >= 2 + && planner.roll_percent(profile.add_index_pct) + && emit_add_index(planner, conn, table, &visible_rows) + { + return; + } + if emit_query(planner, conn, table, &visible_rows) { + return; + } + if planner.roll_percent(5) { + let row = planner.absent_row(conn, table); + planner.push_interaction(TableWorkloadInteraction::delete_missing(conn, table, row)); + return; + } + let choose_insert = visible_rows.is_empty() || planner.roll_percent(profile.insert_pct); + if choose_insert { + if planner.roll_percent(10) { + let count = 2 + planner.choose_index(3); + let rows = (0..count).map(|_| planner.make_row(table)).collect::>(); + planner.batch_insert(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_insert(conn, table, rows)); + return; + } + let row = planner.make_row(table); + planner.insert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); + return; + } + + if planner.roll_percent(profile.exact_duplicate_insert_pct) { + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.push_interaction(TableWorkloadInteraction::exact_duplicate_insert(conn, table, row)); + return; + } + if planner.roll_percent(profile.unique_key_conflict_insert_pct) + && 
emit_unique_key_conflict_insert(planner, conn, table, &visible_rows) + { + return; + } + + if visible_rows.len() >= 2 && planner.roll_percent(10) { + let count = 2 + planner.choose_index(visible_rows.len().min(3) - 1); + let mut candidates = visible_rows.clone(); + let mut rows = Vec::with_capacity(count); + for _ in 0..count { + let idx = planner.choose_index(candidates.len()); + rows.push(candidates.remove(idx)); + } + planner.batch_delete(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_delete(conn, table, rows)); + return; + } + if planner.roll_percent(6) { + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.delete(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::delete_with_case( + conn, + table, + row.clone(), + TableInteractionCase::Reinsert, + )); + planner.insert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); + return; + } + + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.delete(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); +} + +fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize) -> bool { + const MAX_COLUMNS_PER_TABLE: usize = 12; + let column_idx = planner.table_plan(table).columns.len(); + if column_idx >= MAX_COLUMNS_PER_TABLE { + return false; + } + let ty = match planner.choose_index(4) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::U64, + 2 => AlgebraicType::String, + _ => generate_supported_type_for_churn(planner), + }; + let column = ColumnPlan { + name: format!("dst_added_{table}_{column_idx}"), + ty, + }; + let default = default_value_for_type(&column.ty); + planner.add_column(table, column.clone(), default.clone()); + planner.push_interaction(TableWorkloadInteraction::add_column(conn, table, column, default)); + true +} + +fn emit_add_index(planner: &mut 
ScenarioPlanner<'_>, conn: SessionId, table: usize, visible_rows: &[SimRow]) -> bool { + let candidates = candidate_new_indexes(planner, table); + if candidates.is_empty() { + return false; + } + let cols = candidates[planner.choose_index(candidates.len())].clone(); + planner.add_index(table, cols.clone()); + planner.push_interaction(TableWorkloadInteraction::add_index(conn, table, cols.clone())); + if let Some((lower, upper)) = inclusive_bounds_for_rows(visible_rows, &cols) { + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + } + true +} + +fn emit_unique_key_conflict_insert( + planner: &mut ScenarioPlanner<'_>, + conn: SessionId, + table: usize, + visible_rows: &[SimRow], +) -> bool { + let source = visible_rows[planner.choose_index(visible_rows.len())].clone(); + let Some(row) = planner.unique_key_conflict_row(table, &source) else { + return false; + }; + planner.push_interaction(TableWorkloadInteraction::unique_key_conflict_insert(conn, table, row)); + true +} + +fn generate_supported_type_for_churn(planner: &mut ScenarioPlanner<'_>) -> AlgebraicType { + match planner.choose_index(6) { + 0 => AlgebraicType::I64, + 1 => AlgebraicType::U32, + 2 => AlgebraicType::I32, + 3 => AlgebraicType::U8, + 4 => AlgebraicType::I128, + _ => AlgebraicType::U128, + } +} + +fn candidate_new_indexes(planner: &ScenarioPlanner<'_>, table: usize) -> Vec> { + let table_plan = planner.table_plan(table); + let cols = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, column)| is_range_compatible(&column.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + let mut candidates = Vec::new(); + for width in 1..=cols.len().min(3) { + let candidate = cols[..width].to_vec(); + if !table_plan.extra_indexes.contains(&candidate) { + candidates.push(candidate); + } + } + candidates +} + +fn inclusive_bounds_for_rows( + rows: &[SimRow], + cols: &[u16], +) -> 
Option<(spacetimedb_sats::AlgebraicValue, spacetimedb_sats::AlgebraicValue)> { + let mut sorted = rows.to_vec(); + sorted.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = sorted.first()?.project_key(cols).to_algebraic_value(); + let upper = sorted.last()?.project_key(cols).to_algebraic_value(); + Some((lower, upper)) +} + +fn emit_query( + planner: &mut ScenarioPlanner<'_>, + conn: SessionId, + table: usize, + visible_rows: &[crate::schema::SimRow], +) -> bool { + if !planner.roll_percent(25) { + return false; + } + if visible_rows.is_empty() { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + } + + match planner.choose_index(4) { + 0 => { + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(id) = row.id() { + planner.push_interaction(TableWorkloadInteraction::point_lookup(conn, table, id)); + true + } else { + false + } + } + 1 => { + let col = choose_predicate_col(planner, table); + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(value) = row.values.get(col as usize).cloned() { + planner.push_interaction(TableWorkloadInteraction::predicate_count(conn, table, col, value)); + true + } else { + false + } + } + 2 => { + let extra_indexes = planner.table_plan(table).extra_indexes.clone(); + let Some(cols) = extra_indexes + .into_iter() + .find(|cols| range_cols_supported(planner, table, cols)) + else { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + }; + let mut rows = visible_rows.to_vec(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(&cols) + .to_algebraic_value() + .cmp(&rhs.project_key(&cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = rows[0].project_key(&cols).to_algebraic_value(); + let upper = rows[rows.len() - 
1].project_key(&cols).to_algebraic_value(); + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + true + } + _ => { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + true + } + } +} + +fn choose_predicate_col(planner: &mut ScenarioPlanner<'_>, table: usize) -> u16 { + let column_count = planner.table_plan(table).columns.len(); + if column_count <= 1 { + 0 + } else { + 1 + planner.choose_index(column_count - 1) as u16 + } +} + +fn range_cols_supported(planner: &ScenarioPlanner<'_>, table: usize, cols: &[u16]) -> bool { + cols.iter().all(|col| { + planner + .table_plan(table) + .columns + .get(*col as usize) + .is_some_and(|column| is_range_compatible(&column.ty)) + }) +} + +fn is_range_compatible(ty: &AlgebraicType) -> bool { + matches!(ty, AlgebraicType::U64 | AlgebraicType::Bool) +} diff --git a/crates/dst/src/workload/table_ops/strategies.rs b/crates/dst/src/workload/table_ops/strategies.rs new file mode 100644 index 00000000000..42dbc6c2ee4 --- /dev/null +++ b/crates/dst/src/workload/table_ops/strategies.rs @@ -0,0 +1,66 @@ +//! Typed strategies specific to table-style workload generation. + +use crate::{ + client::SessionId, + sim::Rng, + workload::strategy::{Index, Strategy, Weighted}, +}; + +/// Choose one logical session uniformly from the current fixed-size session pool. +#[derive(Clone, Copy, Debug)] +pub(crate) struct ConnectionChoice { + pub(crate) connection_count: usize, +} + +impl Strategy for ConnectionChoice { + fn sample(&self, rng: &Rng) -> SessionId { + SessionId::from_index(Index::new(self.connection_count).sample(rng)) + } +} + +/// Choose one table uniformly. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct TableChoice { + pub(crate) table_count: usize, +} + +impl Strategy for TableChoice { + fn sample(&self, rng: &Rng) -> usize { + Index::new(self.table_count).sample(rng) + } +} + +/// Weighted transaction control action. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum TxControlAction { + Begin, + Commit, + Rollback, + None, +} + +/// Strategy for begin/commit/rollback control flow. +#[derive(Clone, Copy, Debug)] +pub(crate) struct TxControlChoice { + pub(crate) begin_pct: usize, + pub(crate) commit_pct: usize, + pub(crate) rollback_pct: usize, +} + +impl Strategy for TxControlChoice { + fn sample(&self, rng: &Rng) -> TxControlAction { + let begin = self.begin_pct.min(100); + let commit = self.commit_pct.min(100); + let rollback = self.rollback_pct.min(100); + let reserved = begin.saturating_add(commit).saturating_add(rollback).min(100); + let none = 100usize.saturating_sub(reserved); + + Weighted::new(vec![ + (begin, TxControlAction::Begin), + (commit, TxControlAction::Commit), + (rollback, TxControlAction::Rollback), + (none, TxControlAction::None), + ]) + .sample(rng) + } +} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs new file mode 100644 index 00000000000..6b589b0cdaf --- /dev/null +++ b/crates/dst/src/workload/table_ops/types.rs @@ -0,0 +1,311 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, SimRow}, + sim::Rng, +}; + +use super::generation::ScenarioPlanner; + +/// Scenario hook for shared table-oriented workloads. +/// +/// A scenario supplies the initial schema, scenario-specific commit-time +/// properties, and any final invariant over the collected outcome. 
+pub(crate) trait TableScenario: Clone { + fn generate_schema(&self, rng: &Rng) -> SchemaPlan; + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId); +} + +/// One generated workload step. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PlannedInteraction { + pub op: TableOperation, + /// Generator-side coverage/debug label. + /// + /// Correctness must not depend on this field. Properties predict expected + /// behavior from the model and `op`; this label only preserves intent in + /// summaries and failure reports. + pub case: TableInteractionCase, +} + +pub type TableWorkloadInteraction = PlannedInteraction; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum TableOperation { + /// Start an explicit write transaction on a connection. + BeginTx { conn: SessionId }, + /// Commit the connection's explicit write transaction. + CommitTx { conn: SessionId }, + /// Roll back the connection's explicit write transaction. + RollbackTx { conn: SessionId }, + /// Hold a read snapshot open while later reads observe stable state. + BeginReadTx { conn: SessionId }, + /// Release a previously opened read snapshot. + ReleaseReadTx { conn: SessionId }, + /// Insert one or more rows. + InsertRows { + conn: SessionId, + table: usize, + rows: Vec, + }, + /// Delete one or more rows. + DeleteRows { + conn: SessionId, + table: usize, + rows: Vec, + }, + /// Add a column to an existing table with a default for live rows. + AddColumn { + conn: SessionId, + table: usize, + column: ColumnPlan, + default: AlgebraicValue, + }, + /// Add a non-primary index after data exists. + AddIndex { + conn: SessionId, + table: usize, + cols: Vec, + }, + /// Query a row by primary id and compare against the model. + PointLookup { conn: SessionId, table: usize, id: u64 }, + /// Count rows by equality on one column and compare against the model. 
+ PredicateCount { + conn: SessionId, + table: usize, + col: u16, + value: AlgebraicValue, + }, + /// Scan an indexed range and compare against model filtering. + RangeScan { + conn: SessionId, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + }, + /// Scan all visible rows and compare against the model. + FullScan { conn: SessionId, table: usize }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum TableErrorKind { + UniqueConstraintViolation, + MissingRow, + WriteConflict, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum TableInteractionCase { + BeginTx, + CommitTx, + RollbackTx, + BeginReadTx, + ReleaseReadTx, + BeginTxConflict, + WriteConflictInsert, + Insert, + Delete, + ExactDuplicateInsert, + UniqueKeyConflictInsert, + DeleteMissing, + BatchInsert, + BatchDelete, + Reinsert, + AddColumn, + AddIndex, + PointLookup, + PredicateCount, + RangeScan, + FullScan, +} + +impl PlannedInteraction { + pub fn new(op: TableOperation, case: TableInteractionCase) -> Self { + Self { op, case } + } + + pub fn begin_tx(conn: SessionId) -> Self { + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTx) + } + + pub fn commit_tx(conn: SessionId) -> Self { + Self::new(TableOperation::CommitTx { conn }, TableInteractionCase::CommitTx) + } + + pub fn rollback_tx(conn: SessionId) -> Self { + Self::new(TableOperation::RollbackTx { conn }, TableInteractionCase::RollbackTx) + } + + pub fn begin_read_tx(conn: SessionId) -> Self { + Self::new(TableOperation::BeginReadTx { conn }, TableInteractionCase::BeginReadTx) + } + + pub fn release_read_tx(conn: SessionId) -> Self { + Self::new( + TableOperation::ReleaseReadTx { conn }, + TableInteractionCase::ReleaseReadTx, + ) + } + + pub fn begin_tx_conflict(conn: SessionId) -> Self { + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTxConflict) + } + + pub fn write_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_rows(conn, table, 
vec![row], TableInteractionCase::WriteConflictInsert) + } + + pub fn insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::Insert) + } + + pub fn insert_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + Self::insert_rows(conn, table, vec![row], case) + } + + pub fn delete(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::delete_with_case(conn, table, row, TableInteractionCase::Delete) + } + + pub fn delete_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + Self::delete_rows(conn, table, vec![row], case) + } + + pub fn exact_duplicate_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::ExactDuplicateInsert) + } + + pub fn unique_key_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::UniqueKeyConflictInsert) + } + + pub fn delete_missing(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::delete_with_case(conn, table, row, TableInteractionCase::DeleteMissing) + } + + pub fn batch_insert(conn: SessionId, table: usize, rows: Vec) -> Self { + Self::insert_rows(conn, table, rows, TableInteractionCase::BatchInsert) + } + + pub fn batch_delete(conn: SessionId, table: usize, rows: Vec) -> Self { + Self::delete_rows(conn, table, rows, TableInteractionCase::BatchDelete) + } + + fn insert_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::InsertRows { conn, table, rows }, case) + } + + fn delete_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::DeleteRows { conn, table, rows }, case) + } + + pub fn add_column(conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { + Self::new( + 
TableOperation::AddColumn { + conn, + table, + column, + default, + }, + TableInteractionCase::AddColumn, + ) + } + + pub fn add_index(conn: SessionId, table: usize, cols: Vec) -> Self { + Self::new( + TableOperation::AddIndex { conn, table, cols }, + TableInteractionCase::AddIndex, + ) + } + + pub fn point_lookup(conn: SessionId, table: usize, id: u64) -> Self { + Self::new( + TableOperation::PointLookup { conn, table, id }, + TableInteractionCase::PointLookup, + ) + } + + pub fn predicate_count(conn: SessionId, table: usize, col: u16, value: AlgebraicValue) -> Self { + Self::new( + TableOperation::PredicateCount { + conn, + table, + col, + value, + }, + TableInteractionCase::PredicateCount, + ) + } + + pub fn range_scan( + conn: SessionId, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + ) -> Self { + Self::new( + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + }, + TableInteractionCase::RangeScan, + ) + } + + pub fn full_scan(conn: SessionId, table: usize) -> Self { + Self::new(TableOperation::FullScan { conn, table }, TableInteractionCase::FullScan) + } +} + +/// Final state gathered from a table-workload engine after execution ends. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct TableWorkloadOutcome { + /// Row count for each table in schema order. + pub final_row_counts: Vec, + /// Full committed rows for each table in schema order. + pub final_rows: Vec>, +} + +/// Per-session write transaction bookkeeping shared by locking targets. +pub(crate) struct ConnectionWriteState { + /// Open mutable transaction handle for each simulated session. + pub tx_by_connection: Vec>, + /// Session that currently owns the single-writer lock, if any. 
+ pub active_writer: Option, +} + +impl ConnectionWriteState { + pub fn new(connection_count: usize) -> Self { + Self { + tx_by_connection: (0..connection_count).map(|_| None).collect(), + active_writer: None, + } + } + + pub fn ensure_known_connection(&self, conn: SessionId) -> Result<(), String> { + self.tx_by_connection + .get(conn.as_index()) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + pub fn ensure_writer_owner(&self, conn: SessionId, action: &str) -> Result<(), String> { + self.ensure_known_connection(conn)?; + match self.active_writer { + Some(owner) if owner == conn => Ok(()), + Some(owner) => Err(format!( + "connection {conn} cannot {action} while connection {owner} owns lock" + )), + None => Err(format!("connection {conn} has no transaction to {action}")), + } + } +} diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index e3eca56e5d9..3447e4fbf9a 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -199,8 +199,8 @@ where impl Local where - T: Send + Sync + 'static, - R: Repo + Send + Sync + 'static, + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, { /// Inspect how many transactions added via [`Self::append_tx`] are pending /// to be applied to the underlying [`Commitlog`]. 
diff --git a/crates/io/LICENSE b/crates/io/LICENSE new file mode 120000 index 00000000000..8540cf8a991 --- /dev/null +++ b/crates/io/LICENSE @@ -0,0 +1 @@ +../../licenses/BSL.txt \ No newline at end of file diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 6af30dc0f26..55ae62f074b 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -48,6 +48,7 @@ use std::fs::{self, File}; use std::io; use std::ops::{Range, RangeBounds}; use std::path::Path; +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::time::{Duration, Instant}; use std::{ collections::BTreeMap, @@ -1369,20 +1370,23 @@ impl SnapshotRepository { } } -/// Snapshot storage backend. -pub trait SnapshotRepo: Send + Sync { - type Pending: PendingSnapshot; - +/// Snapshot storage backend that can capture, read, list, and invalidate snapshots. +/// +/// Production uses the filesystem-backed [`SnapshotRepository`]. DST can use +/// [`MemorySnapshotRepository`] to keep snapshot storage inside the simulator +/// boundary instead of depending on temporary directories or host filesystem +/// behavior. +pub trait SnapshotStore: Send + Sync { /// Return the database identity associated with this snapshot backend. fn database_identity(&self) -> Identity; - /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. - fn create_snapshot<'db>( + /// Capture and finalize a snapshot at `tx_offset`. + fn capture_snapshot<'db>( &self, tables: &mut dyn Iterator, blobs: &'db dyn BlobStore, tx_offset: TxOffset, - ) -> Result; + ) -> Result; /// Reconstruct the snapshot at `tx_offset` using the supplied page pool. fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result; @@ -1395,6 +1399,25 @@ pub trait SnapshotRepo: Send + Sync { self.latest_snapshot_older_than(TxOffset::MAX) } + /// Invalidate every snapshot newer than `upper_bound`. 
+ fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; + + /// Invalidate the snapshot at `tx_offset`. + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; +} + +/// Filesystem-style snapshot backend with a pending snapshot phase and optional compression. +pub trait SnapshotRepo: SnapshotStore { + type Pending: PendingSnapshot; + + /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result; + /// Attempt to compress all snapshots that fall into `range`, and record /// the outcome in `stats`. /// @@ -1403,30 +1426,21 @@ pub trait SnapshotRepo: Send + Sync { /// /// See [CompressionStats] for how to interpret the results. fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError>; - - /// Invalidate every snapshot newer than `upper_bound`. - fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; - - /// Invalidate the snapshot at `tx_offset`. 
- fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; } -impl SnapshotRepo for SnapshotRepository { - type Pending = BoxedPendingSnapshot; - +impl SnapshotStore for SnapshotRepository { fn database_identity(&self) -> Identity { SnapshotRepository::database_identity(self) } - fn create_snapshot<'db>( + fn capture_snapshot<'db>( &self, tables: &mut dyn Iterator, blobs: &'db dyn BlobStore, tx_offset: TxOffset, - ) -> Result { - Ok(Box::new(SnapshotRepository::create_snapshot( - self, tables, blobs, tx_offset, - )?)) + ) -> Result { + self.create_snapshot(tables, blobs, tx_offset)?.sync_all()?; + Ok(tx_offset) } fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { @@ -1441,17 +1455,316 @@ impl SnapshotRepo for SnapshotRepository { SnapshotRepository::latest_snapshot(self) } + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +impl SnapshotRepo for SnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + Ok(Box::new(SnapshotRepository::create_snapshot( + self, tables, blobs, tx_offset, + )?)) + } + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError> { SnapshotRepository::compress_snapshots(self, stats, range) } +} + +/// In-memory snapshot repository for deterministic tests. +/// +/// This stores snapshot object bytes in process memory and reconstructs through +/// the same [`ReconstructedSnapshot`] shape as the filesystem repository. 
It is +/// not durable and intentionally does not model the on-disk two-phase flush +/// protocol; it is a simulator/test backend for semantic snapshot capture and +/// restore. +pub struct MemorySnapshotRepository { + database_identity: Identity, + replica_id: u64, + snapshots: RwLock>, +} + +impl MemorySnapshotRepository { + pub fn new(database_identity: Identity, replica_id: u64) -> Self { + Self { + database_identity, + replica_id, + snapshots: RwLock::new(BTreeMap::new()), + } + } + + pub fn database_identity(&self) -> Identity { + self.database_identity + } + + pub fn capture_snapshot<'db>( + &self, + tables: impl Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.invalidate_newer_snapshots(tx_offset.saturating_sub(1))?; + let snapshot = MemorySnapshot::capture(self.database_identity, self.replica_id, tables, blobs, tx_offset)?; + self.write_snapshots()?.insert(tx_offset, snapshot); + Ok(tx_offset) + } + + pub fn read_snapshot( + &self, + tx_offset: TxOffset, + page_pool: &PagePool, + ) -> Result { + let snapshot = self + .read_snapshots()? + .get(&tx_offset) + .cloned() + .ok_or_else(|| memory_snapshot_not_found(tx_offset))?; + snapshot.reconstruct(page_pool) + } + + pub fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + Ok(self + .read_snapshots()? 
+ .range(..=upper_bound) + .next_back() + .map(|(&tx_offset, _)| tx_offset)) + } + + pub fn latest_snapshot(&self) -> Result, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + pub fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.retain(|tx_offset, _| *tx_offset <= upper_bound); + Ok(()) + } + + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.remove(&tx_offset); + Ok(()) + } + + fn read_snapshots(&self) -> Result>, SnapshotError> { + self.snapshots.read().map_err(|_| memory_snapshot_lock_poisoned()) + } + + fn write_snapshots(&self) -> Result>, SnapshotError> { + self.snapshots.write().map_err(|_| memory_snapshot_lock_poisoned()) + } +} + +impl SnapshotStore for MemorySnapshotRepository { + fn database_identity(&self) -> Identity { + MemorySnapshotRepository::database_identity(self) + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + MemorySnapshotRepository::capture_snapshot(self, tables, blobs, tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + MemorySnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + MemorySnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + MemorySnapshotRepository::latest_snapshot(self) + } fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { - SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + MemorySnapshotRepository::invalidate_newer_snapshots(self, upper_bound) } fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { - SnapshotRepository::invalidate_snapshot(self, tx_offset) + 
MemorySnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +struct MemoryPendingSnapshot { + tx_offset: TxOffset, +} + +impl PendingSnapshot for MemoryPendingSnapshot { + fn sync_all(self: Box) -> Result { + Ok(self.tx_offset) + } +} + +impl SnapshotRepo for MemorySnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.capture_snapshot(tables, blobs, tx_offset)?; + Ok(Box::new(MemoryPendingSnapshot { tx_offset })) + } + + fn compress_snapshots(&self, _stats: &mut CompressionStats, _range: Range) -> Result<(), SnapshotError> { + Ok(()) + } +} + +#[derive(Clone)] +struct MemorySnapshot { + database_identity: Identity, + replica_id: u64, + tx_offset: TxOffset, + module_abi_version: [u16; 2], + blobs: Vec, + tables: BTreeMap>, +} + +impl MemorySnapshot { + fn capture<'db>( + database_identity: Identity, + replica_id: u64, + tables: impl Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + let blobs = blobs + .iter_blobs() + .map(|(hash, uses, bytes)| MemoryBlob { + hash: *hash, + uses: uses as u32, + bytes: bytes.into(), + }) + .collect(); + + let tables = tables + .map(|table| { + let pages = table + .iter_pages_with_hashes() + .map(|(hash, page)| { + let bytes = bsatn::to_vec(page).map_err(|cause| SnapshotError::Serialize { + ty: ObjectType::Page(hash), + cause, + })?; + Ok(MemoryPage { hash, bytes }) + }) + .collect::, SnapshotError>>()?; + Ok((table.schema.table_id, pages)) + }) + .collect::, SnapshotError>>()?; + + Ok(Self { + database_identity, + replica_id, + tx_offset, + module_abi_version: CURRENT_MODULE_ABI_VERSION, + blobs, + tables, + }) } + + fn reconstruct(self, page_pool: &PagePool) -> Result { + let source_repo = memory_snapshot_path(self.tx_offset); + let mut blob_store = HashMapBlobStore::default(); + for MemoryBlob { hash, uses, bytes } in self.blobs { + let computed 
= BlobHash::hash_from_bytes(&bytes); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: ObjectType::Blob(hash), + expected: hash.data, + computed: computed.data, + source_repo: source_repo.clone(), + }); + } + blob_store.insert_with_uses(&hash, uses as usize, bytes); + } + + let tables = + self.tables + .into_iter() + .map(|(table_id, pages)| { + let pages = pages + .into_iter() + .map(|MemoryPage { hash, bytes }| { + let page = page_pool.take_deserialize_from(&bytes).map_err(|cause| { + SnapshotError::Deserialize { + ty: ObjectType::Page(hash), + source_repo: source_repo.clone(), + cause, + } + })?; + let computed = page.content_hash(); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: ObjectType::Page(hash), + expected: *hash.as_bytes(), + computed: *computed.as_bytes(), + source_repo: source_repo.clone(), + }); + } + Ok(page) + }) + .collect::, SnapshotError>>()?; + Ok((table_id, pages)) + }) + .collect::, SnapshotError>>()?; + + Ok(ReconstructedSnapshot { + database_identity: self.database_identity, + replica_id: self.replica_id, + tx_offset: self.tx_offset, + module_abi_version: self.module_abi_version, + blob_store, + tables, + compress_type: CompressType::None, + }) + } +} + +#[derive(Clone)] +struct MemoryBlob { + hash: BlobHash, + uses: u32, + bytes: Box<[u8]>, +} + +#[derive(Clone)] +struct MemoryPage { + hash: blake3::Hash, + bytes: Vec, +} + +fn memory_snapshot_lock_poisoned() -> SnapshotError { + SnapshotError::Io(io::Error::other("memory snapshot repository lock poisoned")) +} + +fn memory_snapshot_not_found(tx_offset: TxOffset) -> SnapshotError { + SnapshotError::Io(io::Error::new( + io::ErrorKind::NotFound, + format!("memory snapshot {tx_offset} not found"), + )) +} + +fn memory_snapshot_path(tx_offset: TxOffset) -> PathBuf { + PathBuf::from(format!("")) } pub struct ReconstructedSnapshot { diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 1c6c51fe8e7..e7133191ffa 100644 
--- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -23,6 +23,7 @@ use spacetimedb_lib::{ }; use spacetimedb_paths::{server::SnapshotsPath, FromPathUnchecked}; use spacetimedb_primitives::TableId; +use spacetimedb_runtime::Handle; use spacetimedb_sats::{product, raw_identifier::RawIdentifier}; use spacetimedb_schema::{ def::ModuleDef, @@ -230,11 +231,13 @@ async fn create_snapshot(repo: Arc) -> anyhow::Result::default())), - snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled, rt.clone())), + snapshot_store: Some(snapshot_worker.snapshot_store()), + snapshots: Some(snapshot_worker), runtime: rt, }; let db = TestDB::open_db(EmptyHistory::new(), Some(persistence), None, 0)?; diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index 50f6db19257..bc8241938d2 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,12 +1,18 @@ +#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; +#[cfg(not(simulation))] use spacetimedb_pg::pg_server; +#[cfg(not(simulation))] use std::io::{self, Write}; +#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; +#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; use clap::{Arg, ArgMatches}; @@ -15,11 +21,14 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::database::DatabaseRoutes; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use 
spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; +#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -111,6 +120,7 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); + #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -198,13 +208,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); + #[cfg(simulation)] + { + let _ = (pg_port, ctx, listen_addr); + anyhow::bail!("standalone start server mode is not supported under simulation"); + } + + #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + #[cfg(not(simulation))] + { + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + } + #[cfg(not(simulation))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); + #[cfg(not(simulation))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); + #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. 
// If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -250,40 +273,44 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" - ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + #[cfg(not(simulation))] + { + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! 
{ - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + ))?; + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks + } } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + #[cfg(not(simulation))] Ok(()) } @@ -302,6 +329,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. 
This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. +#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -336,11 +364,13 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] +#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } +#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -354,6 +384,7 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } +#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -424,6 +455,7 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. +#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -438,6 +470,7 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
+#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); diff --git a/tools/ci/README.md b/tools/ci/README.md index 9b71b406fef..23bcfc6c23b 100644 --- a/tools/ci/README.md +++ b/tools/ci/README.md @@ -239,6 +239,17 @@ Usage: docs - `--help`: Print help +### `io-boundary` + +**Usage:** +```bash +Usage: io-boundary +``` + +**Options:** + +- `--help`: Print help + ### `help` **Usage:** diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index 2454ea3349f..99a9a2b86c3 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -161,6 +161,8 @@ enum CiCmd { VersionUpgradeCheck, /// Builds the docs site. Docs, + /// Checks that runtime is not used as a Tokio-shaped IO facade. + IoBoundary, } fn run_all_clap_subcommands(skips: &[String]) -> Result<()> { @@ -189,6 +191,99 @@ fn tracked_rs_files_under(path: &str) -> Result> { .collect()) } +fn check_io_boundary() -> Result<()> { + ensure_repo_root()?; + + let mut violations = Vec::new(); + for root in ["crates/runtime", "crates/datastore", "crates/core", "crates/commitlog"] { + for path in tracked_rs_files_under(root)? 
{ + check_file_for_runtime_io_facade(&path, &mut violations)?; + } + } + + if violations.is_empty() { + return Ok(()); + } + + for violation in &violations { + eprintln!("{violation}"); + } + bail!( + "spacetimedb_runtime must not be used as a Tokio-shaped io/fs/net facade; use Tokio directly in normal-only code and semantic seams for simulation code" + ); +} + +fn check_file_for_runtime_io_facade(path: &Path, violations: &mut Vec) -> Result<()> { + let contents = fs::read_to_string(path)?; + let mut in_runtime_use_tree = false; + + for (line_idx, line) in contents.lines().enumerate() { + let line_no = line_idx + 1; + let code = line.split("//").next().unwrap_or(line); + + for module in ["io", "fs", "net", "blocking_fs"] { + if code.contains(&format!("spacetimedb_runtime::{module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade usage", + path.display() + )); + } + if path == Path::new("crates/runtime/src/lib.rs") && code.contains(&format!("pub mod {module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade export", + path.display() + )); + } + } + + if in_runtime_use_tree { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } + } + if code.contains("};") { + in_runtime_use_tree = false; + } + continue; + } + + if code.contains("use spacetimedb_runtime::{") { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } + } + if !code.contains("};") { + in_runtime_use_tree = true; + } + } + } + + Ok(()) +} + +fn use_tree_mentions_token(code: &str, forbidden: &str) -> bool { + let mut token = String::new(); + for ch in code.chars() { + if ch == '_' || ch.is_ascii_alphanumeric() { + 
token.push(ch); + continue; + } + if token == forbidden { + return true; + } + token.clear(); + } + token == forbidden +} + fn run_publish_checks() -> Result<()> { cmd!("bash", "-lc", "test -d venv || python3 -m venv venv").run()?; cmd!("venv/bin/pip3", "install", "argparse", "toml").run()?; @@ -352,6 +447,7 @@ fn main() -> Result<()> { Some(CiCmd::Lint) => { ensure_repo_root()?; + check_io_boundary()?; // `cargo fmt --all` only checks files that Cargo discovers through workspace/package targets. // However, we also keep Rust sources in a locations that are tracked but not part of our workspace, // so this approach properly catches all the files, where `cargo fmt` does not. @@ -540,6 +636,10 @@ fn main() -> Result<()> { run_docs_build()?; } + Some(CiCmd::IoBoundary) => { + check_io_boundary()?; + } + None => run_all_clap_subcommands(&cli.skip)?, }