diff --git a/Cargo.lock b/Cargo.lock index 95fd0e6e1c3..40ccd6c99aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5226,9 +5226,9 @@ dependencies = [ [[package]] name = "pg_interval" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe46640b465e284b048ef065cbed8ef17a622878d310c724578396b4cfd00df2" +checksum = "7ceff720b1579b383347d48e5df5f604042adaa6a06f640b1f1e3c065f40766d" dependencies = [ "bytes", "chrono", @@ -7932,7 +7932,6 @@ dependencies = [ "spacetimedb-snapshot", "spacetimedb-subscription", "spacetimedb-table", - "spacetimedb-vm", "sqlparser", "strum", "tabled", @@ -8642,29 +8641,6 @@ dependencies = [ "zip", ] -[[package]] -name = "spacetimedb-vm" -version = "2.0.5" -dependencies = [ - "anyhow", - "arrayvec", - "derive_more 0.99.20", - "itertools 0.12.1", - "log", - "smallvec", - "spacetimedb-data-structures", - "spacetimedb-execution", - "spacetimedb-lib 2.0.5", - "spacetimedb-primitives 2.0.5", - "spacetimedb-sats 2.0.5", - "spacetimedb-schema", - "spacetimedb-table", - "tempfile", - "thiserror 1.0.69", - "tracing", - "typed-arena", -] - [[package]] name = "spin" version = "0.9.8" @@ -8750,10 +8726,8 @@ dependencies = [ "rusqlite", "rust_decimal", "spacetimedb-core", - "spacetimedb-datastore", "spacetimedb-lib 2.0.5", "spacetimedb-sats 2.0.5", - "spacetimedb-vm", "sqllogictest", "sqllogictest-engines", "tempfile", @@ -9857,12 +9831,6 @@ dependencies = [ "utf-8", ] -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - [[package]] name = "typedmap" version = "0.6.0" diff --git a/Cargo.toml b/Cargo.toml index e4066079213..9e89417029c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,6 @@ members = [ "crates/table", "crates/testing", "crates/update", - "crates/vm", "modules/benchmarks", "modules/keynote-benchmarks", "modules/perf-test", @@ -143,7 +142,6 @@ spacetimedb-schema = { path = "crates/schema", version = "=2.0.5" } spacetimedb-standalone = { path = "crates/standalone", version = "=2.0.5" } spacetimedb-sql-parser = { path = "crates/sql-parser", version = "=2.0.5" } spacetimedb-table = { path = "crates/table", version = "=2.0.5" } -spacetimedb-vm = { path = "crates/vm", version = "=2.0.5" } spacetimedb-fs-utils = { path = "crates/fs-utils", version = "=2.0.5" } spacetimedb-snapshot = { path = "crates/snapshot", version = "=2.0.5" } spacetimedb-subscription = { path = "crates/subscription", version = "=2.0.5" } diff --git a/crates/bench/benches/subscription.rs b/crates/bench/benches/subscription.rs index c9014325eed..ebd8e83e35c 100644 --- a/crates/bench/benches/subscription.rs +++ b/crates/bench/benches/subscription.rs @@ -2,24 +2,19 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use spacetimedb::client::consume_each_list::ConsumeEachBuffer; use spacetimedb::db::relational_db::RelationalDB; use spacetimedb::error::DBError; -use spacetimedb::host::module_host::DatabaseTableUpdate; use spacetimedb::identity::AuthCtx; use spacetimedb::sql::ast::SchemaViewer; -use spacetimedb::subscription::query::compile_read_only_queryset; use spacetimedb::subscription::row_list_builder_pool::BsatnRowListBuilderPool; -use spacetimedb::subscription::subscription::ExecutionSet; use spacetimedb::subscription::tx::DeltaTx; use spacetimedb::subscription::{collect_table_update, TableUpdateType}; use spacetimedb_bench::database::BenchDatabase as _; use spacetimedb_bench::spacetime_raw::SpacetimeRaw; -use spacetimedb_client_api_messages::websocket::v1::{BsatnFormat, Compression}; +use spacetimedb_client_api_messages::websocket::v1::BsatnFormat; use spacetimedb_datastore::execution_context::Workload; use spacetimedb_execution::pipelined::PipelinedProject; use spacetimedb_primitives::{col_list, TableId}; use spacetimedb_query::compile_subscription; -use spacetimedb_sats::{bsatn, product, AlgebraicType, AlgebraicValue, ProductValue}; - -use spacetimedb_schema::table_name::TableName; +use spacetimedb_sats::{bsatn, product, AlgebraicType, AlgebraicValue}; #[cfg(not(target_env = "msvc"))] use tikv_jemallocator::Jemalloc; @@ -52,15 +47,6 @@ fn create_table_footprint(db: &RelationalDB) -> Result { db.create_table_for_test("footprint", schema, indexes) } -fn insert_op(table_id: TableId, table_name: &str, row: ProductValue) -> DatabaseTableUpdate { - DatabaseTableUpdate { - table_id, - table_name: TableName::for_test(table_name), - inserts: [row].into(), - deletes: [].into(), - } -} - fn eval(c: &mut Criterion) { let raw = SpacetimeRaw::build(false).unwrap(); @@ -115,16 +101,12 @@ fn eval(c: &mut Criterion) { let footprint = AlgebraicValue::sum(1, AlgebraicValue::unit()); let owner = 6u64; - let new_lhs_row = product!(entity_id, owner, footprint); - let new_rhs_row = product!(entity_id, chunk_index, x, z, dimension); - - let ins_lhs = insert_op(lhs, "footprint", new_lhs_row); - let ins_rhs = insert_op(rhs, "location", new_rhs_row); - let update = [&ins_lhs, &ins_rhs]; + let _new_lhs_row = product!(entity_id, owner, footprint); + let _new_rhs_row = product!(entity_id, chunk_index, x, z, dimension); let bsatn_rlb_pool = black_box(BsatnRowListBuilderPool::new()); - // A benchmark runner for the new query engine + // A benchmark runner for the subscription engine. let bench_query = |c: &mut Criterion, name, sql| { c.bench_function(name, |b| { let tx = raw.db.begin_tx(Workload::Subscribe); @@ -154,20 +136,6 @@ fn eval(c: &mut Criterion) { }); }; - let bench_eval = |c: &mut Criterion, name, sql| { - c.bench_function(name, |b| { - let tx = raw.db.begin_tx(Workload::Update); - let query = compile_read_only_queryset(&raw.db, &AuthCtx::for_testing(), &tx, sql).unwrap(); - let query: ExecutionSet = query.into(); - - b.iter(|| { - let updates = - black_box(query.eval::(&raw.db, &tx, &bsatn_rlb_pool, None, Compression::None)); - updates.consume_each_list(&mut |buffer| bsatn_rlb_pool.try_put(buffer)); - }) - }); - }; - // Join 1M rows on the left with 12K rows on the right. // Note, this should use an index join so as not to read the entire footprint table. let semijoin = format!( @@ -183,66 +151,6 @@ fn eval(c: &mut Criterion) { bench_query(c, "footprint-scan", "select * from footprint"); bench_query(c, "footprint-semijoin", &semijoin); bench_query(c, "index-scan-multi", index_scan_multi); - - // To profile this benchmark for 30s - // samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- full-scan --exact --profile-time=30 - // Iterate 1M rows. - bench_eval(c, "full-scan", "select * from footprint"); - - // To profile this benchmark for 30s - // samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- full-join --exact --profile-time=30 - // Join 1M rows on the left with 12K rows on the right. - // Note, this should use an index join so as not to read the entire footprint table. - let name = format!( - r#" - select footprint.* - from footprint join location on footprint.entity_id = location.entity_id - where location.chunk_index = {chunk_index} - "# - ); - bench_eval(c, "full-join", &name); - - // To profile this benchmark for 30s - // samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- incr-select --exact --profile-time=30 - c.bench_function("incr-select", |b| { - // A passthru executed independently of the database. - let select_lhs = "select * from footprint"; - let select_rhs = "select * from location"; - let tx = &raw.db.begin_tx(Workload::Update); - let query_lhs = compile_read_only_queryset(&raw.db, &AuthCtx::for_testing(), tx, select_lhs).unwrap(); - let query_rhs = compile_read_only_queryset(&raw.db, &AuthCtx::for_testing(), tx, select_rhs).unwrap(); - let query = ExecutionSet::from_iter(query_lhs.into_iter().chain(query_rhs)); - let tx = &tx.into(); - - b.iter(|| drop(black_box(query.eval_incr_for_test(&raw.db, tx, &update, None)))) - }); - - // To profile this benchmark for 30s - // samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- incr-join --exact --profile-time=30 - c.bench_function("incr-join", |b| { - // Not a passthru - requires reading of database state. - let join = format!( - "\ - select footprint.* \ - from footprint join location on footprint.entity_id = location.entity_id \ - where location.chunk_index = {chunk_index}" - ); - let tx = &raw.db.begin_tx(Workload::Update); - let query = compile_read_only_queryset(&raw.db, &AuthCtx::for_testing(), tx, &join).unwrap(); - let query: ExecutionSet = query.into(); - let tx = &tx.into(); - - b.iter(|| drop(black_box(query.eval_incr_for_test(&raw.db, tx, &update, None)))); - }); - - // To profile this benchmark for 30s - // samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- query-indexes-multi --exact --profile-time=30 - // Iterate 1M rows. - bench_eval( - c, - "query-indexes-multi", - "select * from location WHERE x = 0 AND z = 10000 AND dimension = 0", - ); } criterion_group!(benches, eval); diff --git a/crates/client-api/src/lib.rs b/crates/client-api/src/lib.rs index 9cb494691ef..784ff0862b1 100644 --- a/crates/client-api/src/lib.rs +++ b/crates/client-api/src/lib.rs @@ -166,11 +166,7 @@ impl Host { .await .map_err(|e| { log::warn!("{e}"); - if let Some(auth_err) = e.get_auth_error() { - (StatusCode::UNAUTHORIZED, auth_err.to_string()) - } else { - (StatusCode::BAD_REQUEST, e.to_string()) - } + (StatusCode::BAD_REQUEST, e.to_string()) })?; let total_duration = sql_start.elapsed(); diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index e0934d245b8..ad313c1fc27 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -31,7 +31,6 @@ spacetimedb-query.workspace = true spacetimedb-sats = { workspace = true, features = ["serde"] } spacetimedb-schema.workspace = true spacetimedb-table.workspace = true -spacetimedb-vm.workspace = true spacetimedb-snapshot.workspace = true spacetimedb-subscription.workspace = true spacetimedb-expr.workspace = true @@ -156,7 +155,6 @@ spacetimedb-lib = { path = "../lib", features = ["proptest", "test"] } spacetimedb-sats = { path = "../sats", features = ["proptest"] } spacetimedb-commitlog = { path = "../commitlog", features = ["test"] } spacetimedb-datastore = { path = "../datastore/", features = ["test"] } -spacetimedb-vm = { workspace = true, features = ["test"]} criterion.workspace = true # Also as dev-dependencies for use in _this_ crate's tests. diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 2819dbe366a..5d88b2dd7b3 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -42,7 +42,6 @@ use spacetimedb_lib::ConnectionId; use spacetimedb_lib::Identity; use spacetimedb_paths::server::{ReplicaDir, SnapshotsPath}; use spacetimedb_primitives::*; -use spacetimedb_sats::algebraic_type::fmt::fmt_algebraic_type; use spacetimedb_sats::memory_usage::MemoryUsage; use spacetimedb_sats::raw_identifier::RawIdentifier; use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue}; @@ -56,8 +55,6 @@ use spacetimedb_snapshot::{ReconstructedSnapshot, SnapshotError, SnapshotReposit use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; -use spacetimedb_vm::errors::{ErrorType, ErrorVm}; -use spacetimedb_vm::ops::parse; use std::borrow::Cow; use std::io; use std::ops::{Bound, RangeBounds}; @@ -1511,32 +1508,6 @@ impl RelationalDB { Ok(None) } - /// Read the value of [ST_VARNAME_SLOW_QRY] from `st_var` - pub(crate) fn query_limit(&self, tx: &Tx) -> Result, DBError> { - if let Some(StVarValue::U64(ms)) = self.read_var(tx, StVarName::SlowQryThreshold)? { - return Ok(Some(ms)); - } - Ok(None) - } - - /// Read the value of [ST_VARNAME_SLOW_SUB] from `st_var` - #[allow(dead_code)] - pub(crate) fn sub_limit(&self, tx: &Tx) -> Result, DBError> { - if let Some(StVarValue::U64(ms)) = self.read_var(tx, StVarName::SlowSubThreshold)? { - return Ok(Some(ms)); - } - Ok(None) - } - - /// Read the value of [ST_VARNAME_SLOW_INC] from `st_var` - #[allow(dead_code)] - pub(crate) fn incr_limit(&self, tx: &Tx) -> Result, DBError> { - if let Some(StVarValue::U64(ms)) = self.read_var(tx, StVarName::SlowIncThreshold)? { - return Ok(Some(ms)); - } - Ok(None) - } - /// Read the value of a system variable from `st_var` pub(crate) fn read_var(&self, tx: &Tx, name: StVarName) -> Result, DBError> { if let Some(row_ref) = self @@ -1548,31 +1519,6 @@ impl RelationalDB { Ok(None) } - /// Update the value of a system variable in `st_var` - pub(crate) fn write_var(&self, tx: &mut MutTx, name: StVarName, literal: &str) -> Result<(), DBError> { - let value = Self::parse_var(name, literal)?; - if let Some(row_ref) = self - .iter_by_col_eq_mut(tx, ST_VAR_ID, StVarFields::Name.col_id(), &name.into())? - .next() - { - self.delete(tx, ST_VAR_ID, [row_ref.pointer()]); - } - tx.insert_via_serialize_bsatn(ST_VAR_ID, &StVarRow { name, value })?; - Ok(()) - } - - /// Parse the literal representation of a system variable - fn parse_var(name: StVarName, literal: &str) -> Result { - StVarValue::try_from_primitive(parse::parse(literal, &name.type_of())?).map_err(|v| { - ErrorVm::Type(ErrorType::Parse { - value: literal.to_string(), - ty: fmt_algebraic_type(&name.type_of()).to_string(), - err: format!("error parsing value: {v:?}"), - }) - .into() - }) - } - /// Write `rows` into a (sender) view's backing table. /// /// # Process @@ -2353,9 +2299,7 @@ mod tests { use super::tests_utils::begin_mut_tx; use super::*; - use crate::db::relational_db::tests_utils::{ - begin_tx, insert, make_snapshot, with_auto_commit, with_read_only, TestDB, - }; + use crate::db::relational_db::tests_utils::{begin_tx, insert, make_snapshot, TestDB}; use anyhow::bail; use bytes::Bytes; use commitlog::payload::txdata; @@ -2465,18 +2409,6 @@ mod tests { Ok(()) } - #[test] - fn test_system_variables() { - let db = TestDB::durable().expect("failed to create db"); - let _ = with_auto_commit(&db, |tx| db.write_var(tx, StVarName::RowLimit, "5")); - assert_eq!( - 5, - with_read_only(&db, |tx| db.row_limit(tx)) - .expect("failed to read from st_var") - .expect("row_limit does not exist") - ); - } - #[test] fn test_open_twice() -> ResultTest<()> { let stdb = TestDB::durable()?; diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 8ebee4ce4af..226c2700d08 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -25,8 +25,6 @@ use spacetimedb_sats::hash::Hash; use spacetimedb_sats::product_value::InvalidFieldError; use spacetimedb_schema::def::error::{LibError, RelationError, SchemaErrors}; use spacetimedb_schema::relation::FieldName; -use spacetimedb_vm::errors::{ErrorKind, ErrorLang, ErrorType, ErrorVm}; -use spacetimedb_vm::expr::Crud; pub use spacetimedb_datastore::error::{DatastoreError, IndexError, SequenceError, TableError}; @@ -42,8 +40,6 @@ pub enum SubscriptionError { NotFound(IndexId), #[error("Empty string")] Empty, - #[error("Queries with side effects not allowed: {0:?}")] - SideEffect(Crud), #[error("Unsupported query on subscription: {0:?}")] Unsupported(String), #[error("Subscribing to queries in one call is not supported")] @@ -75,10 +71,6 @@ pub enum PlanError { DatabaseInternal(Box), #[error("Relation Error: `{0}`")] Relation(#[from] RelationError), - #[error("{0}")] - VmError(#[from] ErrorVm), - #[error("{0}")] - TypeCheck(#[from] ErrorType), } #[derive(Error, Debug)] @@ -121,10 +113,6 @@ pub enum DBError { SledDbError(#[from] sled::Error), #[error("Mutex was poisoned acquiring lock on MessageLog: {0}")] MessageLogPoisoned(String), - #[error("VmError: {0}")] - Vm(#[from] ErrorVm), - #[error("VmErrorUser: {0}")] - VmUser(#[from] ErrorLang), #[error("SubscriptionError: {0}")] Subscription(#[from] SubscriptionError), #[error("ClientError: {0}")] @@ -163,23 +151,6 @@ pub enum DBError { View(#[from] ViewCallError), } -impl DBError { - pub fn get_auth_error(&self) -> Option<&ErrorLang> { - if let Self::VmUser(err) = self - && err.kind == ErrorKind::Unauthorized - { - return Some(err); - } - None - } -} - -impl From for ErrorVm { - fn from(err: DBError) -> Self { - ErrorVm::Other(err.into()) - } -} - impl From for DBError { fn from(value: InvalidFieldError) -> Self { LibError::from(value).into() @@ -324,12 +295,6 @@ impl From for NodesError { } } -impl From for NodesError { - fn from(err: ErrorVm) -> Self { - DBError::from(err).into() - } -} - #[derive(Debug, Error)] pub enum RestoreSnapshotError { #[error("Snapshot has incorrect database_identity: expected {expected} but found {actual}")] diff --git a/crates/core/src/estimation.rs b/crates/core/src/estimation.rs index dbeb0b11dcf..13fa5bb22c1 100644 --- a/crates/core/src/estimation.rs +++ b/crates/core/src/estimation.rs @@ -3,14 +3,8 @@ use spacetimedb_datastore::locking_tx_datastore::{state_view::StateView as _, Nu use spacetimedb_lib::query::Delta; use spacetimedb_physical_plan::plan::{HashJoin, IxJoin, IxScan, PhysicalPlan, Sarg, TableScan}; use spacetimedb_primitives::{ColList, TableId}; -use spacetimedb_vm::expr::{Query, QueryExpr, SourceExpr}; -/// The estimated number of rows that a query plan will return. -pub fn num_rows(tx: &Tx, expr: &QueryExpr) -> u64 { - row_est(tx, &expr.source, &expr.query) -} - -/// Use cardinality estimates to predict the total number of rows scanned by a query +/// Use cardinality estimates to predict the total number of rows scanned by a query. pub fn estimate_rows_scanned(tx: &Tx, plan: &PhysicalPlan) -> u64 { match plan { PhysicalPlan::TableScan(..) | PhysicalPlan::IxScan(..) => row_estimate(tx, plan), @@ -45,13 +39,11 @@ pub fn estimate_rows_scanned(tx: &Tx, plan: &PhysicalPlan) -> u64 { } } -/// Estimate the cardinality of a physical plan +/// Estimate the cardinality of a physical plan. pub fn row_estimate(tx: &Tx, plan: &PhysicalPlan) -> u64 { match plan { - // Use a row limit as the estimate if present PhysicalPlan::TableScan(TableScan { limit: Some(n), .. }, _) | PhysicalPlan::IxScan(IxScan { limit: Some(n), .. }, _) => *n, - // Table scans return the number of rows in the table PhysicalPlan::TableScan( TableScan { schema, @@ -60,7 +52,6 @@ pub fn row_estimate(tx: &Tx, plan: &PhysicalPlan) -> u64 { }, _, ) => tx.table_row_count(schema.table_id).unwrap_or_default(), - // We don't estimate the cardinality of delta scans currently PhysicalPlan::TableScan( TableScan { limit: None, @@ -69,9 +60,6 @@ pub fn row_estimate(tx: &Tx, plan: &PhysicalPlan) -> u64 { }, _, ) => 0, - // The selectivity of a point index scan is 1 / NDV, - // where NDV is the Number of Distinct Values of a column. - // Note, this assumes a uniform distribution of column values. PhysicalPlan::IxScan( ix @ IxScan { arg: Sarg::Eq(last_col, _), @@ -83,17 +71,11 @@ pub fn row_estimate(tx: &Tx, plan: &PhysicalPlan) -> u64 { cols.push(*last_col); index_row_est(tx, ix.schema.table_id, &cols) } - // For all other index scans we assume a worst-case scenario. PhysicalPlan::IxScan(IxScan { schema, .. }, _) => tx.table_row_count(schema.table_id).unwrap_or_default(), - // Same for filters PhysicalPlan::Filter(input, _) => row_estimate(tx, input), - // Nested loop joins are cross joins PhysicalPlan::NLJoin(lhs, rhs) => row_estimate(tx, lhs).saturating_mul(row_estimate(tx, rhs)), - // Unique joins return a maximal estimation. - // We assume every lhs row has a matching rhs row. PhysicalPlan::IxJoin(IxJoin { lhs, unique: true, .. }, _) | PhysicalPlan::HashJoin(HashJoin { lhs, unique: true, .. }, _) => row_estimate(tx, lhs), - // Otherwise we estimate the rows returned from the rhs PhysicalPlan::IxJoin( IxJoin { lhs, rhs, rhs_field, .. @@ -106,59 +88,7 @@ pub fn row_estimate(tx: &Tx, plan: &PhysicalPlan) -> u64 { } } -/// The estimated number of rows that a query sub-plan will return. -fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 { - match ops { - // The base case is the table row count. - [] => src.table_id().and_then(|id| tx.table_row_count(id)).unwrap_or(0), - // Walk in reverse from the end (`op`) to the beginning. - [input @ .., op] => match op { - // How selective is an index lookup? - // We assume a uniform distribution of keys, - // which implies a selectivity = 1 / NDV, - // where NDV stands for Number of Distinct Values. - Query::IndexScan(scan) if scan.is_point() => { - index_row_est(tx, scan.table.table_id, &scan.columns) - } - // We assume projections select 100% of their input rows. - Query::Project(..) - // How selective is an arbitrary predicate? - // If it is not sargable, - // meaning it cannot be satisfied using an index, - // we assume the worst-case scenario, - // that it will select all of its input rows. - // That is we set the selectivity = 1. - | Query::Select(_) - // We do the same for sargable range conditions. - | Query::IndexScan(_) => { - row_est(tx, src, input) - } - // How selective is an index join? - // We have an estimate for the number of probe side rows, - // We have an estimate for the number of rows each index probe will return. - // Multiplying both estimates together will give us our expectation. - Query::IndexJoin(join) => { - row_est(tx, &join.probe_side.source, &join.probe_side.query) - .saturating_mul( - index_row_est(tx, src.table_id().unwrap(), &join.index_col.into()) - ) - } - // Since inner join is our most expensive operation, - // we maximally overestimate its output cardinality, - // as though each row from the left joins with each row from the right. - Query::JoinInner(join) => { - row_est(tx, src, input) - .saturating_mul( - row_est(tx, &join.rhs.source, &join.rhs.query) - ) - } - }, - } -} - /// The estimated number of rows that an index probe will return. -/// Note this method is not applicable to range scans, -/// but it does work for multi column indices. fn index_row_est(tx: &Tx, table_id: TableId, cols: &ColList) -> u64 { let table_rc = || tx.table_row_count(table_id).unwrap_or_default(); match tx.num_distinct_values(table_id, cols) { @@ -170,35 +100,34 @@ fn index_row_est(tx: &Tx, table_id: TableId, cols: &ColList) -> u64 { #[cfg(test)] mod tests { + use super::{estimate_rows_scanned, row_estimate}; use crate::db::relational_db::tests_utils::{begin_tx, insert, with_auto_commit}; + use crate::db::relational_db::{tests_utils::TestDB, RelationalDB}; + use crate::error::DBError; use crate::sql::ast::SchemaViewer; - use crate::{ - db::relational_db::{tests_utils::TestDB, RelationalDB}, - error::DBError, - estimation::num_rows, - sql::compiler::compile_sql, - }; use spacetimedb_lib::{identity::AuthCtx, AlgebraicType}; use spacetimedb_query::compile_subscription; use spacetimedb_sats::product; - use spacetimedb_vm::expr::CrudExpr; - - use super::row_estimate; fn in_mem_db() -> TestDB { TestDB::in_memory().expect("failed to make test db") } - fn num_rows_for(db: &RelationalDB, sql: &str) -> u64 { + fn estimate_for(db: &RelationalDB, sql: &str) -> u64 { + let auth = AuthCtx::for_testing(); let tx = begin_tx(db); - match &*compile_sql(db, &AuthCtx::for_testing(), &tx, sql).expect("Failed to compile sql") { - [CrudExpr::Query(expr)] => num_rows(&tx, expr), - exprs => panic!("unexpected result from compilation: {exprs:#?}"), - } + let tx = SchemaViewer::new(&tx, &auth); + + compile_subscription(sql, &tx, &auth) + .map(|(plans, ..)| plans) + .expect("failed to compile sql query") + .into_iter() + .map(|plan| plan.optimize(&auth).expect("failed to optimize sql query")) + .map(|plan| row_estimate(&tx, &plan)) + .sum() } - /// Using the new query plan - fn new_row_estimate(db: &RelationalDB, sql: &str) -> u64 { + fn scanned_for(db: &RelationalDB, sql: &str) -> u64 { let auth = AuthCtx::for_testing(); let tx = begin_tx(db); let tx = SchemaViewer::new(&tx, &auth); @@ -208,15 +137,10 @@ mod tests { .expect("failed to compile sql query") .into_iter() .map(|plan| plan.optimize(&auth).expect("failed to optimize sql query")) - .map(|plan| row_estimate(&tx, &plan)) + .map(|plan| estimate_rows_scanned(&tx, plan.physical_plan())) .sum() } - const NUM_T_ROWS: u64 = 10; - const NDV_T: u64 = 5; - const NUM_S_ROWS: u64 = 2; - const NDV_S: u64 = 2; - fn create_table_t(db: &RelationalDB, indexed: bool) { let indexes = &[0.into()]; let indexes = if indexed { indexes } else { &[] as &[_] }; @@ -225,110 +149,25 @@ mod tests { .expect("Failed to create table"); with_auto_commit(db, |tx| -> Result<(), DBError> { - for i in 0..NUM_T_ROWS { - insert(db, tx, table_id, &product![i % NDV_T, i]).expect("failed to insert into table"); + for i in 0u64..10u64 { + insert(db, tx, table_id, &product![i % 5, i]).expect("failed to insert into table"); } Ok(()) }) .expect("failed to insert into table"); } - fn create_table_s(db: &RelationalDB, indexed: bool) { - let indexes = &[0.into(), 1.into()]; - let indexes = if indexed { indexes } else { &[] as &[_] }; - let rhs = db - .create_table_for_test("S", &["a", "c"].map(|n| (n, AlgebraicType::U64)), indexes) - .expect("Failed to create table"); - - with_auto_commit(db, |tx| -> Result<(), DBError> { - for i in 0..NUM_S_ROWS { - insert(db, tx, rhs, &product![i, i]).expect("failed to insert into table"); - } - Ok(()) - }) - .expect("failed to insert into table"); - } - - fn create_empty_table_r(db: &RelationalDB, indexed: bool) { - let indexes = &[0.into()]; - let indexes = if indexed { indexes } else { &[] as &[_] }; - db.create_table_for_test("R", &["a", "b"].map(|n| (n, AlgebraicType::U64)), indexes) - .expect("Failed to create table"); - } - - /// Cardinality estimation for an index lookup depends only on - /// (1) the total number of rows, - /// (2) the number of distinct values. #[test] fn cardinality_estimation_index_lookup() { let db = in_mem_db(); create_table_t(&db, true); - let sql = "select * from T where a = 0"; - let est = NUM_T_ROWS / NDV_T; - assert_eq!(est, num_rows_for(&db, sql)); - assert_eq!(est, new_row_estimate(&db, sql)); - } - - #[test] - fn cardinality_estimation_0_ndv() { - let db = in_mem_db(); - create_empty_table_r(&db, true); - let sql = "select * from R where a = 0"; - assert_eq!(0, num_rows_for(&db, sql)); - assert_eq!(0, new_row_estimate(&db, sql)); - } - - /// We estimate an index range to return all input rows. - #[test] - fn cardinality_estimation_index_range() { - let db = in_mem_db(); - create_table_t(&db, true); - let sql = "select * from T where a > 0 and a < 2"; - assert_eq!(NUM_T_ROWS, num_rows_for(&db, sql)); - assert_eq!(NUM_T_ROWS, new_row_estimate(&db, sql)); - } - - /// We estimate a selection on a non-indexed column to return all input rows. - #[test] - fn select_cardinality_estimation() { - let db = in_mem_db(); - create_table_t(&db, true); - let sql = "select * from T where b = 0"; - assert_eq!(NUM_T_ROWS, num_rows_for(&db, sql)); - assert_eq!(NUM_T_ROWS, new_row_estimate(&db, sql)); - } - - /// We estimate a projection to return all input rows. - #[test] - fn project_cardinality_estimation() { - let db = in_mem_db(); - create_table_t(&db, true); - let sql = "select a from T"; - assert_eq!(NUM_T_ROWS, num_rows_for(&db, sql)); - } - - /// We estimate an inner join to return the product of its input sizes. - #[test] - fn cardinality_estimation_inner_join() { - let db = in_mem_db(); - create_table_t(&db, false); - create_table_s(&db, false); - let sql = "select T.* from T join S on T.a = S.a where S.c = 0"; - let est = NUM_T_ROWS * NUM_S_ROWS; - assert_eq!(est, num_rows_for(&db, sql)); - assert_eq!(est, new_row_estimate(&db, sql)); + assert_eq!(2, estimate_for(&db, "select * from T where a = 0")); } - /// An index join estimates its output cardinality in the same way. - /// As the product of its estimated input cardinalities. #[test] - fn cardinality_estimation_index_join() { + fn scanned_rows_respect_filters() { let db = in_mem_db(); create_table_t(&db, true); - create_table_s(&db, true); - let sql = "select T.* from T join S on T.a = S.a where S.c = 0"; - let est = NUM_T_ROWS / NDV_T * NUM_S_ROWS / NDV_S; - assert_eq!(est, num_rows_for(&db, sql)); - assert_eq!(est, new_row_estimate(&db, sql)); + assert!(scanned_for(&db, "select * from T where a = 0") <= scanned_for(&db, "select * from T")); } } diff --git a/crates/core/src/host/module_host.rs b/crates/core/src/host/module_host.rs index 1b46b12af08..8cedfccf71e 100644 --- a/crates/core/src/host/module_host.rs +++ b/crates/core/src/host/module_host.rs @@ -51,6 +51,7 @@ use spacetimedb_datastore::locking_tx_datastore::{MutTxId, ViewCallInfo}; use spacetimedb_datastore::traits::{IsolationLevel, Program, TxData}; use spacetimedb_durability::DurableOffset; use spacetimedb_execution::pipelined::{PipelinedProject, ViewProject}; +use spacetimedb_execution::RelValue; use spacetimedb_expr::expr::CollectViews; use spacetimedb_lib::db::raw_def::v9::Lifecycle; use spacetimedb_lib::identity::{AuthCtx, RequestId}; @@ -66,7 +67,6 @@ use spacetimedb_schema::identifier::Identifier; use spacetimedb_schema::reducer_name::ReducerName; use spacetimedb_schema::schema::{Schema, TableSchema}; use spacetimedb_schema::table_name::TableName; -use spacetimedb_vm::relation::RelValue; use std::collections::VecDeque; use std::fmt; use std::sync::atomic::AtomicBool; diff --git a/crates/core/src/sql/ast.rs b/crates/core/src/sql/ast.rs index 918a7c07c48..892430ba1ea 100644 --- a/crates/core/src/sql/ast.rs +++ b/crates/core/src/sql/ast.rs @@ -1,482 +1,14 @@ -use crate::db::relational_db::{MutTx, RelationalDB, Tx}; -use crate::error::{DBError, PlanError}; use anyhow::Context; -use spacetimedb_data_structures::map::{HashCollectionExt as _, IntMap}; use spacetimedb_datastore::locking_tx_datastore::state_view::StateView; use spacetimedb_datastore::system_tables::{StRowLevelSecurityFields, ST_ROW_LEVEL_SECURITY_ID}; use spacetimedb_expr::check::SchemaView; -use spacetimedb_expr::statement::compile_sql_stmt; use spacetimedb_lib::identity::AuthCtx; -use spacetimedb_primitives::{ColId, TableId}; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -use spacetimedb_schema::def::error::RelationError; -use spacetimedb_schema::relation::{ColExpr, FieldName}; -use spacetimedb_schema::schema::{ColumnSchema, TableOrViewSchema, TableSchema}; -use spacetimedb_schema::table_name::TableName; -use spacetimedb_vm::errors::ErrorVm; -use spacetimedb_vm::expr::{Expr, FieldExpr, FieldOp}; -use spacetimedb_vm::operator::{OpCmp, OpLogic, OpQuery}; -use spacetimedb_vm::ops::parse::{parse, parse_simple_enum}; -use sqlparser::ast::{ - Assignment, BinaryOperator, Expr as SqlExpr, HiveDistributionStyle, Ident, JoinConstraint, JoinOperator, - ObjectName, Query, Select, SelectItem, SetExpr, Statement, TableFactor, TableWithJoins, Value, Values, -}; -use sqlparser::dialect::PostgreSqlDialect; -use sqlparser::parser::Parser; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::AlgebraicValue; +use spacetimedb_schema::schema::TableOrViewSchema; use std::ops::Deref; use std::sync::Arc; -/// Simplify to detect features of the syntax we don't support yet -/// Because we use [PostgreSqlDialect] in the compiler step it already protect against features -/// that are not in the standard SQL-92 but still need to check for completeness -trait Unsupported { - fn unsupported(&self) -> bool; -} - -impl Unsupported for bool { - fn unsupported(&self) -> bool { - *self - } -} - -impl Unsupported for Option { - fn unsupported(&self) -> bool { - self.is_some() - } -} - -impl Unsupported for Vec { - fn unsupported(&self) -> bool { - !self.is_empty() - } -} - -impl Unsupported for HiveDistributionStyle { - fn unsupported(&self) -> bool { - !matches!(self, HiveDistributionStyle::NONE) - } -} - -impl Unsupported for sqlparser::ast::GroupByExpr { - fn unsupported(&self) -> bool { - match self { - sqlparser::ast::GroupByExpr::All => true, - sqlparser::ast::GroupByExpr::Expressions(v) => v.unsupported(), - } - } -} - -macro_rules! unsupported { - ($name:literal,$a:expr)=>{{ - let name = stringify!($name); - let it = stringify!($a); - if $a.unsupported() { - return Err(PlanError::Unsupported { - feature: format!("Unsupported {name} with `{it}` feature."), - - }); - } - }}; - ($name:literal,$($a:expr),+$(,)?)=> {{ - $(unsupported!($name,$a);)+ - }}; -} - -/// A convenient wrapper for a table name (that comes from an `ObjectName`). -pub struct Table { - pub(crate) name: Box, -} - -impl Table { - pub fn new(name: ObjectName) -> Self { - Self { - name: name.to_string().into(), - } - } -} - -#[derive(Debug)] -pub enum Column { - /// Any expression, not followed by `[ AS ] alias` - UnnamedExpr(Expr), - /// An qualified `table.*` - QualifiedWildcard { table: String }, - /// An unqualified `SELECT *` - Wildcard, -} - -/// The list of expressions for `SELECT expr1, expr2...` determining what data to extract. -#[derive(Debug, Clone)] -pub struct Selection { - pub(crate) clause: FieldOp, -} - -impl Selection { - pub fn with_cmp(op: OpQuery, lhs: FieldOp, rhs: FieldOp) -> Self { - let cmp = FieldOp::new(op, lhs, rhs); - Selection { clause: cmp } - } -} - -#[derive(Debug)] -pub struct OnExpr { - pub op: OpCmp, - pub lhs: FieldName, - pub rhs: FieldName, -} - -/// The `JOIN [INNER] ON join_expr OpCmp join_expr` clause -#[derive(Debug)] -pub enum Join { - Inner { rhs: Arc, on: OnExpr }, -} - -/// The list of tables in `... FROM table1 [JOIN table2] ...` -#[derive(Debug)] -pub struct From { - pub root: Arc, - pub joins: Vec, -} - -impl From { - pub fn new(root: Arc) -> Self { - Self { - root, - joins: Vec::new(), - } - } - - pub fn with_inner_join(mut self, rhs: Arc, on: OnExpr) -> Self { - // Check if the field are inverted: - // FROM t1 JOIN t2 ON t2.id = t1.id - let on = if on.rhs.table() == self.root.table_id && self.root.get_column_by_field(on.rhs).is_some() { - OnExpr { - op: on.op.reverse(), - lhs: on.rhs, - rhs: on.lhs, - } - } else { - on - }; - - self.joins.push(Join::Inner { rhs, on }); - self - } - - /// Returns all the tables, including the ones inside the joins - pub fn iter_tables(&self) -> impl Clone + Iterator { - [&*self.root] - .into_iter() - .chain(self.joins.iter().map(|Join::Inner { rhs, .. }| &**rhs)) - } - - /// Returns all the table names as a `Vec`, including the ones inside the joins. - pub fn table_names(&self) -> Vec { - self.iter_tables().map(|x| x.table_name.clone()).collect() - } - - /// Returns the field matching `f` looking in `tables`. - /// - /// See [`find_field`] for more details. - pub(super) fn find_field(&self, f: &str) -> Result<(FieldName, &AlgebraicType), PlanError> { - find_field(self.iter_tables(), f) - } - - /// Returns the name of the table, - /// together with the column definition at position `field.col`, - /// for table `field.table_id`. - pub(super) fn find_field_name(&self, field: FieldName) -> Option<(&str, &ColumnSchema)> { - self.iter_tables().find_map(|t| { - if t.table_id == field.table() { - t.get_column_by_field(field).map(|c| (&*t.table_name, c)) - } else { - None - } - }) - } -} - -/// Returns the field matching `f` looking in `tables` -/// for `{table_name}.{field_name}` (qualified) or `{field_name}`. -/// -/// # Errors -/// -/// If the field is not fully qualified by the user, -/// it may lead to duplicates, causing ambiguity. -/// For example, in the query `WHERE a = lhs.a AND rhs.a = a`, -/// the fields `['lhs.a', 'rhs.a', 'a']` are ambiguous. -/// -/// Returns an error if no fields match `f` (`PlanError::UnknownField`) -/// or if the field is ambiguous due to multiple matches (`PlanError::AmbiguousField`). -pub fn find_field<'a>( - mut tables: impl Clone + Iterator, - f: &str, -) -> Result<(FieldName, &'a AlgebraicType), PlanError> { - fn extract_table_field(ident: &str) -> Result<(Option<&str>, &str), RelationError> { - let mut iter = ident.rsplit('.'); - let field = iter.next(); - let table = iter.next(); - let more = iter.next(); - match (field, table, more) { - (Some(field), table, None) => Ok((table, field)), - _ => Err(RelationError::FieldPathInvalid(ident.to_string())), - } - } - - let (f_table, f_field) = extract_table_field(f)?; - - let tables2 = tables.clone(); - let unknown_field = || { - let field = match f_table { - Some(f_table) => format!("{f_table}.{f_field}"), - None => f_field.into(), - }; - let tables = tables2.map(|t| t.table_name.clone()).collect(); - Err(PlanError::UnknownField { field, tables }) - }; - - if let Some(f_table) = f_table { - // Qualified field `{f_table}.{f_field}`. - // Narrow search to first table with name `f_table`. - return if let Some(col) = tables - .find(|t| &*t.table_name == f_table) - .and_then(|t| t.get_column_by_name(f_field)) - { - Ok((FieldName::new(col.table_id, col.col_pos), &col.col_type)) - } else { - unknown_field() - }; - } - - // Unqualified field `{f_field}`. - // Find all columns with a matching name. - let mut fields = tables - .flat_map(|t| t.columns().iter().map(move |col| (t, col))) - .filter(|(_, col)| &*col.col_name == f_field); - - // When there's a single candidate, we've found our match. - // Otherwise, if are none or several candidates, error. - match (fields.next(), fields.next()) { - (None, _) => unknown_field(), - (Some((_, col)), None) => Ok((FieldName::new(col.table_id, col.col_pos), &col.col_type)), - (Some(f1), Some(f2)) => { - let found = [f1, f2] - .into_iter() - .chain(fields) - .map(|(table, column)| format!("{0}.{1}", &table.table_name, &column.col_name)) - .collect(); - Err(PlanError::AmbiguousField { field: f.into(), found }) - } - } -} - -/// Defines the portions of the `SQL` standard that we support. -#[derive(Debug)] -pub enum SqlAst { - Select { - from: From, - project: Box<[Column]>, - selection: Option, - }, - Insert { - table: Arc, - columns: Box<[ColId]>, - values: Box<[Box<[ColExpr]>]>, - }, - Update { - table: Arc, - assignments: IntMap, - selection: Option, - }, - Delete { - table: Arc, - selection: Option, - }, - SetVar { - name: String, - literal: String, - }, - ReadVar { - name: String, - }, -} - -fn extract_field<'a>( - tables: impl Clone + Iterator, - of: &SqlExpr, -) -> Result, PlanError> { - match of { - SqlExpr::Identifier(x) => find_field(tables, &x.value).map(|(_, ty)| Some(ty)), - SqlExpr::CompoundIdentifier(ident) => { - let col_name = compound_ident(ident); - find_field(tables, &col_name).map(|(_, ty)| Some(ty)) - } - _ => Ok(None), - } -} - -/// Parses `value` according to the type of the field, as provided by `field`. -/// -/// When `field` is `None`, the type is inferred to an integer or float depending on if a `.` separator is present. -/// The `is_long` parameter decides whether to parse as a 64-bit type or a 32-bit one. -fn infer_number(field: Option<&AlgebraicType>, value: &str, is_long: bool) -> Result { - match field { - None => { - let ty = if value.contains('.') { - if is_long { - AlgebraicType::F64 - } else { - AlgebraicType::F32 - } - } else if is_long { - AlgebraicType::I64 - } else { - AlgebraicType::I32 - }; - parse(value, &ty) - } - Some(f) => parse(value, f), - } -} - -/// `Enums` in `sql` are simple strings like `Player` that must be inferred by their type. -/// -/// If `field` is a `simple enum` it looks for the `tag` specified by `value`, else it should be a plain `String`. -fn infer_str_or_enum(field: Option<&AlgebraicType>, value: String) -> Result { - if let Some(sum) = field.and_then(|x| x.as_sum()) { - parse_simple_enum(sum, &value) - } else { - Ok(AlgebraicValue::String(value.into())) - } -} - -/// Compiles a [SqlExpr] expression into a [ColumnOp] -fn compile_expr_value<'a>( - tables: impl Clone + Iterator, - field: Option<&'a AlgebraicType>, - of: SqlExpr, -) -> Result { - Ok(FieldOp::Field(match of { - SqlExpr::Identifier(name) => FieldExpr::Name(find_field(tables, &name.value)?.0), - SqlExpr::CompoundIdentifier(ident) => { - let col_name = compound_ident(&ident); - FieldExpr::Name(find_field(tables, &col_name)?.0) - } - SqlExpr::Value(x) => FieldExpr::Value(match x { - Value::Number(value, is_long) => infer_number(field, &value, is_long)?, - Value::SingleQuotedString(s) => infer_str_or_enum(field, s)?, - Value::DoubleQuotedString(s) => AlgebraicValue::String(s.into()), - Value::HexStringLiteral(s) => infer_number(field, &s, false)?, - Value::Boolean(x) => AlgebraicValue::Bool(x), - Value::Null => AlgebraicValue::OptionNone(), - x => { - return Err(PlanError::Unsupported { - feature: format!("Unsupported value: {x}."), - }); - } - }), - SqlExpr::BinaryOp { left, op, right } => { - let (op, lhs, rhs) = compile_bin_op(tables, op, left, right)?; - - return Ok(FieldOp::new(op, lhs, rhs)); - } - SqlExpr::Nested(x) => { - return compile_expr_value(tables, field, *x); - } - x => { - return Err(PlanError::Unsupported { - feature: format!("Unsupported expression: {x}"), - }); - } - })) -} - -fn compile_expr_field(table: &From, field: Option<&AlgebraicType>, of: SqlExpr) -> Result { - match compile_expr_value(table.iter_tables(), field, of)? { - FieldOp::Field(field) => Ok(field), - x => Err(PlanError::Unsupported { - feature: format!("Complex expression {x} on insert..."), - }), - } -} - -/// Compiles the [Table] from a section of `SQL` that describes a table clause. -fn compile_table_factor(table: TableFactor) -> Result { - match table { - TableFactor::Table { - name, - alias, - args, - with_hints, - version, - partitions, - } => { - unsupported!("TableFactor", alias, args, with_hints, version, partitions); - - Ok(Table::new(name)) - } - x => Err(PlanError::Unsupported { - feature: format!("TableFactor with syntax {x:?} not supported"), - }), - } -} - -/// Compiles a binary operation like `field > 1` -fn compile_bin_op<'a>( - tables: impl Clone + Iterator, - op: BinaryOperator, - lhs: Box, - rhs: Box, -) -> Result<(OpQuery, FieldOp, FieldOp), PlanError> { - let op: OpQuery = match op { - BinaryOperator::Gt => OpCmp::Gt.into(), - BinaryOperator::Lt => OpCmp::Lt.into(), - BinaryOperator::GtEq => OpCmp::GtEq.into(), - BinaryOperator::LtEq => OpCmp::LtEq.into(), - BinaryOperator::Eq => OpCmp::Eq.into(), - BinaryOperator::NotEq => OpCmp::NotEq.into(), - BinaryOperator::And => OpLogic::And.into(), - BinaryOperator::Or => OpLogic::Or.into(), - x => { - return Err(PlanError::Unsupported { - feature: format!("BinaryOperator not supported in WHERE: {x}."), - }); - } - }; - - let field_lhs = extract_field(tables.clone(), &lhs)?; - let field_rhs = extract_field(tables.clone(), &rhs)?; - // This inversion is for inferring the type of the right side, like in `inventory.id = 1`, - // so `1` get the type of `inventory.id` - let lhs = compile_expr_value(tables.clone(), field_rhs, *lhs)?; - let rhs = compile_expr_value(tables, field_lhs, *rhs)?; - - Ok((op, lhs, rhs)) -} - -fn _compile_where(table: &From, filter: SqlExpr) -> Result, PlanError> { - match filter { - SqlExpr::BinaryOp { left, op, right } => { - let (op, lhs, rhs) = compile_bin_op(table.iter_tables(), op, left, right)?; - - Ok(Some(Selection::with_cmp(op, lhs, rhs))) - } - SqlExpr::Nested(x) => _compile_where(table, *x), - x => Err(PlanError::Unsupported { - feature: format!("Unsupported in WHERE: {x}."), - }), - } -} - -/// Compiles the `WHERE` clause -fn compile_where(table: &From, filter: Option) -> Result, PlanError> { - if let Some(filter) = filter { - _compile_where(table, filter) - } else { - Ok(None) - } -} - pub struct SchemaViewer<'a, T> { tx: &'a T, auth: &'a AuthCtx, @@ -543,489 +75,3 @@ impl<'a, T> SchemaViewer<'a, T> { Self { tx, auth } } } - -pub trait TableSchemaView { - fn find_table(&self, db: &RelationalDB, t: Table) -> Result, PlanError>; -} - -impl TableSchemaView for Tx { - fn find_table(&self, db: &RelationalDB, t: Table) -> Result, PlanError> { - let table_id = db - .table_id_from_name(self, &t.name)? - .ok_or(PlanError::UnknownTable { table: t.name.clone() })?; - if !db.table_id_exists(self, &table_id) { - return Err(PlanError::UnknownTable { table: t.name }); - } - db.schema_for_table(self, table_id) - .map_err(move |e| PlanError::DatabaseInternal(Box::new(e))) - } -} - -impl TableSchemaView for MutTx { - fn find_table(&self, db: &RelationalDB, t: Table) -> Result, PlanError> { - let table_id = db - .table_id_from_name_mut(self, &t.name)? - .ok_or(PlanError::UnknownTable { table: t.name.clone() })?; - if !db.table_id_exists_mut(self, &table_id) { - return Err(PlanError::UnknownTable { table: t.name }); - } - db.schema_for_table_mut(self, table_id) - .map_err(|e| PlanError::DatabaseInternal(Box::new(e))) - } -} - -/// Compiles the `FROM` clause -fn compile_from( - db: &RelationalDB, - tx: &T, - from: &[TableWithJoins], -) -> Result { - if from.len() > 1 { - return Err(PlanError::Unsupported { - feature: "Multiple tables in `FROM`.".into(), - }); - } - - let root_table = match from.first() { - Some(root_table) => root_table, - None => { - return Err(PlanError::Unstructured("Missing `FROM` expression.".into())); - } - }; - - let t = compile_table_factor(root_table.relation.clone())?; - let base = tx.find_table(db, t)?; - let mut base = From::new(base); - - for join in &root_table.joins { - match &join.join_operator { - JoinOperator::Inner(constraint) => { - let t = compile_table_factor(join.relation.clone())?; - let join = tx.find_table(db, t)?; - - match constraint { - JoinConstraint::On(x) => { - let tables = base.iter_tables().chain([&*join]); - let expr = compile_expr_value(tables, None, x.clone())?; - match expr { - FieldOp::Field(_) => {} - FieldOp::Cmp { op, lhs, rhs } => { - let op = match op { - OpQuery::Cmp(op) => op, - OpQuery::Logic(op) => { - return Err(PlanError::Unsupported { - feature: format!("Can't use operator {op} on JOIN clause"), - }); - } - }; - let (lhs, rhs) = match (*lhs, *rhs) { - (FieldOp::Field(FieldExpr::Name(lhs)), FieldOp::Field(FieldExpr::Name(rhs))) => { - (lhs, rhs) - } - (lhs, rhs) => { - return Err(PlanError::Unsupported { - feature: format!( - "Can't compare non-field expressions {lhs} and {rhs} in JOIN clause" - ), - }); - } - }; - - base = base.with_inner_join(join, OnExpr { op, lhs, rhs }) - } - } - } - x => { - return Err(PlanError::Unsupported { - feature: format!("JOIN constrain {x:?} is not valid, can be only on the form Table.Field [Cmp] Table.Field"), - }); - } - } - } - x => { - return Err(PlanError::Unsupported { - feature: format!("Unsupported JOIN operator: `{x:?}`"), - }); - } - } - } - - Ok(base) -} - -fn compound_ident(ident: &[Ident]) -> String { - ident.iter().map(ToString::to_string).collect::>().join(".") -} - -fn compile_select_item(from: &From, select_item: SelectItem) -> Result { - match select_item { - SelectItem::UnnamedExpr(expr) => match expr { - sqlparser::ast::Expr::Identifier(ident) => { - let col_name = ident.to_string(); - - Ok(Column::UnnamedExpr(Expr::Ident(col_name))) - } - sqlparser::ast::Expr::CompoundIdentifier(ident) => { - let col_name = compound_ident(&ident); - - Ok(Column::UnnamedExpr(Expr::Ident(col_name))) - } - sqlparser::ast::Expr::Value(_) => { - let value = compile_expr_value(from.iter_tables(), None, expr)?; - match value { - FieldOp::Field(value) => match value { - FieldExpr::Name(_) => Err(PlanError::Unsupported { - feature: "Should not be an identifier in Expr::Value".to_string(), - }), - FieldExpr::Value(x) => Ok(Column::UnnamedExpr(Expr::Value(x))), - }, - x => Err(PlanError::Unsupported { - feature: format!("Should not be an {x} in Expr::Value"), - }), - } - } - sqlparser::ast::Expr::Nested(x) => compile_select_item(from, SelectItem::UnnamedExpr(*x)), - _ => Err(PlanError::Unsupported { - feature: "Only columns names & scalars are supported.".into(), - }), - }, - SelectItem::ExprWithAlias { expr: _, alias: _ } => Err(PlanError::Unsupported { - feature: "ExprWithAlias".into(), - }), - SelectItem::QualifiedWildcard(ident, _) => Ok(Column::QualifiedWildcard { - table: ident.to_string(), - }), - SelectItem::Wildcard(_) => Ok(Column::Wildcard), - } -} - -/// Compiles the `SELECT ...` clause -fn compile_select( - db: &RelationalDB, - tx: &T, - select: Select, -) -> Result { - let from = compile_from(db, tx, &select.from)?; - - // SELECT ... - let mut project = Vec::with_capacity(select.projection.len()); - for select_item in select.projection { - project.push(compile_select_item(&from, select_item)?); - } - let project = project.into(); - - let selection = compile_where(&from, select.selection)?; - - Ok(SqlAst::Select { - from, - project, - selection, - }) -} - -/// Compiles any `query` clause (currently only `SELECT...`) -fn compile_query(db: &RelationalDB, tx: &T, query: Query) -> Result { - unsupported!( - "SELECT", - query.order_by, - query.fetch, - query.limit, - query.offset, - query.locks, - query.with - ); - - match *query.body { - SetExpr::Select(select) => { - unsupported!( - "SELECT", - select.distinct, - select.top, - select.into, - select.lateral_views, - select.group_by, - select.having, - select.sort_by - ); - - compile_select(db, tx, *select) - } - SetExpr::Query(_) => Err(PlanError::Unsupported { - feature: "Query".into(), - }), - SetExpr::SetOperation { - op: _, - set_quantifier: _, - left: _, - right: _, - } => Err(PlanError::Unsupported { - feature: "SetOperation".into(), - }), - SetExpr::Values(_) => Err(PlanError::Unsupported { - feature: "Values".into(), - }), - SetExpr::Insert(_) => Err(PlanError::Unsupported { - feature: "SetExpr::Insert".into(), - }), - SetExpr::Update(_) => Err(PlanError::Unsupported { - feature: "SetExpr::Update".into(), - }), - SetExpr::Table(_) => Err(PlanError::Unsupported { - feature: "SetExpr::Table".into(), - }), - } -} - -/// Compiles the `INSERT ...` clause -fn compile_insert( - db: &RelationalDB, - tx: &T, - table_name: ObjectName, - columns: Vec, - data: &Values, -) -> Result { - let table = tx.find_table(db, Table::new(table_name))?; - - let table = From::new(table); - - let columns = columns - .into_iter() - .map(|x| { - table - .find_field(&format!("{}.{}", &table.root.table_name, x)) - .map(|(f, _)| f.col) - }) - .collect::, _>>()?; - - let mut values = Vec::with_capacity(data.rows.len()); - for x in &data.rows { - let mut row = Vec::with_capacity(x.len()); - for (pos, v) in x.iter().enumerate() { - let field_ty = table.root.get_column(pos).map(|col| &col.col_type); - row.push(compile_expr_field(&table, field_ty, v.clone())?.strip_table()); - } - values.push(row.into()); - } - let values = values.into(); - - Ok(SqlAst::Insert { - table: table.root, - columns, - values, - }) -} - -/// Compiles the `UPDATE ...` clause -fn compile_update( - db: &RelationalDB, - tx: &T, - table: Table, - assignments: Vec, - selection: Option, -) -> Result { - let table = From::new(tx.find_table(db, table)?); - let selection = compile_where(&table, selection)?; - - let mut assigns = IntMap::with_capacity(assignments.len()); - for col in assignments { - let name: String = col.id.iter().map(|x| x.to_string()).collect(); - let (field_name, field_ty) = table.find_field(&name)?; - let col_id = field_name.col; - - let value = compile_expr_field(&table, Some(field_ty), col.value)?.strip_table(); - assigns.insert(col_id, value); - } - - Ok(SqlAst::Update { - table: table.root, - assignments: assigns, - selection, - }) -} - -/// Compiles the `DELETE ...` clause -fn compile_delete( - db: &RelationalDB, - tx: &T, - table: Table, - selection: Option, -) -> Result { - let table = From::new(tx.find_table(db, table)?); - let selection = compile_where(&table, selection)?; - - Ok(SqlAst::Delete { - table: table.root, - selection, - }) -} - -// Compiles the equivalent of `SET key = value` -fn compile_set_config(name: ObjectName, value: Vec) -> Result { - let name = name.to_string(); - - let value = match value.as_slice() { - [first] => first.clone(), - _ => { - return Err(PlanError::Unsupported { - feature: format!("Invalid value for config: {name} => {value:?}."), - }); - } - }; - - let literal = match value { - SqlExpr::Value(x) => match x { - Value::Number(value, _) => value, - x => { - return Err(PlanError::Unsupported { - feature: format!("Unsupported value for config: {x}."), - }); - } - }, - x => { - return Err(PlanError::Unsupported { - feature: format!("Unsupported expression for config: {x}"), - }); - } - }; - - Ok(SqlAst::SetVar { name, literal }) -} - -/// Compiles the equivalent of `SHOW key` -fn compile_read_config(name: Vec) -> Result { - let name = match name.as_slice() { - [first] => first.to_string(), - _ => { - return Err(PlanError::Unsupported { - feature: format!("Invalid name for config: {name:?}"), - }); - } - }; - Ok(SqlAst::ReadVar { name }) -} - -/// Compiles a `SQL` clause -fn compile_statement( - db: &RelationalDB, - tx: &T, - statement: Statement, -) -> Result { - match statement { - Statement::Query(query) => Ok(compile_query(db, tx, *query)?), - Statement::Insert { - or, - into, - table_name, - columns, - overwrite, - source, - partitioned, - after_columns, - table, - on, - returning, - } => { - unsupported!( - "INSERT", - or, - overwrite, - partitioned, - after_columns, - table, - on, - returning - ); - if into { - let values = match &*source.body { - SetExpr::Values(values) => values, - _ => { - return Err(PlanError::Unsupported { - feature: "Insert WITHOUT values".into(), - }); - } - }; - - return compile_insert(db, tx, table_name, columns, values); - }; - - Err(PlanError::Unsupported { - feature: "INSERT without INTO".into(), - }) - } - Statement::Update { - table, - assignments, - from, - selection, - returning, - } => { - unsupported!("UPDATE", from, returning); - - let table_name = compile_table_factor(table.relation)?; - compile_update(db, tx, table_name, assignments, selection) - } - Statement::Delete { - tables, - from, - using, - selection, - returning, - } => { - unsupported!("DELETE", using, returning, tables); - if from.len() != 1 { - unsupported!("DELETE (multiple tables)", tables); - } - - let table = from.first().unwrap().clone(); - let table_name = compile_table_factor(table.relation)?; - compile_delete(db, tx, table_name, selection) - } - Statement::SetVariable { - local, - hivevar, - variable, - value, - } => { - unsupported!("SET", local, hivevar); - compile_set_config(variable, value) - } - Statement::ShowVariable { variable } => compile_read_config(variable), - x => Err(PlanError::Unsupported { - feature: format!("Syntax {x}"), - }), - } -} - -/// Compiles a `sql` string into a `Vec` using a SQL parser with [PostgreSqlDialect] -pub(crate) fn compile_to_ast( - db: &RelationalDB, - auth: &AuthCtx, - tx: &T, - sql_text: &str, -) -> Result, DBError> { - // NOTE: The following ensures compliance with the 1.0 sql api. - // Come 1.0, it will have replaced the current compilation stack. - compile_sql_stmt(sql_text, &SchemaViewer::new(tx, auth), auth)?; - - let dialect = PostgreSqlDialect {}; - let ast = Parser::parse_sql(&dialect, sql_text).map_err(|error| DBError::SqlParser { - sql: sql_text.to_string(), - error, - })?; - - let mut results = Vec::new(); - for statement in ast { - let plan_result = compile_statement(db, tx, statement); - let query = match plan_result { - Ok(plan) => plan, - Err(error) => { - return Err(DBError::Plan { - sql: sql_text.to_string(), - error, - }); - } - }; - results.push(query); - } - Ok(results) -} diff --git a/crates/core/src/sql/compiler.rs b/crates/core/src/sql/compiler.rs deleted file mode 100644 index f801f64d177..00000000000 --- a/crates/core/src/sql/compiler.rs +++ /dev/null @@ -1,999 +0,0 @@ -use super::ast::TableSchemaView; -use super::ast::{compile_to_ast, Column, From, Join, Selection, SqlAst}; -use super::type_check::TypeCheck; -use crate::db::relational_db::RelationalDB; -use crate::error::{DBError, PlanError}; -use core::ops::Deref; -use spacetimedb_data_structures::map::IntMap; -use spacetimedb_datastore::locking_tx_datastore::state_view::StateView; -use spacetimedb_lib::identity::AuthCtx; -use spacetimedb_primitives::ColId; -use spacetimedb_schema::relation::{self, ColExpr, DbTable, FieldName, Header}; -use spacetimedb_schema::schema::TableSchema; -use spacetimedb_vm::expr::{CrudExpr, Expr, FieldExpr, QueryExpr, SourceExpr}; -use spacetimedb_vm::operator::OpCmp; -use std::sync::Arc; - -/// DIRTY HACK ALERT: Maximum allowed length, in UTF-8 bytes, of SQL queries. -/// Any query longer than this will be rejected. -/// This prevents a stack overflow when compiling queries with deeply-nested `AND` and `OR` conditions. -const MAX_SQL_LENGTH: usize = 50_000; - -/// Compile the `SQL` expression into an `ast` -pub fn compile_sql( - db: &RelationalDB, - auth: &AuthCtx, - tx: &T, - sql_text: &str, -) -> Result, DBError> { - if sql_text.len() > MAX_SQL_LENGTH { - return Err(anyhow::anyhow!("SQL query exceeds maximum allowed length: \"{sql_text:.120}...\"").into()); - } - tracing::trace!(sql = sql_text); - let ast = compile_to_ast(db, auth, tx, sql_text)?; - - // TODO(perf, bikeshedding): SmallVec? - let mut results = Vec::with_capacity(ast.len()); - - for sql in ast { - results.push(compile_statement(db, sql).map_err(|error| DBError::Plan { - sql: sql_text.to_string(), - error, - })?); - } - - Ok(results) -} - -fn expr_for_projection(table: &From, of: Expr) -> Result { - match of { - Expr::Ident(x) => table.find_field(&x).map(|(f, _)| FieldExpr::Name(f)), - Expr::Value(x) => Ok(FieldExpr::Value(x)), - x => unreachable!("Wrong expression in SQL query {:?}", x), - } -} - -/// Compiles a `WHERE ...` clause -fn compile_where(mut q: QueryExpr, filter: Selection) -> Result { - for op in filter.clause.flatten_ands() { - q = q.with_select(op)?; - } - Ok(q) -} - -/// Compiles a `SELECT ...` clause -fn compile_select(table: From, project: Box<[Column]>, selection: Option) -> Result { - let mut not_found = Vec::with_capacity(project.len()); - let mut col_ids = Vec::new(); - let mut qualified_wildcards = Vec::new(); - //Match columns to their tables... - for select_item in Vec::from(project) { - match select_item { - Column::UnnamedExpr(x) => match expr_for_projection(&table, x) { - Ok(field) => col_ids.push(field), - Err(PlanError::UnknownField { field, tables: _ }) => not_found.push(field), - Err(err) => return Err(err), - }, - Column::QualifiedWildcard { table: name } => match table.iter_tables().find(|x| *x.table_name == name) { - Some(t) => { - for c in t.columns().iter() { - col_ids.push(FieldName::new(t.table_id, c.col_pos).into()); - } - qualified_wildcards.push(t.table_id); - } - _ => { - return Err(PlanError::TableNotFoundQualified { expect: name }); - } - }, - Column::Wildcard => {} - } - } - - if !not_found.is_empty() { - return Err(PlanError::UnknownFields { - fields: not_found, - tables: table.table_names(), - }); - } - - let source_expr: SourceExpr = table.root.deref().into(); - let mut q = QueryExpr::new(source_expr); - - for join in table.joins { - match join { - Join::Inner { rhs, on } => { - let col_lhs = q.head().column_pos_or_err(on.lhs)?; - let rhs_source_expr: SourceExpr = rhs.deref().into(); - let col_rhs = rhs_source_expr.head().column_pos_or_err(on.rhs)?; - - match on.op { - OpCmp::Eq => {} - x => unreachable!("Unsupported operator `{x}` for joins"), - } - // Always construct inner joins, never semijoins. - // The query optimizer can rewrite certain inner joins into semijoins later in the pipeline. - // The full pipeline for a query like `SELECT lhs.* FROM lhs JOIN rhs ON lhs.a = rhs.a` is: - // - We produce `[JoinInner(semi: false), Project]`. - // - Optimizer rewrites to `[JoinInner(semi: true)]`. - // - Optimizer rewrites to `[IndexJoin]`. - // For incremental queries, this all happens on the original query with `DbTable` sources. - // Then, the query is "incrementalized" by replacing the sources with `MemTable`s, - // and the `IndexJoin` is rewritten back into a `JoinInner(semi: true)`. - q = q.with_join_inner(rhs_source_expr, col_lhs, col_rhs, false); - } - } - } - - if let Some(filter) = selection { - q = compile_where(q, filter)?; - } - // It is important to project at the end. - // This is so joins and filters see fields that are not projected. - // It is also important to identify a wildcard project of the form `table.*`. - // This implies a potential semijoin and additional optimization opportunities. - let qualified_wildcard = (qualified_wildcards.len() == 1).then(|| qualified_wildcards[0]); - q = q.with_project(col_ids, qualified_wildcard)?; - - Ok(q) -} - -/// Builds the schema description [DbTable] from the [TableSchema] and their list of columns -fn compile_columns(table: &TableSchema, cols: &[ColId]) -> DbTable { - let mut columns = Vec::with_capacity(cols.len()); - let cols = cols - .iter() - // TODO: should we error here instead? - // When would the user be passing in columns that aren't present? - .filter_map(|col| table.get_column(col.idx())) - .map(|col| relation::Column::new(FieldName::new(table.table_id, col.col_pos), col.col_type.clone())); - columns.extend(cols); - - let header = Header::from(table).project_col_list(&columns.iter().map(|x| x.field.col).collect()); - - DbTable::new(Arc::new(header), table.table_id, table.table_type, table.table_access) -} - -/// Compiles a `INSERT ...` clause -fn compile_insert(table: &TableSchema, cols: &[ColId], values: Box<[Box<[ColExpr]>]>) -> CrudExpr { - let table = compile_columns(table, cols); - - let mut rows = Vec::with_capacity(values.len()); - for x in Vec::from(values) { - let mut row = Vec::with_capacity(x.len()); - for v in Vec::from(x) { - match v { - ColExpr::Col(x) => { - todo!("Deal with idents in insert?: {}", x) - } - ColExpr::Value(x) => { - row.push(x); - } - } - } - rows.push(row.into()) - } - - CrudExpr::Insert { table, rows } -} - -/// Compiles a `DELETE ...` clause -fn compile_delete(table: Arc, selection: Option) -> Result { - let query = QueryExpr::new(&*table); - let query = if let Some(filter) = selection { - compile_where(query, filter)? - } else { - query - }; - Ok(CrudExpr::Delete { query }) -} - -/// Compiles a `UPDATE ...` clause -fn compile_update( - table: Arc, - assignments: IntMap, - selection: Option, -) -> Result { - let query = QueryExpr::new(&*table); - let delete = if let Some(filter) = selection { - compile_where(query, filter)? - } else { - query - }; - - Ok(CrudExpr::Update { delete, assignments }) -} - -/// Compiles a `SQL` clause -fn compile_statement(db: &RelationalDB, statement: SqlAst) -> Result { - statement.type_check()?; - - let q = match statement { - SqlAst::Select { - from, - project, - selection, - } => CrudExpr::Query(compile_select(from, project, selection)?), - SqlAst::Insert { table, columns, values } => compile_insert(&table, &columns, values), - SqlAst::Update { - table, - assignments, - selection, - } => compile_update(table, assignments, selection)?, - SqlAst::Delete { table, selection } => compile_delete(table, selection)?, - SqlAst::SetVar { name, literal } => CrudExpr::SetVar { name, literal }, - SqlAst::ReadVar { name } => CrudExpr::ReadVar { name }, - }; - - Ok(q.optimize(&|table_id, table_name| db.row_count(table_id, table_name))) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::db::relational_db::tests_utils::{begin_mut_tx, begin_tx, insert, with_auto_commit, TestDB}; - use crate::sql::execute::tests::run_for_testing; - use spacetimedb_lib::error::{ResultTest, TestError}; - use spacetimedb_lib::{ConnectionId, Identity}; - use spacetimedb_primitives::{col_list, ColList, TableId}; - use spacetimedb_sats::{product, AlgebraicType, AlgebraicValue, GroundSpacetimeType as _}; - use spacetimedb_vm::expr::{ColumnOp, IndexJoin, IndexScan, JoinExpr, Query}; - use std::convert::From; - use std::ops::Bound; - - fn assert_index_scan( - op: &Query, - cols: impl Into, - low_bound: Bound, - up_bound: Bound, - ) -> TableId { - if let Query::IndexScan(IndexScan { table, columns, bounds }) = op { - assert_eq!(columns, &cols.into(), "Columns don't match"); - assert_eq!(bounds.0, low_bound, "Lower bound don't match"); - assert_eq!(bounds.1, up_bound, "Upper bound don't match"); - table.table_id - } else { - panic!("Expected IndexScan, got {op}"); - } - } - - fn assert_one_eq_index_scan(op: &Query, cols: impl Into, val: AlgebraicValue) -> TableId { - let val = Bound::Included(val); - assert_index_scan(op, cols, val.clone(), val) - } - - fn assert_select(op: &Query) { - assert!(matches!(op, Query::Select(_))); - } - - fn compile_sql( - db: &RelationalDB, - tx: &T, - sql: &str, - ) -> Result, DBError> { - super::compile_sql(db, &AuthCtx::for_testing(), tx, sql) - } - - #[test] - fn compile_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] without any indexes - let schema = &[("a", AlgebraicType::U64)]; - let indexes = &[]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Compile query - let sql = "select * from test where a = 1"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - assert_select(&query[0]); - Ok(()) - } - - #[test] - fn compile_not_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with cols [a, b] and index on [b]. - db.create_table_for_test( - "test", - &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)], - &[1.into(), 0.into()], - )?; - - let tx = begin_tx(&db); - // Should work with any qualified field. - let sql = "select * from test where a = 1 and b <> 3"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(2, query.len()); - assert_one_eq_index_scan(&query[0], 0, 1u64.into()); - assert_select(&query[1]); - Ok(()) - } - - #[test] - fn compile_index_eq_basic() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with index on [a] - let schema = &[("a", AlgebraicType::U64)]; - let indexes = &[0.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - //Compile query - let sql = "select * from test where a = 1"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - assert_one_eq_index_scan(&query[0], 0, 1u64.into()); - Ok(()) - } - - #[test] - fn compile_eq_identity_connection_id() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] without any indexes - let schema = &[ - ("identity", Identity::get_type()), - ("identity_mix", Identity::get_type()), - ("connection_id", ConnectionId::get_type()), - ]; - let indexes = &[]; - let table_id = db.create_table_for_test("test", schema, indexes)?; - - let row = product![ - Identity::__dummy(), - Identity::from_hex("93dda09db9a56d8fa6c024d843e805d8262191db3b4ba84c5efcd1ad451fed4e").unwrap(), - ConnectionId::ZERO, - ]; - - with_auto_commit(&db, |tx| { - insert(&db, tx, table_id, &row.clone())?; - Ok::<(), TestError>(()) - })?; - - // Check can be used by CRUD ops: - let sql = &format!( - "INSERT INTO test (identity, identity_mix, connection_id) VALUES ({}, x'91DDA09DB9A56D8FA6C024D843E805D8262191DB3B4BA84C5EFCD1AD451FED4E', {})", - Identity::__dummy(), - ConnectionId::ZERO, - ); - run_for_testing(&db, sql)?; - - // Compile query, check for both hex formats and it to be case-insensitive... - let sql = &format!( - "select * from test where identity = {} AND identity_mix = x'93dda09db9a56d8fa6c024d843e805D8262191db3b4bA84c5efcd1ad451fed4e' AND connection_id = x'{}' AND connection_id = {}", - Identity::__dummy(), - ConnectionId::ZERO, - ConnectionId::ZERO, - ); - - let rows = run_for_testing(&db, sql)?; - - let tx = begin_tx(&db); - let CrudExpr::Query(QueryExpr { - source: _, - query: mut ops, - }) = compile_sql(&db, &tx, sql)?.remove(0) - else { - panic!("Expected QueryExpr"); - }; - - assert_eq!(1, ops.len()); - - // Assert no index scan - let Query::Select(_) = ops.remove(0) else { - panic!("Expected Select"); - }; - - assert_eq!(rows, vec![row]); - - Ok(()) - } - - #[test] - fn compile_eq_and_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Note, order does not matter. - // The sargable predicate occurs last, but we can still generate an index scan. - let sql = "select * from test where a = 1 and b = 2"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(2, query.len()); - assert_one_eq_index_scan(&query[0], 1, 2u64.into()); - assert_select(&query[1]); - Ok(()) - } - - #[test] - fn compile_index_eq_and_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Note, order does not matter. - // The sargable predicate occurs first and we can generate an index scan. - let sql = "select * from test where b = 2 and a = 1"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(2, query.len()); - assert_one_eq_index_scan(&query[0], 1, 2u64.into()); - assert_select(&query[1]); - Ok(()) - } - - #[test] - fn compile_index_multi_eq_and_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with index on [b] - let schema = &[ - ("a", AlgebraicType::U64), - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - db.create_table_for_test_multi_column("test", schema, col_list![0, 1])?; - - let tx = begin_mut_tx(&db); - let sql = "select * from test where b = 2 and a = 1"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - assert_one_eq_index_scan(&query[0], col_list![0, 1], product![1u64, 2u64].into()); - Ok(()) - } - - #[test] - fn compile_eq_or_eq() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with indexes on [a] and [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[0.into(), 1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Compile query - let sql = "select * from test where a = 1 or b = 2"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - // Assert no index scan because OR is not sargable. - assert_select(&query[0]); - Ok(()) - } - - #[test] - fn compile_index_range_open() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with indexes on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Compile query - let sql = "select * from test where b > 2"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - assert_index_scan(&query[0], 1, Bound::Excluded(AlgebraicValue::U64(2)), Bound::Unbounded); - - Ok(()) - } - - #[test] - fn compile_index_range_closed() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with indexes on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Compile query - let sql = "select * from test where b > 2 and b < 5"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(1, query.len()); - assert_index_scan( - &query[0], - 1, - Bound::Excluded(AlgebraicValue::U64(2)), - Bound::Excluded(AlgebraicValue::U64(5)), - ); - - Ok(()) - } - - #[test] - fn compile_index_eq_select_range() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with indexes on [a] and [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[0.into(), 1.into()]; - db.create_table_for_test("test", schema, indexes)?; - - let tx = begin_tx(&db); - // Note, order matters - the equality condition occurs first which - // means an index scan will be generated rather than the range condition. - let sql = "select * from test where a = 3 and b > 2 and b < 5"; - let CrudExpr::Query(QueryExpr { source: _, query }) = compile_sql(&db, &tx, sql)?.remove(0) else { - panic!("Expected QueryExpr"); - }; - assert_eq!(2, query.len()); - assert_one_eq_index_scan(&query[0], 0, 3u64.into()); - assert_select(&query[1]); - Ok(()) - } - - #[test] - fn compile_join_lhs_push_down() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [a] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[0.into()]; - let lhs_id = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with no indexes - let schema = &[("b", AlgebraicType::U64), ("c", AlgebraicType::U64)]; - let indexes = &[]; - let rhs_id = db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should push sargable equality condition below join - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where lhs.a = 3"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: source_lhs, - query, - .. - }) = exp - else { - panic!("unexpected expression: {exp:#?}"); - }; - - assert_eq!(source_lhs.table_id().unwrap(), lhs_id); - assert_eq!(query.len(), 3); - - // First operation in the pipeline should be an index scan - let table_id = assert_one_eq_index_scan(&query[0], 0, 3u64.into()); - - assert_eq!(table_id, lhs_id); - - // Followed by a join with the rhs table - let Query::JoinInner(JoinExpr { - ref rhs, - col_lhs, - col_rhs, - inner: Some(ref inner_header), - }) = query[1] - else { - panic!("unexpected operator {:#?}", query[1]); - }; - - assert_eq!(rhs.source.table_id().unwrap(), rhs_id); - assert_eq!(col_lhs, 1.into()); - assert_eq!(col_rhs, 0.into()); - assert_eq!(&**inner_header, &source_lhs.head().extend(rhs.source.head())); - Ok(()) - } - - #[test] - fn compile_join_lhs_push_down_no_index() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with no indexes - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let lhs_id = db.create_table_for_test("lhs", schema, &[])?; - - // Create table [rhs] with no indexes - let schema = &[("b", AlgebraicType::U64), ("c", AlgebraicType::U64)]; - let rhs_id = db.create_table_for_test("rhs", schema, &[])?; - - let tx = begin_tx(&db); - // Should push equality condition below join - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where lhs.a = 3"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: source_lhs, - query, - .. - }) = exp - else { - panic!("unexpected expression: {exp:#?}"); - }; - assert_eq!(source_lhs.table_id().unwrap(), lhs_id); - assert_eq!(query.len(), 3); - - // The first operation in the pipeline should be a selection with `col#0 = 3` - let Query::Select(ColumnOp::ColCmpVal { - cmp: OpCmp::Eq, - lhs: ColId(0), - rhs: AlgebraicValue::U64(3), - }) = query[0] - else { - panic!("unexpected operator {:#?}", query[0]); - }; - - // The join should follow the selection - let Query::JoinInner(JoinExpr { - ref rhs, - col_lhs, - col_rhs, - inner: Some(ref inner_header), - }) = query[1] - else { - panic!("unexpected operator {:#?}", query[1]); - }; - - assert_eq!(rhs.source.table_id().unwrap(), rhs_id); - assert_eq!(col_lhs, 1.into()); - assert_eq!(col_rhs, 0.into()); - assert_eq!(&**inner_header, &source_lhs.head().extend(rhs.source.head())); - assert!(rhs.query.is_empty()); - Ok(()) - } - - #[test] - fn compile_join_rhs_push_down_no_index() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with no indexes - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let lhs_id = db.create_table_for_test("lhs", schema, &[])?; - - // Create table [rhs] with no indexes - let schema = &[("b", AlgebraicType::U64), ("c", AlgebraicType::U64)]; - let rhs_id = db.create_table_for_test("rhs", schema, &[])?; - - let tx = begin_tx(&db); - // Should push equality condition below join - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c = 3"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: source_lhs, - query, - .. - }) = exp - else { - panic!("unexpected expression: {exp:#?}"); - }; - - assert_eq!(source_lhs.table_id().unwrap(), lhs_id); - assert_eq!(query.len(), 1); - - // First and only operation in the pipeline should be a join - let Query::JoinInner(JoinExpr { - ref rhs, - col_lhs, - col_rhs, - inner: None, - }) = query[0] - else { - panic!("unexpected operator {:#?}", query[0]); - }; - - assert_eq!(rhs.source.table_id().unwrap(), rhs_id); - assert_eq!(col_lhs, 1.into()); - assert_eq!(col_rhs, 0.into()); - - // The selection should be pushed onto the rhs of the join - let Query::Select(ColumnOp::ColCmpVal { - cmp: OpCmp::Eq, - lhs: ColId(1), - rhs: AlgebraicValue::U64(3), - }) = rhs.query[0] - else { - panic!("unexpected operator {:#?}", rhs.query[0]); - }; - Ok(()) - } - - #[test] - fn compile_join_lhs_and_rhs_push_down() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [a] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[0.into()]; - let lhs_id = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with index on [c] - let schema = &[("b", AlgebraicType::U64), ("c", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let rhs_id = db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should push the sargable equality condition into the join's left arg. - // Should push the sargable range condition into the join's right arg. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where lhs.a = 3 and rhs.c < 4"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: source_lhs, - query, - .. - }) = exp - else { - panic!("unexpected result from compilation: {exp:?}"); - }; - - assert_eq!(source_lhs.table_id().unwrap(), lhs_id); - assert_eq!(query.len(), 3); - - // First operation in the pipeline should be an index scan - let table_id = assert_one_eq_index_scan(&query[0], 0, 3u64.into()); - - assert_eq!(table_id, lhs_id); - - // Followed by a join - let Query::JoinInner(JoinExpr { - ref rhs, - col_lhs, - col_rhs, - inner: Some(ref inner_header), - }) = query[1] - else { - panic!("unexpected operator {:#?}", query[1]); - }; - - assert_eq!(rhs.source.table_id().unwrap(), rhs_id); - assert_eq!(col_lhs, 1.into()); - assert_eq!(col_rhs, 0.into()); - assert_eq!(&**inner_header, &source_lhs.head().extend(rhs.source.head())); - - assert_eq!(1, rhs.query.len()); - - // The right side of the join should be an index scan - let table_id = assert_index_scan( - &rhs.query[0], - 1, - Bound::Unbounded, - Bound::Excluded(AlgebraicValue::U64(4)), - ); - - assert_eq!(table_id, rhs_id); - Ok(()) - } - - #[test] - fn compile_index_join() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let lhs_id = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with index on [b, c] - let schema = &[ - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - let indexes = &[0.into(), 1.into()]; - let rhs_id = db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should generate an index join since there is an index on `lhs.b`. - // Should push the sargable range condition into the index join's probe side. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c > 2 and rhs.c < 4 and rhs.d = 3"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: SourceExpr::DbTable(DbTable { table_id, .. }), - query, - .. - }) = exp - else { - panic!("unexpected result from compilation: {exp:?}"); - }; - - assert_eq!(table_id, lhs_id); - assert_eq!(query.len(), 1); - - let Query::IndexJoin(IndexJoin { - probe_side: - QueryExpr { - source: SourceExpr::DbTable(DbTable { table_id, .. }), - query: rhs, - }, - probe_col, - index_side: SourceExpr::DbTable(DbTable { - table_id: index_table, .. - }), - index_col, - .. - }) = &query[0] - else { - panic!("unexpected operator {:#?}", query[0]); - }; - - assert_eq!(*table_id, rhs_id); - assert_eq!(*index_table, lhs_id); - assert_eq!(index_col, &1.into()); - assert_eq!(*probe_col, 0.into()); - - assert_eq!(2, rhs.len()); - - // The probe side of the join should be an index scan - let table_id = assert_index_scan( - &rhs[0], - 1, - Bound::Excluded(AlgebraicValue::U64(2)), - Bound::Excluded(AlgebraicValue::U64(4)), - ); - - assert_eq!(table_id, rhs_id); - - // Followed by a selection - let Query::Select(ColumnOp::ColCmpVal { - cmp: OpCmp::Eq, - lhs: ColId(2), - rhs: AlgebraicValue::U64(3), - }) = rhs[1] - else { - panic!("unexpected operator {:#?}", rhs[0]); - }; - Ok(()) - } - - #[test] - fn compile_index_multi_join() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let lhs_id = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with index on [b, c] - let schema = &[ - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - let indexes = col_list![0, 1]; - let rhs_id = db.create_table_for_test_multi_column("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should generate an index join since there is an index on `lhs.b`. - // Should push the sargable range condition into the index join's probe side. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c = 2 and rhs.b = 4 and rhs.d = 3"; - let exp = compile_sql(&db, &tx, sql)?.remove(0); - - let CrudExpr::Query(QueryExpr { - source: SourceExpr::DbTable(DbTable { table_id, .. }), - query, - .. - }) = exp - else { - panic!("unexpected result from compilation: {exp:?}"); - }; - - assert_eq!(table_id, lhs_id); - assert_eq!(query.len(), 1); - - let Query::IndexJoin(IndexJoin { - probe_side: - QueryExpr { - source: SourceExpr::DbTable(DbTable { table_id, .. }), - query: rhs, - }, - probe_col, - index_side: SourceExpr::DbTable(DbTable { - table_id: index_table, .. - }), - index_col, - .. - }) = &query[0] - else { - panic!("unexpected operator {:#?}", query[0]); - }; - - assert_eq!(*table_id, rhs_id); - assert_eq!(*index_table, lhs_id); - assert_eq!(index_col, &1.into()); - assert_eq!(*probe_col, 0.into()); - - assert_eq!(2, rhs.len()); - - // The probe side of the join should be an index scan - let table_id = assert_one_eq_index_scan(&rhs[0], col_list![0, 1], product![4u64, 2u64].into()); - - assert_eq!(table_id, rhs_id); - - // Followed by a selection - let Query::Select(ColumnOp::ColCmpVal { - cmp: OpCmp::Eq, - lhs: ColId(2), - rhs: AlgebraicValue::U64(3), - }) = rhs[1] - else { - panic!("unexpected operator {:#?}", rhs[0]); - }; - Ok(()) - } - - #[test] - fn compile_join_with_diff_col_names() -> ResultTest<()> { - let db = TestDB::durable()?; - db.create_table_for_test("A", &[("x", AlgebraicType::U64)], &[])?; - db.create_table_for_test("B", &[("y", AlgebraicType::U64)], &[])?; - assert!(compile_sql(&db, &begin_tx(&db), "select B.* from B join A on B.y = A.x").is_ok()); - Ok(()) - } - - #[test] - fn compile_type_check() -> ResultTest<()> { - let db = TestDB::durable()?; - db.create_table_for_test("PlayerState", &[("entity_id", AlgebraicType::U64)], &[0.into()])?; - db.create_table_for_test("EnemyState", &[("entity_id", AlgebraicType::I8)], &[0.into()])?; - db.create_table_for_test("FriendState", &[("entity_id", AlgebraicType::U64)], &[0.into()])?; - let sql = "SELECT * FROM PlayerState WHERE entity_id = '161853'"; - - // Should fail with type mismatch for selections and joins. - // - // TODO: Type check other operations deferred for the new query engine. - - assert!( - compile_sql(&db, &begin_tx(&db), sql).is_err(), - // Err("SqlError: Type Mismatch: `PlayerState.entity_id: U64` != `String(\"161853\"): String`, executing: `SELECT * FROM PlayerState WHERE entity_id = '161853'`".into()) - ); - - // Check we can still compile the query if we remove the type mismatch and have multiple logical operations. - let sql = "SELECT * FROM PlayerState WHERE entity_id = 1 AND entity_id = 2 AND entity_id = 3 OR entity_id = 4 OR entity_id = 5"; - - assert!(compile_sql(&db, &begin_tx(&db), sql).is_ok()); - - // Now verify when we have a type mismatch in the middle of the logical operations. - let sql = "SELECT * FROM PlayerState WHERE entity_id = 1 AND entity_id"; - - assert!( - compile_sql(&db, &begin_tx(&db), sql).is_err(), - // Err("SqlError: Type Mismatch: `PlayerState.entity_id: U64 == U64(1): U64` and `PlayerState.entity_id: U64`, both sides must be an `Bool` expression, executing: `SELECT * FROM PlayerState WHERE entity_id = 1 AND entity_id`".into()) - ); - // Verify that all operands of `AND` must be `Bool`. - let sql = "SELECT * FROM PlayerState WHERE entity_id AND entity_id"; - - assert!( - compile_sql(&db, &begin_tx(&db), sql).is_err(), - // Err("SqlError: Type Mismatch: `PlayerState.entity_id: U64` and `PlayerState.entity_id: U64`, both sides must be an `Bool` expression, executing: `SELECT * FROM PlayerState WHERE entity_id AND entity_id`".into()) - ); - Ok(()) - } -} diff --git a/crates/core/src/sql/execute.rs b/crates/core/src/sql/execute.rs index f42b0c10a83..6265806aecf 100644 --- a/crates/core/src/sql/execute.rs +++ b/crates/core/src/sql/execute.rs @@ -7,17 +7,15 @@ use crate::energy::EnergyQuanta; use crate::error::DBError; use crate::estimation::estimate_rows_scanned; use crate::host::module_host::{ - DatabaseTableUpdate, DatabaseUpdate, EventStatus, ModuleEvent, ModuleFunctionCall, RefInstance, ViewCallError, - ViewCallResult, ViewOutcome, WasmInstance, + DatabaseUpdate, EventStatus, ModuleEvent, ModuleFunctionCall, RefInstance, ViewCallError, ViewCallResult, + ViewOutcome, WasmInstance, }; use crate::host::{ArgsTuple, ModuleHost}; use crate::subscription::module_subscription_actor::{commit_and_broadcast_event, ModuleSubscriptions}; use crate::subscription::module_subscription_manager::TransactionOffset; use crate::subscription::tx::DeltaTx; -use crate::util::slow::SlowQueryLogger; -use crate::vm::{check_row_limit, DbProgram, TxMode}; +use crate::vm::check_row_limit; use anyhow::anyhow; -use smallvec::SmallVec; use spacetimedb_datastore::execution_context::Workload; use spacetimedb_datastore::traits::IsolationLevel; use spacetimedb_expr::statement::Statement; @@ -27,9 +25,6 @@ use spacetimedb_lib::Timestamp; use spacetimedb_lib::{AlgebraicType, ProductType, ProductValue}; use spacetimedb_query::{compile_sql_stmt, execute_dml_stmt, execute_select_stmt}; use spacetimedb_sats::raw_identifier::RawIdentifier; -use spacetimedb_vm::eval::run_ast; -use spacetimedb_vm::expr::{CodeResult, CrudExpr, Expr}; -use spacetimedb_vm::relation::MemTable; use tokio::sync::oneshot; pub struct StmtResult { @@ -37,144 +32,6 @@ pub struct StmtResult { pub rows: Vec, } -// TODO(cloutiertyler): we could do this the swift parsing way in which -// we always generate a plan, but it may contain errors - -pub(crate) fn collect_result( - result: &mut Vec, - updates: &mut SmallVec<[DatabaseTableUpdate; 1]>, - r: CodeResult, -) -> Result<(), DBError> { - match r { - CodeResult::Value(_) => {} - CodeResult::Table(x) => result.push(x), - CodeResult::Block(lines) => { - for x in lines { - collect_result(result, updates, x)?; - } - } - CodeResult::Halt(err) => return Err(DBError::VmUser(err)), - CodeResult::Pass(x) => match x { - None => {} - Some(update) => { - updates.push(DatabaseTableUpdate { - table_name: update.table_name, - table_id: update.table_id, - inserts: update.inserts.into(), - deletes: update.deletes.into(), - }); - } - }, - } - - Ok(()) -} - -fn execute( - p: &mut DbProgram<'_, '_>, - ast: Vec, - sql: &str, - updates: &mut SmallVec<[DatabaseTableUpdate; 1]>, -) -> Result, DBError> { - let slow_query_threshold = if let TxMode::Tx(tx) = p.tx { - p.db.query_limit(tx)?.map(Duration::from_millis) - } else { - None - }; - let _slow_query_logger = SlowQueryLogger::new(sql, slow_query_threshold, p.tx.ctx().workload()).log_guard(); - let mut result = Vec::with_capacity(ast.len()); - let query = Expr::Block(ast.into_iter().map(|x| Expr::Crud(Box::new(x))).collect()); - // SQL queries can never reference `MemTable`s, so pass an empty `SourceSet`. - collect_result(&mut result, updates, run_ast(p, query, [].into()).into())?; - Ok(result) -} - -/// Run the compiled `SQL` expression inside the `vm` created by [DbProgram] -/// -/// Evaluates `ast` and accordingly triggers mutable or read tx to execute -/// -/// Also, in case the execution takes more than x, log it as `slow query` -pub fn execute_sql( - db: &RelationalDB, - sql: &str, - ast: Vec, - auth: AuthCtx, - subs: Option<&ModuleSubscriptions>, -) -> Result, DBError> { - if CrudExpr::is_reads(&ast) { - let mut updates = SmallVec::new(); - db.with_read_only(Workload::Sql, |tx| { - execute( - &mut DbProgram::new(db, &mut TxMode::Tx(tx), auth), - ast, - sql, - &mut updates, - ) - }) - } else if subs.is_none() { - let mut updates = SmallVec::new(); - db.with_auto_commit(Workload::Sql, |mut_tx| { - execute( - &mut DbProgram::new(db, &mut mut_tx.into(), auth), - ast, - sql, - &mut updates, - ) - }) - } else { - let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::Sql); - let mut updates = SmallVec::with_capacity(ast.len()); - let res = execute( - &mut DbProgram::new(db, &mut (&mut tx).into(), auth.clone()), - ast, - sql, - &mut updates, - ); - if res.is_ok() && !updates.is_empty() { - let event = ModuleEvent { - timestamp: Timestamp::now(), - caller_identity: auth.caller(), - caller_connection_id: None, - function_call: ModuleFunctionCall { - reducer: <_>::default(), - reducer_id: u32::MAX.into(), - args: ArgsTuple::default(), - }, - status: EventStatus::Committed(DatabaseUpdate { tables: updates }), - reducer_return_value: None, - energy_quanta_used: EnergyQuanta::ZERO, - host_execution_duration: Duration::ZERO, - request_id: None, - timer: None, - }; - commit_and_broadcast_event(subs.unwrap(), None, event, tx); - res - } else { - db.finish_tx(tx, res) - } - } -} - -/// Like [`execute_sql`], but for providing your own `tx`. -/// -/// Returns None if you pass a mutable query with an immutable tx. -pub fn execute_sql_tx<'a>( - db: &RelationalDB, - tx: impl Into>, - sql: &str, - ast: Vec, - auth: AuthCtx, -) -> Result>, DBError> { - let mut tx = tx.into(); - - if matches!(tx, TxMode::Tx(_)) && !CrudExpr::is_reads(&ast) { - return Ok(None); - } - - let mut updates = SmallVec::new(); // No subscription updates in this path, because it requires owning the tx. - execute(&mut DbProgram::new(db, &mut tx, auth), ast, sql, &mut updates).map(Some) -} - #[derive(Debug)] pub struct SqlResult { /// The offset of the SQL operation's transaction. @@ -372,7 +229,6 @@ pub(crate) mod tests { use super::*; use crate::db::relational_db::tests_utils::{self, begin_tx, insert, with_auto_commit, TestDB}; - use crate::vm::tests::create_table_with_rows; use itertools::Itertools; use pretty_assertions::assert_eq; use spacetimedb_datastore::system_tables::{ @@ -384,17 +240,9 @@ pub(crate) mod tests { use spacetimedb_lib::{AlgebraicValue, Identity}; use spacetimedb_primitives::{col_list, ColId, TableId}; use spacetimedb_sats::{product, AlgebraicType, ArrayValue, ProductType}; - use spacetimedb_schema::relation::Header; - use spacetimedb_vm::eval::test_helpers::create_game_data; - - pub(crate) fn execute_for_testing( - db: &Arc, - sql_text: &str, - q: Vec, - ) -> Result, DBError> { - let (subs, _runtime) = ModuleSubscriptions::for_test_new_runtime(db.clone()); - execute_sql(db, sql_text, q, AuthCtx::for_testing(), Some(&subs)) - } + use spacetimedb_schema::identifier::Identifier; + use spacetimedb_schema::schema::{ColumnSchema, TableSchema}; + use spacetimedb_schema::table_name::TableName; /// Short-cut for simplify test execution pub(crate) fn run_for_testing(db: &Arc, sql_text: &str) -> Result, DBError> { @@ -411,7 +259,100 @@ pub(crate) mod tests { .map(|x| x.rows) } - fn create_data(total_rows: u64) -> ResultTest<(TestDB, MemTable)> { + #[derive(Clone, Debug, Default, Eq, PartialEq)] + struct TestRows { + data: Vec, + } + + struct GameData { + location: TestRows, + inv: TestRows, + player: TestRows, + location_ty: ProductType, + inv_ty: ProductType, + player_ty: ProductType, + } + + fn create_game_data() -> GameData { + let inv_ty = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); + let inv = TestRows { + data: vec![product!(1u64, "health")], + }; + + let player_ty = ProductType::from([("entity_id", AlgebraicType::U64), ("inventory_id", AlgebraicType::U64)]); + let player = TestRows { + data: vec![product!(100u64, 1u64), product!(200u64, 1u64), product!(300u64, 1u64)], + }; + + let location_ty = ProductType::from([ + ("entity_id", AlgebraicType::U64), + ("x", AlgebraicType::F32), + ("z", AlgebraicType::F32), + ]); + let location = TestRows { + data: vec![product!(100u64, 0.0f32, 32.0f32), product!(100u64, 1.0f32, 31.0f32)], + }; + + GameData { + location, + inv, + player, + location_ty, + inv_ty, + player_ty, + } + } + + fn create_table_with_rows( + db: &RelationalDB, + tx: &mut crate::db::relational_db::MutTx, + table_name: &str, + schema: ProductType, + rows: &[ProductValue], + access: StAccess, + ) -> ResultTest> { + let columns = schema + .elements + .iter() + .cloned() + .enumerate() + .map(|(i, element)| ColumnSchema { + table_id: TableId::SENTINEL, + col_name: Identifier::new(element.name.unwrap()).unwrap(), + col_type: element.algebraic_type, + col_pos: ColId(i as _), + alias: None, + }) + .collect(); + + let table_id = db.create_table( + tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(table_name), + None, + columns, + vec![], + vec![], + vec![], + StTableType::User, + access, + None, + None, + false, + None, + ), + )?; + let schema = db.schema_for_table_mut(tx, table_id)?; + + for row in rows { + insert(db, tx, table_id, row)?; + } + + Ok(schema) + } + + fn create_data(total_rows: u64) -> ResultTest<(TestDB, TestRows)> { let stdb = TestDB::durable()?; let rows: Vec<_> = (1..=total_rows) @@ -419,25 +360,22 @@ pub(crate) mod tests { .collect(); let head = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let schema = with_auto_commit(&stdb, |tx| { + with_auto_commit(&stdb, |tx| { create_table_with_rows(&stdb, tx, "inventory", head.clone(), &rows, StAccess::Public) })?; - let header = Header::from(&*schema).into(); - - Ok((stdb, MemTable::new(header, schema.table_access, rows))) + Ok((stdb, TestRows { data: rows })) } - fn create_identity_table(table_name: &str) -> ResultTest<(TestDB, MemTable)> { + fn create_identity_table(table_name: &str) -> ResultTest<(TestDB, TestRows)> { let stdb = TestDB::durable()?; let head = ProductType::from([("identity", AlgebraicType::identity())]); let rows = vec![product!(Identity::ZERO), product!(Identity::ONE)]; - let schema = with_auto_commit(&stdb, |tx| { + with_auto_commit(&stdb, |tx| { create_table_with_rows(&stdb, tx, table_name, head.clone(), &rows, StAccess::Public) })?; - let header = Header::from(&*schema).into(); - Ok((stdb, MemTable::new(header, schema.table_access, rows))) + Ok((stdb, TestRows { data: rows })) } #[test] diff --git a/crates/core/src/sql/mod.rs b/crates/core/src/sql/mod.rs index 77bedc08261..741105f0e75 100644 --- a/crates/core/src/sql/mod.rs +++ b/crates/core/src/sql/mod.rs @@ -1,5 +1,3 @@ pub mod ast; -pub mod compiler; pub mod execute; pub mod parser; -mod type_check; diff --git a/crates/core/src/sql/type_check.rs b/crates/core/src/sql/type_check.rs deleted file mode 100644 index 12324686407..00000000000 --- a/crates/core/src/sql/type_check.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::error::PlanError; -use crate::sql::ast::From; -use crate::sql::ast::{Selection, SqlAst}; -use spacetimedb_lib::operator::OpQuery; -use spacetimedb_sats::algebraic_type::fmt::fmt_algebraic_type; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -use spacetimedb_schema::relation::FieldName; -use spacetimedb_schema::schema::ColumnSchema; -use spacetimedb_vm::errors::ErrorType; -use spacetimedb_vm::expr::{FieldExpr, FieldOp}; -use std::fmt; - -fn find_field_name(from: &From, field: FieldName) -> Result<(&str, &ColumnSchema), PlanError> { - from.find_field_name(field).ok_or_else(|| PlanError::UnknownFieldName { - field, - tables: from.iter_tables().map(|t| t.table_name.clone()).collect(), - }) -} - -#[derive(Debug)] -enum Typed<'a> { - Field { - table: &'a str, - field: &'a str, - ty: Option, - }, - Value { - value: &'a AlgebraicValue, - ty: Option, - }, - Cmp { - op: OpQuery, - lhs: Box>, - rhs: Box>, - }, -} - -impl Typed<'_> { - pub fn ty(&self) -> Option<&AlgebraicType> { - match self { - Typed::Field { ty, .. } | Typed::Value { ty, .. } => ty.as_ref(), - Typed::Cmp { .. } => Some(&AlgebraicType::Bool), - } - } - - pub fn set_ty(&mut self, ty: Option) { - match self { - Typed::Field { ty: ty_lhs, .. } | Typed::Value { ty: ty_lhs, .. } => { - *ty_lhs = ty; - } - Typed::Cmp { .. } => {} - } - } -} - -impl fmt::Display for Typed<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Typed::Field { table, field, ty } => { - if let Some(ty) = ty { - write!(f, "{table}.{field}: {}", fmt_algebraic_type(ty)) - } else { - write!(f, "{table}.{field}: ?",) - } - } - Typed::Value { value, ty } => { - if let Some(ty) = ty { - write!(f, "{value:?}: {}", fmt_algebraic_type(ty)) - } else { - write!(f, "{value:?}: ?") - } - } - Typed::Cmp { op, lhs, rhs, .. } => { - write!(f, "{lhs} {op} {rhs}") - } - } - } -} - -#[derive(Debug)] -struct QueryFragment<'a, T> { - from: &'a From, - q: &'a T, -} - -/// Type check trait for `sql` query fragments -pub(crate) trait TypeCheck { - /// Type check the query fragment - fn type_check(&self) -> Result<(), PlanError>; -} - -/// Resolve the type of the field, that in the case of `SumType` we need to resolve using the `field` -fn resolve_type(field: &FieldExpr, ty: AlgebraicType) -> Result, PlanError> { - // The `SumType` returns `None` on `type_of` so we need to check against the value - if let AlgebraicType::Sum(ty) = &ty { - // We can use in `sql` coercion from string to sum type: `tag = 'name'` - if let FieldExpr::Value(val_rhs) = field { - if let Some(val_rhs) = val_rhs.as_string() - && ty.get_variant_simple(val_rhs).is_some() - { - return Ok(Some(AlgebraicType::Sum(ty.clone()))); - } - // or check it against a `SumValue` type: `tag = { tag: 0, value: 1 }` - if let Some(val_rhs) = val_rhs.as_sum() - && ty.is_simple_enum() - && ty.get_variant_by_tag(val_rhs.tag).is_some() - { - return Ok(Some(AlgebraicType::Sum(ty.clone()))); - } - } - } - - if let (AlgebraicType::Product(_), FieldExpr::Value(val)) = (&ty, field) { - match val { - AlgebraicValue::U128(_) => return Ok(Some(AlgebraicType::U128)), - AlgebraicValue::U256(_) => return Ok(Some(AlgebraicType::U256)), - _ => {} - } - } - Ok(Some(ty)) -} - -fn check_both(op: OpQuery, lhs: &Typed, rhs: &Typed) -> Result<(), PlanError> { - match op { - OpQuery::Cmp(_) => { - if lhs.ty() != rhs.ty() { - return Err(ErrorType::TypeMismatch { - lhs: lhs.to_string(), - rhs: rhs.to_string(), - } - .into()); - } - } - OpQuery::Logic(op) => { - if (lhs.ty(), rhs.ty()) != (Some(&AlgebraicType::Bool), Some(&AlgebraicType::Bool)) { - return Err(ErrorType::TypeMismatchLogic { - lhs: lhs.to_string(), - rhs: rhs.to_string(), - op, - expected: fmt_algebraic_type(&AlgebraicType::Bool).to_string(), - } - .into()); - } - } - } - Ok(()) -} - -/// Patch the type of the field if the type is an `Identity`, `ConnectionId` or `Enum` -fn patch_type(lhs: &FieldOp, ty_lhs: &mut Typed, ty_rhs: &Typed) -> Result<(), PlanError> { - if let FieldOp::Field(lhs_field) = lhs - && let Some(ty) = ty_rhs.ty() - && (ty.is_sum() || ty.as_product().is_some_and(|x| x.is_special())) - { - ty_lhs.set_ty(resolve_type(lhs_field, ty.clone())?); - } - Ok(()) -} - -fn type_check(of: QueryFragment) -> Result { - match of.q { - FieldOp::Field(expr) => match expr { - FieldExpr::Name(x) => { - let (table, col) = find_field_name(of.from, *x)?; - - Ok(Typed::Field { - table, - field: &col.col_name, - ty: Some(col.col_type.clone()), - }) - } - FieldExpr::Value(value) => Ok(Typed::Value { - value, - ty: value.type_of(), - }), - }, - FieldOp::Cmp { op, lhs, rhs } => { - let mut ty_lhs = type_check(QueryFragment { from: of.from, q: lhs })?; - let mut ty_rhs = type_check(QueryFragment { from: of.from, q: rhs })?; - - // TODO: For the cases of `Identity, ConnectionId, Enum` we need to resolve the type from the value we are comparing, - // because the type is not lifted when we parse the query on `spacetimedb_vm::ops::parse`. - // - // This is a temporary solution until we have a better way to resolve the type of the field. - patch_type(lhs, &mut ty_lhs, &ty_rhs)?; - patch_type(rhs, &mut ty_rhs, &ty_lhs)?; - - check_both(*op, &ty_lhs, &ty_rhs)?; - - Ok(Typed::Cmp { - op: *op, - lhs: Box::new(ty_lhs), - rhs: Box::new(ty_rhs), - }) - } - } -} - -impl TypeCheck for QueryFragment<'_, Selection> { - fn type_check(&self) -> Result<(), PlanError> { - type_check(QueryFragment { - from: self.from, - q: &self.q.clause, - })?; - Ok(()) - } -} - -impl TypeCheck for SqlAst { - // TODO: Other options deferred for the new query engine - fn type_check(&self) -> Result<(), PlanError> { - if let SqlAst::Select { - from, - project: _, - selection: Some(selection), - } = self - { - QueryFragment { from, q: selection }.type_check()?; - } - - Ok(()) - } -} diff --git a/crates/core/src/subscription/delta.rs b/crates/core/src/subscription/delta.rs index bdd99bbc5fd..b2747a933e2 100644 --- a/crates/core/src/subscription/delta.rs +++ b/crates/core/src/subscription/delta.rs @@ -1,13 +1,11 @@ use crate::host::module_host::UpdatesRelValue; use anyhow::Result; use spacetimedb_data_structures::map::{HashCollectionExt as _, HashMap}; -use spacetimedb_execution::{Datastore, DeltaStore, Row}; +use spacetimedb_execution::{Datastore, DeltaStore, RelValue, Row}; use spacetimedb_lib::metrics::ExecutionMetrics; use spacetimedb_primitives::ColList; use spacetimedb_sats::product_value::InvalidFieldError; use spacetimedb_subscription::SubscriptionPlan; -use spacetimedb_vm::relation::RelValue; - /// Evaluate a subscription over a delta update. /// Returns `None` for empty updates. /// diff --git a/crates/core/src/subscription/execution_unit.rs b/crates/core/src/subscription/execution_unit.rs index 420f2daeb46..6edbd69d899 100644 --- a/crates/core/src/subscription/execution_unit.rs +++ b/crates/core/src/subscription/execution_unit.rs @@ -1,45 +1,7 @@ -use super::query::{self, Supported}; -use super::subscription::{IncrementalJoin, SupportedQuery}; -use crate::db::relational_db::{RelationalDB, Tx}; -use crate::error::DBError; -use crate::estimation; -use crate::host::module_host::{DatabaseTableUpdate, DatabaseTableUpdateRelValue, UpdatesRelValue}; -use crate::subscription::websocket_building::{BuildableWebsocketFormat, RowListBuilderSource}; -use crate::util::slow::SlowQueryLogger; -use crate::vm::{build_query, TxMode}; -use spacetimedb_client_api_messages::websocket::common::RowListLen as _; -use spacetimedb_client_api_messages::websocket::v1::{self as ws_v1}; -use spacetimedb_datastore::locking_tx_datastore::TxId; -use spacetimedb_lib::identity::AuthCtx; use spacetimedb_lib::Identity; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::{u256, ProductValue}; -use spacetimedb_schema::def::error::AuthError; -use spacetimedb_schema::relation::DbTable; -use spacetimedb_schema::table_name::TableName; -use spacetimedb_vm::eval::IterRows; -use spacetimedb_vm::expr::{AuthAccess, NoInMemUsed, Query, QueryExpr, SourceExpr, SourceId}; -use spacetimedb_vm::rel_ops::RelOps; -use spacetimedb_vm::relation::RelValue; -use std::hash::Hash; -use std::time::Duration; +use spacetimedb_sats::u256; -/// A hash for uniquely identifying query execution units, -/// to avoid recompilation of queries that have an open subscription. -/// -/// Currently we are using a cryptographic hash, -/// which is most certainly overkill. -/// However the benefits include uniqueness by definition, -/// and a compact representation for equality comparisons. -/// -/// It also decouples the hash from the physical plan. -/// -/// Note that we could hash QueryExprs directly, -/// using the standard library's hasher. -/// However some execution units are comprised of several query plans, -/// as is the case for incremental joins. -/// And we want to associate a hash with the entire unit of execution, -/// rather than an individual plan. +/// A hash for uniquely identifying subscription plans. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct QueryHash { data: [u8; 32], @@ -52,13 +14,13 @@ impl From for u256 { } impl QueryHash { - /// The zero value of a QueryHash + /// The zero value of a QueryHash. pub const NONE: Self = Self { data: [0; 32] }; - /// The min value of a QueryHash + /// The min value of a QueryHash. pub const MIN: Self = Self::NONE; - /// The max value of a QueryHash + /// The max value of a QueryHash. pub const MAX: Self = Self { data: [0xFFu8; 32] }; pub fn from_bytes(bytes: &[u8]) -> Self { @@ -67,278 +29,21 @@ impl QueryHash { } } - /// Generate a hash from a query string - pub fn from_string(str: &str, identity: Identity, has_param: bool) -> Self { + /// Generate a hash from a query string. + pub fn from_string(sql: &str, identity: Identity, has_param: bool) -> Self { if has_param { - return Self::from_string_and_identity(str, identity); + return Self::from_string_and_identity(sql, identity); } - Self::from_bytes(str.as_bytes()) + Self::from_bytes(sql.as_bytes()) } - /// If a query is parameterized with `:sender`, we must use the value of `:sender`, - /// i.e. the identity of the caller, when hashing the query text, - /// so that two identical queries from different clients aren't hashed to the same value. - /// - /// TODO: Once we have RLS, this hash must computed after name resolution. - /// It can no longer be computed from the source text. - pub fn from_string_and_identity(str: &str, identity: Identity) -> Self { + /// Parameterized queries must include the caller identity in their hash. + pub fn from_string_and_identity(sql: &str, identity: Identity) -> Self { let mut hasher = blake3::Hasher::new(); - hasher.update(str.as_bytes()); + hasher.update(sql.as_bytes()); hasher.update(&identity.to_byte_array()); Self { data: hasher.finalize().into(), } } } - -#[derive(Debug)] -enum EvalIncrPlan { - /// For semijoins, store several versions of the plan, - /// for querying all combinations of L_{inserts/deletes/committed} * R_(inserts/deletes/committed). - Semijoin(IncrementalJoin), - - /// For single-table selects, store only one version of the plan, - /// which has a single source, an in-memory table, produced by [`query::query_to_mem_table`]. - Select(QueryExpr), -} - -/// An atomic unit of execution within a subscription set. -/// Currently just a single query plan, -/// however in the future this could be multiple query plans, -/// such as those of an incremental join. -#[derive(Debug)] -pub struct ExecutionUnit { - hash: QueryHash, - - pub(crate) sql: String, - /// A version of the plan optimized for `eval`, - /// whose source is a [`DbTable`]. - /// - /// This is a direct compilation of the source query. - eval_plan: QueryExpr, - /// A version of the plan optimized for `eval_incr`, - /// whose source is an in-memory table, as if by [`query::to_mem_table`]. - eval_incr_plan: EvalIncrPlan, -} - -/// An ExecutionUnit is uniquely identified by its QueryHash. -impl Eq for ExecutionUnit {} - -impl PartialEq for ExecutionUnit { - fn eq(&self, other: &Self) -> bool { - self.hash == other.hash - } -} - -impl From for ExecutionUnit { - // Used in tests and benches. - // TODO(bikeshedding): Remove this impl, - // in favor of more explicit calls to `ExecutionUnit::new` with `QueryHash::NONE`. - fn from(plan: SupportedQuery) -> Self { - Self::new(plan, QueryHash::NONE).unwrap() - } -} - -impl ExecutionUnit { - /// Pre-compute a plan for `eval_incr` which reads from an in-memory table - /// rather than re-planning on every incremental update. - fn compile_select_eval_incr(expr: &QueryExpr) -> QueryExpr { - let source = &expr.source; - assert!( - source.is_db_table(), - "The plan passed to `compile_select_eval_incr` must read from `DbTable`s, but found in-mem table" - ); - let source = SourceExpr::from_mem_table(source.head().clone(), source.table_access(), SourceId(0)); - let query = expr.query.clone(); - QueryExpr { source, query } - } - - pub fn new(eval_plan: SupportedQuery, hash: QueryHash) -> Result { - // Pre-compile the `expr` as fully as possible, twice, for two different paths: - // - `eval_incr_plan`, for incremental updates from an `SourceExpr::InMemory` table. - // - `eval_plan`, for initial subscriptions from a `SourceExpr::DbTable`. - - let eval_incr_plan = match &eval_plan { - SupportedQuery { - kind: query::Supported::Select, - expr, - .. - } => EvalIncrPlan::Select(Self::compile_select_eval_incr(expr)), - SupportedQuery { - kind: query::Supported::Semijoin, - expr, - .. - } => EvalIncrPlan::Semijoin(IncrementalJoin::new(expr)?), - }; - Ok(ExecutionUnit { - hash, - sql: eval_plan.sql, - eval_plan: eval_plan.expr, - eval_incr_plan, - }) - } - - /// Is this a single table select or a semijoin? - pub fn kind(&self) -> Supported { - match self.eval_incr_plan { - EvalIncrPlan::Select(_) => Supported::Select, - EvalIncrPlan::Semijoin(_) => Supported::Semijoin, - } - } - - /// The unique query hash for this execution unit. - pub fn hash(&self) -> QueryHash { - self.hash - } - - fn return_db_table(&self) -> &DbTable { - self.eval_plan - .source - .get_db_table() - .expect("ExecutionUnit eval_plan should have DbTable source, but found in-mem table") - } - - /// The table from which this query returns rows. - pub fn return_table(&self) -> TableId { - self.return_db_table().table_id - } - - pub fn return_name(&self) -> &TableName { - &self.return_db_table().head.table_name - } - - /// The table on which this query filters rows. - /// In the case of a single table select, - /// this is the same as the return table. - /// In the case of a semijoin, - /// it is the auxiliary table against which we are joining. - pub fn filter_table(&self) -> TableId { - let return_table = self.return_table(); - self.eval_plan - .query - .first() - .and_then(|op| { - if let Query::IndexJoin(join) = op { - Some(join) - } else { - None - } - }) - .and_then(|join| { - join.index_side - .get_db_table() - .filter(|t| t.table_id != return_table) - .or_else(|| join.probe_side.source.get_db_table()) - .filter(|t| t.table_id != return_table) - .map(|t| t.table_id) - }) - .unwrap_or(return_table) - } - - /// Evaluate this execution unit against the database using the specified format. - #[tracing::instrument(level = "trace", skip_all)] - pub fn eval( - &self, - db: &RelationalDB, - tx: &Tx, - rlb_pool: &impl RowListBuilderSource, - sql: &str, - slow_query_threshold: Option, - compression: ws_v1::Compression, - ) -> Option> { - let _slow_query = SlowQueryLogger::new(sql, slow_query_threshold, tx.ctx.workload()).log_guard(); - - // Build & execute the query and then encode it to a row list. - let tx = &tx.into(); - let mut inserts = build_query(db, tx, &self.eval_plan, &mut NoInMemUsed); - let inserts = inserts.iter(); - let (inserts, num_rows) = F::encode_list(rlb_pool.take_row_list_builder(), inserts); - - (!inserts.is_empty()).then(|| { - let deletes = F::List::default(); - let qu = ws_v1::QueryUpdate { deletes, inserts }; - let update = F::into_query_update(qu, compression); - ws_v1::TableUpdate::new( - self.return_table(), - self.return_name().clone().into(), - ws_v1::SingleQueryUpdate { update, num_rows }, - ) - }) - } - - /// Evaluate this execution unit against the given delta tables. - pub fn eval_incr<'a>( - &'a self, - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - sql: &'a str, - tables: impl 'a + Clone + Iterator, - slow_query_threshold: Option, - ) -> Option> { - let _slow_query = SlowQueryLogger::new(sql, slow_query_threshold, tx.ctx().workload()).log_guard(); - let updates = match &self.eval_incr_plan { - EvalIncrPlan::Select(plan) => Self::eval_incr_query_expr(db, tx, tables, plan, self.return_table()), - EvalIncrPlan::Semijoin(plan) => plan.eval(db, tx, tables), - }; - - updates.has_updates().then(|| DatabaseTableUpdateRelValue { - table_id: self.return_table(), - table_name: self.return_name().clone(), - updates, - }) - } - - fn eval_query_expr_against_memtable<'a>( - db: &'a RelationalDB, - tx: &'a TxMode, - mem_table: &'a [ProductValue], - eval_incr_plan: &'a QueryExpr, - ) -> Box> { - // Provide the updates from `table`. - let sources = &mut Some(mem_table.iter().map(RelValue::ProjRef)); - // Evaluate the saved plan against the new updates, - // returning an iterator over the selected rows. - build_query(db, tx, eval_incr_plan, sources) - } - - fn eval_incr_query_expr<'a>( - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - tables: impl Iterator, - eval_incr_plan: &'a QueryExpr, - return_table: TableId, - ) -> UpdatesRelValue<'a> { - assert!( - eval_incr_plan.source.is_mem_table(), - "Expected in-mem table in `eval_incr_plan`, but found `DbTable`" - ); - - let mut deletes = Vec::new(); - let mut inserts = Vec::new(); - for table in tables.filter(|table| table.table_id == return_table) { - // Evaluate the query separately against inserts and deletes, - // so that we can pass each row to the query engine unaltered, - // without forgetting which are inserts and which are deletes. - // Previously, we used to add such a column `"__op_type: AlgebraicType::U8"`. - if !table.inserts.is_empty() { - inserts.extend(Self::eval_query_expr_against_memtable(db, tx, &table.inserts, eval_incr_plan).iter()); - } - if !table.deletes.is_empty() { - deletes.extend(Self::eval_query_expr_against_memtable(db, tx, &table.deletes, eval_incr_plan).iter()); - } - } - - UpdatesRelValue { deletes, inserts } - } - - /// The estimated number of rows returned by this execution unit. - pub fn row_estimate(&self, tx: &TxId) -> u64 { - estimation::num_rows(tx, &self.eval_plan) - } -} - -impl AuthAccess for ExecutionUnit { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - self.eval_plan.check_auth(auth) - } -} diff --git a/crates/core/src/subscription/query.rs b/crates/core/src/subscription/query.rs index 3f8d1c46027..bea30f96b7f 100644 --- a/crates/core/src/subscription/query.rs +++ b/crates/core/src/subscription/query.rs @@ -1,18 +1,14 @@ -use crate::db::relational_db::{RelationalDB, Tx}; +use super::execution_unit::QueryHash; +use super::module_subscription_manager::Plan; +use crate::db::relational_db::Tx; use crate::error::{DBError, SubscriptionError}; use crate::sql::ast::SchemaViewer; -use crate::sql::compiler::compile_sql; -use crate::subscription::subscription::SupportedQuery; use once_cell::sync::Lazy; use regex::Regex; use spacetimedb_datastore::locking_tx_datastore::state_view::StateView; use spacetimedb_execution::Datastore; use spacetimedb_lib::identity::AuthCtx; use spacetimedb_subscription::SubscriptionPlan; -use spacetimedb_vm::expr::{self, Crud, CrudExpr, QueryExpr}; - -use super::execution_unit::QueryHash; -use super::module_subscription_manager::Plan; static WHITESPACE: Lazy = Lazy::new(|| Regex::new(r"^\s*$").unwrap()); static SUBSCRIBE_TO_ALL_TABLES_REGEX: Lazy = @@ -28,60 +24,7 @@ pub fn is_subscribe_to_all_tables(sql: &str) -> bool { SUBSCRIBE_TO_ALL_TABLES_REGEX.is_match_at(sql, 0) } -// TODO: Remove this after the SubscribeSingle migration. -// TODO: It's semantically wrong to `SELECT * FROM *` -// as it can only return back the changes valid for the tables in scope *right now* -// instead of **continuously updating** the db changes -// with system table modifications (add/remove tables, indexes, ...). -// -/// Variant of [`compile_read_only_query`] which appends `SourceExpr`s into a given `SourceBuilder`, -/// rather than returning a new `SourceSet`. -/// -/// This is necessary when merging multiple SQL queries into a single query set, -/// as in [`crate::subscription::module_subscription_actor::ModuleSubscriptions::add_subscriber`]. -pub fn compile_read_only_queryset( - relational_db: &RelationalDB, - auth: &AuthCtx, - tx: &Tx, - input: &str, -) -> Result, DBError> { - let input = input.trim(); - if input.is_empty() { - return Err(SubscriptionError::Empty.into()); - } - - // Remove redundant whitespace, and in particular newlines, for debug info. - let input = WHITESPACE.replace_all(input, " "); - - let compiled = compile_sql(relational_db, auth, tx, &input)?; - let mut queries = Vec::with_capacity(compiled.len()); - for q in compiled { - return Err(SubscriptionError::SideEffect(match q { - CrudExpr::Query(x) => { - queries.push(x); - continue; - } - CrudExpr::Insert { .. } => Crud::Insert, - CrudExpr::Update { .. } => Crud::Update, - CrudExpr::Delete { .. } => Crud::Delete, - CrudExpr::SetVar { .. } => Crud::Config, - CrudExpr::ReadVar { .. } => Crud::Config, - }) - .into()); - } - - if !queries.is_empty() { - Ok(queries - .into_iter() - .map(|query| SupportedQuery::new(query, input.to_string())) - .collect::>()?) - } else { - Err(SubscriptionError::Empty.into()) - } -} - /// Compile a string into a single read-only query. -/// This returns an error if the string has multiple queries or mutations. pub fn compile_read_only_query(auth: &AuthCtx, tx: &Tx, input: &str) -> Result { if is_whitespace_or_empty(input) { return Err(SubscriptionError::Empty.into()); @@ -93,8 +36,7 @@ pub fn compile_read_only_query(auth: &AuthCtx, tx: &Tx, input: &str) -> Result

( auth: &AuthCtx, tx: &Tx, @@ -110,1343 +52,7 @@ pub fn compile_query_with_hashes( let (plans, has_param) = SubscriptionPlan::compile(input, &tx, auth)?; if auth.bypass_rls() || has_param { - // Note that when generating hashes for queries from owners, - // we always treat them as if they were parameterized by :sender. - // This is because RLS is not applicable to owners. - // Hence owner hashes must never overlap with client hashes. return Ok(Plan::new(plans, hash_with_param, input.to_owned())); } Ok(Plan::new(plans, hash, input.to_owned())) } - -/// The kind of [`QueryExpr`] currently supported for incremental evaluation. -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] -pub enum Supported { - /// A scan or [`QueryExpr::Select`] of a single table. - Select, - /// A semijoin of two tables, restricted to [`QueryExpr::IndexJoin`]s. - /// - /// See [`crate::sql::compiler::try_index_join`]. - Semijoin, -} - -/// Classify a [`QueryExpr`] into a [`Supported`] kind, or `None` if incremental -/// evaluation is not currently supported for the expression. -pub fn classify(expr: &QueryExpr) -> Option { - use expr::Query::*; - if matches!(&*expr.query, [IndexJoin(_)]) { - return Some(Supported::Semijoin); - } - for op in &expr.query { - if let JoinInner(_) = op { - return None; - } - } - Some(Supported::Select) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::db::relational_db::tests_utils::{ - begin_mut_tx, begin_tx, insert, with_auto_commit, with_read_only, TestDB, - }; - use crate::db::relational_db::MutTx; - use crate::host::module_host::{DatabaseTableUpdate, DatabaseUpdate, UpdatesRelValue}; - use crate::sql::execute::collect_result; - use crate::sql::execute::tests::run_for_testing; - use crate::subscription::module_subscription_manager::QueriedTableIndexIds; - use crate::subscription::row_list_builder_pool::BsatnRowListBuilderPool; - use crate::subscription::subscription::{legacy_get_all, ExecutionSet}; - use crate::subscription::tx::DeltaTx; - use crate::vm::tests::create_table_with_rows; - use crate::vm::DbProgram; - use itertools::Itertools; - use smallvec::SmallVec; - use spacetimedb_client_api_messages::websocket::v1 as ws_v1; - use spacetimedb_data_structures::map::{HashCollectionExt as _, HashMap}; - use spacetimedb_datastore::execution_context::Workload; - use spacetimedb_lib::bsatn; - use spacetimedb_lib::db::auth::{StAccess, StTableType}; - use spacetimedb_lib::error::ResultTest; - use spacetimedb_lib::identity::AuthCtx; - use spacetimedb_lib::metrics::ExecutionMetrics; - use spacetimedb_lib::Identity; - use spacetimedb_primitives::{ColId, TableId}; - use spacetimedb_sats::{product, AlgebraicType, ProductType, ProductValue}; - use spacetimedb_schema::relation::FieldName; - use spacetimedb_schema::schema::*; - use spacetimedb_schema::table_name::TableName; - use spacetimedb_vm::eval::run_ast; - use spacetimedb_vm::eval::test_helpers::{mem_table, mem_table_without_table_name, scalar}; - use spacetimedb_vm::expr::{Expr, SourceSet}; - use spacetimedb_vm::operator::OpCmp; - use spacetimedb_vm::relation::{MemTable, RelValue}; - use std::sync::Arc; - - /// Runs a query that evaluates if the changes made should be reported to the [ModuleSubscriptionManager] - fn run_query( - db: &RelationalDB, - tx: &Tx, - query: &QueryExpr, - auth: AuthCtx, - sources: SourceSet, N>, - ) -> Result, DBError> { - let mut tx = tx.into(); - let p = &mut DbProgram::new(db, &mut tx, auth); - let q = Expr::Crud(Box::new(CrudExpr::Query(query.clone()))); - - let mut result = Vec::with_capacity(1); - let mut updates = SmallVec::new(); - collect_result(&mut result, &mut updates, run_ast(p, q, sources).into())?; - Ok(result) - } - - fn insert_op(table_id: TableId, table_name: &str, row: ProductValue) -> DatabaseTableUpdate { - DatabaseTableUpdate { - table_id, - table_name: TableName::for_test(table_name), - deletes: [].into(), - inserts: [row].into(), - } - } - - fn delete_op(table_id: TableId, table_name: &str, row: ProductValue) -> DatabaseTableUpdate { - DatabaseTableUpdate { - table_id, - table_name: TableName::for_test(table_name), - deletes: [row].into(), - inserts: [].into(), - } - } - - fn insert_row(db: &RelationalDB, tx: &mut MutTx, table_id: TableId, row: ProductValue) -> ResultTest<()> { - insert(db, tx, table_id, &row)?; - Ok(()) - } - - fn delete_row(db: &RelationalDB, tx: &mut MutTx, table_id: TableId, row: ProductValue) { - db.delete_by_rel(tx, table_id, [row]); - } - - fn make_data( - db: &RelationalDB, - tx: &mut MutTx, - table_name: &str, - head: &ProductType, - row: &ProductValue, - access: StAccess, - ) -> ResultTest<(Arc, MemTable, DatabaseTableUpdate, QueryExpr)> { - let schema = create_table_with_rows(db, tx, table_name, head.clone(), std::slice::from_ref(row), access)?; - let table = mem_table(schema.table_id, schema.get_row_type().clone(), [row.clone()]); - - let data = DatabaseTableUpdate { - table_id: schema.table_id, - table_name: TableName::for_test(table_name), - deletes: [].into(), - inserts: [row.clone()].into(), - }; - - let q = QueryExpr::new(&*schema); - - Ok((schema, table, data, q)) - } - - fn make_inv( - db: &RelationalDB, - tx: &mut MutTx, - access: StAccess, - ) -> ResultTest<(Arc, MemTable, DatabaseTableUpdate, QueryExpr)> { - let head = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let row = product!(1u64, "health"); - - let (schema, table, data, q) = make_data(db, tx, "inventory", &head, &row, access)?; - - let fields = &[0, 1].map(|c| FieldName::new(schema.table_id, c.into()).into()); - let q = q.with_project(fields.into(), None).unwrap(); - - Ok((schema, table, data, q)) - } - - fn make_player( - db: &RelationalDB, - tx: &mut MutTx, - ) -> ResultTest<(Arc, MemTable, DatabaseTableUpdate, QueryExpr)> { - let table_name = "player"; - let head = ProductType::from([("player_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let row = product!(2u64, "jhon doe"); - - let (schema, table, data, q) = make_data(db, tx, table_name, &head, &row, StAccess::Public)?; - - let fields = [0, 1].map(|c| FieldName::new(schema.table_id, c.into()).into()); - let q = q.with_project(fields.into(), None).unwrap(); - - Ok((schema, table, data, q)) - } - - /// Replace the primary (ie. `source`) table of the given [`QueryExpr`] with - /// a virtual [`MemTable`] consisting of the rows in [`DatabaseTableUpdate`]. - fn query_to_mem_table( - mut of: QueryExpr, - data: &DatabaseTableUpdate, - ) -> (QueryExpr, SourceSet, 1>) { - let data = data.deletes.iter().chain(data.inserts.iter()).cloned().collect(); - let mem_table = MemTable::new(of.head().clone(), of.source.table_access(), data); - let mut sources = SourceSet::empty(); - of.source = sources.add_mem_table(mem_table); - (of, sources) - } - - fn check_query( - db: &RelationalDB, - table: &MemTable, - tx: &Tx, - q: &QueryExpr, - data: &DatabaseTableUpdate, - ) -> ResultTest<()> { - let (q, sources) = query_to_mem_table(q.clone(), data); - let result = run_query(db, tx, &q, AuthCtx::for_testing(), sources)?; - - assert_eq!( - Some(mem_table_without_table_name(table)), - result.first().map(mem_table_without_table_name) - ); - - Ok(()) - } - - fn check_query_incr( - db: &RelationalDB, - tx: &Tx, - s: &ExecutionSet, - update: &DatabaseUpdate, - total_tables: usize, - rows: &[ProductValue], - ) -> ResultTest<()> { - let tx = &tx.into(); - let update = update.tables.iter().collect::>(); - let result = s.eval_incr_for_test(db, tx, &update, None); - assert_eq!( - result.tables.len(), - total_tables, - "Must return the correct number of tables: {result:#?}" - ); - - let result = result - .tables - .iter() - .map(|u| &u.updates) - .flat_map(|u| { - u.deletes - .iter() - .chain(&*u.inserts) - .map(|rv| rv.clone().into_product_value()) - .collect::>() - }) - .sorted() - .collect::>(); - - assert_eq!(result, rows, "Must return the correct row(s)"); - - Ok(()) - } - - fn check_query_eval( - db: &RelationalDB, - tx: &Tx, - s: &ExecutionSet, - total_tables: usize, - rows: &[ProductValue], - ) -> ResultTest<()> { - let result = s - .eval::(db, tx, &BsatnRowListBuilderPool::new(), None, ws_v1::Compression::None) - .tables; - assert_eq!( - result.len(), - total_tables, - "Must return the correct number of tables: {result:#?}" - ); - - let result = result - .into_iter() - .flat_map(|x| x.updates) - .map(|x| match x { - ws_v1::CompressableQueryUpdate::Uncompressed(x) => x, - _ => unreachable!(), - }) - .flat_map(|x| { - (&x.deletes) - .into_iter() - .chain(&x.inserts) - .map(|x| x.to_owned()) - .collect::>() - }) - .sorted() - .collect_vec(); - - let rows = rows.iter().map(|r| bsatn::to_vec(r).unwrap()).collect_vec(); - - assert_eq!(result, rows, "Must return the correct row(s)"); - - Ok(()) - } - - fn singleton_execution_set(expr: QueryExpr, sql: String) -> ResultTest { - Ok(ExecutionSet::from_iter([SupportedQuery::try_from((expr, sql))?])) - } - - #[test] - fn test_whitespace_regex() -> ResultTest<()> { - assert!(is_whitespace_or_empty("")); - assert!(is_whitespace_or_empty(" ")); - assert!(is_whitespace_or_empty("\n \t")); - assert!(!is_whitespace_or_empty(" a")); - Ok(()) - } - - #[test] - fn test_subscribe_to_all_tables_regex() -> ResultTest<()> { - assert!(is_subscribe_to_all_tables("SELECT * FROM *")); - assert!(is_subscribe_to_all_tables("Select * From *")); - assert!(is_subscribe_to_all_tables("select * from *")); - assert!(is_subscribe_to_all_tables("\nselect *\nfrom * ")); - assert!(!is_subscribe_to_all_tables("select * from * where")); - Ok(()) - } - - #[test] - fn test_compile_incr_plan() -> ResultTest<()> { - let db = TestDB::durable()?; - - let schema = &[("n", AlgebraicType::U64), ("data", AlgebraicType::U64)]; - let indexes = &[0.into()]; - db.create_table_for_test("a", schema, indexes)?; - db.create_table_for_test("b", schema, indexes)?; - - let tx = begin_tx(&db); - let sql = "SELECT b.* FROM b JOIN a ON b.n = a.n WHERE b.data > 200"; - let result = compile_read_only_query(&AuthCtx::for_testing(), &tx, sql); - assert!(result.is_ok()); - Ok(()) - } - - #[test] - fn test_eval_incr_for_index_scan() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [test] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let table_id = db.create_table_for_test("test", schema, indexes)?; - - let mut tx = begin_mut_tx(&db); - let mut deletes = Vec::new(); - for i in 0u64..9u64 { - insert(&db, &mut tx, table_id, &product!(i, i))?; - deletes.push(product!(i + 10, i)) - } - - let update = DatabaseUpdate { - tables: [DatabaseTableUpdate { - table_id, - table_name: TableName::for_test("test"), - deletes: deletes.into(), - inserts: [].into(), - }] - .into(), - }; - - db.commit_tx(tx)?; - let tx = begin_tx(&db); - - let sql = "select * from test where b = 3"; - let mut exp = compile_sql(&db, &AuthCtx::for_testing(), &tx, sql)?; - - let Some(CrudExpr::Query(query)) = exp.pop() else { - panic!("unexpected query {:#?}", exp[0]); - }; - - let query: ExecutionSet = singleton_execution_set(query, sql.into())?; - - let tx = (&tx).into(); - let update = update.tables.iter().collect::>(); - let result = query.eval_incr_for_test(&db, &tx, &update, None); - - assert_eq!(result.tables.len(), 1); - - let update = &result.tables[0].updates; - - assert_eq!(update.inserts.len(), 0); - assert_eq!(update.deletes.len(), 1); - - let op = &update.deletes[0]; - - assert_eq!(op.clone().into_product_value(), product!(13u64, 3u64)); - Ok(()) - } - - #[test] - fn test_subscribe() -> ResultTest<()> { - let db = TestDB::durable()?; - - let mut tx = begin_mut_tx(&db); - - let (schema, table, data, q) = make_inv(&db, &mut tx, StAccess::Public)?; - db.commit_tx(tx)?; - assert_eq!(schema.table_type, StTableType::User); - assert_eq!(schema.table_access, StAccess::Public); - - let tx = begin_tx(&db); - let q_1 = q.clone(); - check_query(&db, &table, &tx, &q_1, &data)?; - - let q_2 = q - .with_select_cmp(OpCmp::Eq, FieldName::new(schema.table_id, 0.into()), scalar(1u64)) - .unwrap(); - check_query(&db, &table, &tx, &q_2, &data)?; - - Ok(()) - } - - #[test] - fn test_subscribe_private() -> ResultTest<()> { - let db = TestDB::durable()?; - - let mut tx = begin_mut_tx(&db); - - let (schema, table, data, q) = make_inv(&db, &mut tx, StAccess::Private)?; - db.commit_tx(tx)?; - assert_eq!(schema.table_type, StTableType::User); - assert_eq!(schema.table_access, StAccess::Private); - - let row = product!(1u64, "health"); - let tx = begin_tx(&db); - check_query(&db, &table, &tx, &q, &data)?; - - // SELECT * FROM inventory WHERE inventory_id = 1 - let q_id = QueryExpr::new(&*schema) - .with_select_cmp(OpCmp::Eq, FieldName::new(schema.table_id, 0.into()), scalar(1u64)) - .unwrap(); - - let s = singleton_execution_set(q_id, "SELECT * FROM inventory WHERE inventory_id = 1".into())?; - - let data = DatabaseTableUpdate { - table_id: schema.table_id, - table_name: TableName::for_test("inventory"), - deletes: [].into(), - inserts: [row.clone()].into(), - }; - - let update = DatabaseUpdate { - tables: [data.clone()].into(), - }; - - check_query_incr(&db, &tx, &s, &update, 1, &[row])?; - - let q = QueryExpr::new(&*schema); - - let (q, sources) = query_to_mem_table(q, &data); - //Try access the private table - match run_query( - &db, - &tx, - &q, - AuthCtx::new(Identity::__dummy(), Identity::from_byte_array([1u8; 32])), - sources, - ) { - Ok(_) => { - panic!("it allows to execute against private table") - } - Err(err) => { - if err.get_auth_error().is_none() { - panic!("fail to report an `auth` violation for private table, it gets {err}") - } - } - } - - Ok(()) - } - - #[test] - fn test_subscribe_sql() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [MobileEntityState] - let schema = &[ - ("entity_id", AlgebraicType::U64), - ("location_x", AlgebraicType::I32), - ("location_z", AlgebraicType::I32), - ("destination_x", AlgebraicType::I32), - ("destination_z", AlgebraicType::I32), - ("is_running", AlgebraicType::Bool), - ("timestamp", AlgebraicType::U64), - ("dimension", AlgebraicType::U32), - ]; - let indexes = &[0.into(), 1.into(), 2.into()]; - db.create_table_for_test("MobileEntityState", schema, indexes)?; - - // Create table [EnemyState] - let schema = &[ - ("entity_id", AlgebraicType::U64), - ("herd_id", AlgebraicType::I32), - ("status", AlgebraicType::I32), - ("type", AlgebraicType::I32), - ("direction", AlgebraicType::I32), - ]; - let indexes = &[0.into()]; - db.create_table_for_test("EnemyState", schema, indexes)?; - - for sql_insert in [ - "insert into MobileEntityState (entity_id, location_x, location_z, destination_x, destination_z, is_running, timestamp, dimension) values (1, 96001, 96001, 96001, 1867045146, false, 17167179743690094247, 3926297397)", - "insert into MobileEntityState (entity_id, location_x, location_z, destination_x, destination_z, is_running, timestamp, dimension) values (2, 96001, 191000, 191000, 1560020888, true, 2947537077064292621, 445019304)", - "insert into EnemyState (entity_id, herd_id, status, type, direction) values (1, 1181485940, 1633678837, 1158301365, 132191327)", - "insert into EnemyState (entity_id, herd_id, status, type, direction) values (2, 2017368418, 194072456, 34423057, 1296770410)"] { - run_for_testing(&db, sql_insert)?; - } - - let sql_query = "\ - SELECT EnemyState.* FROM EnemyState \ - JOIN MobileEntityState ON MobileEntityState.entity_id = EnemyState.entity_id \ - WHERE MobileEntityState.location_x > 96000 \ - AND MobileEntityState.location_x < 192000 \ - AND MobileEntityState.location_z > 96000 \ - AND MobileEntityState.location_z < 192000"; - - let tx = begin_tx(&db); - let qset = compile_read_only_queryset(&db, &AuthCtx::for_testing(), &tx, sql_query)?; - - for q in qset { - let result = run_query( - &db, - &tx, - q.as_expr(), - AuthCtx::for_testing(), - SourceSet::<_, 0>::empty(), - )?; - assert_eq!(result.len(), 1, "Join query did not return any rows"); - } - - Ok(()) - } - - #[test] - fn test_subscribe_all() -> ResultTest<()> { - let db = TestDB::durable()?; - - let mut tx = begin_mut_tx(&db); - - let (schema_1, _, _, _) = make_inv(&db, &mut tx, StAccess::Public)?; - let (schema_2, _, _, _) = make_player(&db, &mut tx)?; - db.commit_tx(tx)?; - let row_1 = product!(1u64, "health"); - let row_2 = product!(2u64, "jhon doe"); - let tx = db.begin_tx(Workload::Subscribe); - let s = legacy_get_all(&db, &tx, &AuthCtx::for_testing())?.into(); - check_query_eval(&db, &tx, &s, 2, &[row_1.clone(), row_2.clone()])?; - - let data1 = DatabaseTableUpdate { - table_id: schema_1.table_id, - table_name: TableName::for_test("inventory"), - deletes: [row_1].into(), - inserts: [].into(), - }; - - let data2 = DatabaseTableUpdate { - table_id: schema_2.table_id, - table_name: TableName::for_test("player"), - deletes: [].into(), - inserts: [row_2].into(), - }; - - let update = DatabaseUpdate { - tables: smallvec::smallvec![data1, data2], - }; - - let row_1 = product!(1u64, "health"); - let row_2 = product!(2u64, "jhon doe"); - check_query_incr(&db, &tx, &s, &update, 2, &[row_1, row_2])?; - - Ok(()) - } - - #[test] - fn test_classify() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [plain] - let schema = &[("id", AlgebraicType::U64)]; - db.create_table_for_test("plain", schema, &[])?; - - // Create table [lhs] with indexes on [id] and [x] - let schema = &[("id", AlgebraicType::U64), ("x", AlgebraicType::I32)]; - let indexes = &[ColId(0), ColId(1)]; - db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with indexes on [id] and [y] - let schema = &[("id", AlgebraicType::U64), ("y", AlgebraicType::I32)]; - let indexes = &[ColId(0), ColId(1)]; - db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - - // All single table queries are supported - let scans = [ - "SELECT * FROM plain", - "SELECT * FROM plain WHERE id > 5", - "SELECT plain.* FROM plain", - "SELECT plain.* FROM plain WHERE plain.id = 5", - "SELECT * FROM lhs", - "SELECT * FROM lhs WHERE id > 5", - ]; - for scan in scans { - let expr = compile_read_only_queryset(&db, &AuthCtx::for_testing(), &tx, scan)? - .pop() - .unwrap(); - assert_eq!(expr.kind(), Supported::Select, "{scan}\n{expr:#?}"); - } - - // Only index semijoins are supported - let joins = ["SELECT lhs.* FROM lhs JOIN rhs ON lhs.id = rhs.id WHERE rhs.y < 10"]; - for join in joins { - let expr = compile_read_only_queryset(&db, &AuthCtx::for_testing(), &tx, join)? - .pop() - .unwrap(); - assert_eq!(expr.kind(), Supported::Semijoin, "{join}\n{expr:#?}"); - } - - // All other joins are unsupported - let joins = [ - "SELECT lhs.* FROM lhs JOIN rhs ON lhs.id = rhs.id", - "SELECT * FROM lhs JOIN rhs ON lhs.id = rhs.id", - "SELECT * FROM lhs JOIN rhs ON lhs.id = rhs.id WHERE lhs.x < 10", - ]; - for join in joins { - match compile_read_only_queryset(&db, &AuthCtx::for_testing(), &tx, join) { - Err(DBError::Subscription(SubscriptionError::Unsupported(_)) | DBError::TypeError(_)) => (), - x => panic!("Unexpected: {x:?}"), - } - } - - Ok(()) - } - - /// Create table [lhs] with index on [id] - fn create_lhs_table_for_eval_incr(db: &RelationalDB) -> ResultTest { - const I32: AlgebraicType = AlgebraicType::I32; - let lhs_id = db.create_table_for_test("lhs", &[("id", I32), ("x", I32)], &[0.into()])?; - with_auto_commit(db, |tx| { - for i in 0..5 { - let row = product!(i, i + 5); - insert(db, tx, lhs_id, &row)?; - } - Ok(lhs_id) - }) - } - - /// Create table [rhs] with index on [id] - fn create_rhs_table_for_eval_incr(db: &RelationalDB) -> ResultTest { - const I32: AlgebraicType = AlgebraicType::I32; - let rhs_id = db.create_table_for_test("rhs", &[("rid", I32), ("id", I32), ("y", I32)], &[1.into()])?; - with_auto_commit(db, |tx| { - for i in 10..20 { - let row = product!(i, i - 10, i - 8); - insert(db, tx, rhs_id, &row)?; - } - Ok(rhs_id) - }) - } - - fn compile_query(db: &RelationalDB) -> ResultTest { - with_read_only(db, |tx| { - let auth = AuthCtx::for_testing(); - let tx = SchemaViewer::new(tx, &auth); - // Should be answered using an index semijion - let sql = "select lhs.* from lhs join rhs on lhs.id = rhs.id where rhs.y >= 2 and rhs.y <= 4"; - Ok(SubscriptionPlan::compile(sql, &tx, &auth) - .map(|(mut plans, _)| { - assert_eq!(plans.len(), 1); - plans.pop().unwrap() - }) - .unwrap()) - }) - } - - fn run_eval_incr_test ResultTest>(test_fn: F) -> ResultTest { - TestDB::durable().map(|db| test_fn(&db))??; - TestDB::durable().map(|db| test_fn(&db.with_row_count(Arc::new(|_, _| 5))))? - } - - #[test] - /// TODO: This test is a slight modification of [test_eval_incr_for_index_join]. - /// Essentially the WHERE condition is on different tables. - /// Should refactor to reduce duplicate logic between the two tests. - fn test_eval_incr_for_left_semijoin() -> ResultTest<()> { - fn compile_query(db: &RelationalDB) -> ResultTest { - with_read_only(db, |tx| { - let auth = AuthCtx::for_testing(); - let tx = SchemaViewer::new(tx, &auth); - // Should be answered using an index semijion - let sql = "select lhs.* from lhs join rhs on lhs.id = rhs.id where lhs.x >= 5 and lhs.x <= 7"; - Ok(SubscriptionPlan::compile(sql, &tx, &auth) - .map(|(mut plans, _)| { - assert_eq!(plans.len(), 1); - plans.pop().unwrap() - }) - .unwrap()) - }) - } - - // Case 1: - // Delete a row inside the region of lhs, - // Insert a row inside the region of lhs. - fn index_join_case_1(db: &RelationalDB) -> ResultTest<()> { - let _ = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(10, 0, 2); - let r2 = product!(10, 0, 3); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // No updates to report - assert!(result.is_empty()); - Ok(()) - } - - // Case 2: - // Delete a row outside the region of lhs, - // Insert a row outside the region of lhs. - fn index_join_case_2(db: &RelationalDB) -> ResultTest<()> { - let _ = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(13, 3, 5); - let r2 = product!(13, 4, 6); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // No updates to report - assert!(result.is_empty()); - Ok(()) - } - - // Case 3: - // Delete a row inside the region of lhs, - // Insert a row outside the region of lhs. - fn index_join_case_3(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(10, 0, 2); - let r2 = product!(10, 3, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // A single delete from lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], delete_op(lhs_id, "lhs", product!(0, 5))); - Ok(()) - } - - // Case 4: - // Delete a row outside the region of lhs, - // Insert a row inside the region of lhs. - fn index_join_case_4(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(13, 3, 5); - let r2 = product!(13, 2, 4); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // A single insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], insert_op(lhs_id, "lhs", product!(2, 7))); - Ok(()) - } - - // Case 5: - // Insert row into rhs, - // Insert matching row inside the region of lhs. - fn index_join_case_5(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(5, 6); - let rhs_row = product!(20, 5, 3); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, true), (rhs_id, rhs_row, true)], - )?; - - // A single insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], insert_op(lhs_id, "lhs", product!(5, 6))); - Ok(()) - } - - // Case 6: - // Insert row into rhs, - // Insert matching row outside the region of lhs. - fn index_join_case_6(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(5, 10); - let rhs_row = product!(20, 5, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, true), (rhs_id, rhs_row, true)], - )?; - - // No updates to report - assert_eq!(result.tables.len(), 0); - Ok(()) - } - - // Case 7: - // Delete row from rhs, - // Delete matching row inside the region of lhs. - fn index_join_case_7(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(0, 5); - let rhs_row = product!(10, 0, 2); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, false), (rhs_id, rhs_row, false)], - )?; - - // A single delete from lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], delete_op(lhs_id, "lhs", product!(0, 5))); - Ok(()) - } - - // Case 8: - // Delete row from rhs, - // Delete matching row outside the region of lhs. - fn index_join_case_8(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(3, 8); - let rhs_row = product!(13, 3, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, false), (rhs_id, rhs_row, false)], - )?; - - // No updates to report - assert_eq!(result.tables.len(), 0); - Ok(()) - } - - // Case 9: - // Update row from rhs, - // Update matching row inside the region of lhs. - fn index_join_case_9(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_old = product!(1, 6); - let lhs_new = product!(1, 7); - let rhs_old = product!(11, 1, 3); - let rhs_new = product!(11, 1, 4); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![ - (lhs_id, lhs_old, false), - (rhs_id, rhs_old, false), - (lhs_id, lhs_new, true), - (rhs_id, rhs_new, true), - ], - )?; - - let lhs_old = product!(1, 6); - let lhs_new = product!(1, 7); - - // A delete and an insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!( - result.tables[0], - DatabaseTableUpdate { - table_id: lhs_id, - table_name: TableName::for_test("lhs"), - deletes: [lhs_old].into(), - inserts: [lhs_new].into(), - }, - ); - Ok(()) - } - - run_eval_incr_test(index_join_case_1)?; - run_eval_incr_test(index_join_case_2)?; - run_eval_incr_test(index_join_case_3)?; - run_eval_incr_test(index_join_case_4)?; - run_eval_incr_test(index_join_case_5)?; - run_eval_incr_test(index_join_case_6)?; - run_eval_incr_test(index_join_case_7)?; - run_eval_incr_test(index_join_case_8)?; - run_eval_incr_test(index_join_case_9)?; - Ok(()) - } - - #[test] - fn test_eval_incr_for_index_join() -> ResultTest<()> { - // Case 1: - // Delete a row inside the region of rhs, - // Insert a row inside the region of rhs. - run_eval_incr_test(index_join_case_1)?; - // Case 2: - // Delete a row outside the region of rhs, - // Insert a row outside the region of rhs. - run_eval_incr_test(index_join_case_2)?; - // Case 3: - // Delete a row inside the region of rhs, - // Insert a row outside the region of rhs. - run_eval_incr_test(index_join_case_3)?; - // Case 4: - // Delete a row outside the region of rhs, - // Insert a row inside the region of rhs. - run_eval_incr_test(index_join_case_4)?; - // Case 5: - // Insert row into lhs, - // Insert matching row inside the region of rhs. - run_eval_incr_test(index_join_case_5)?; - // Case 6: - // Insert row into lhs, - // Insert matching row outside the region of rhs. - run_eval_incr_test(index_join_case_6)?; - // Case 7: - // Delete row from lhs, - // Delete matching row inside the region of rhs. - run_eval_incr_test(index_join_case_7)?; - // Case 8: - // Delete row from lhs, - // Delete matching row outside the region of rhs. - run_eval_incr_test(index_join_case_8)?; - // Case 9: - // Update row from lhs, - // Update matching row inside the region of rhs. - run_eval_incr_test(index_join_case_9)?; - Ok(()) - } - - fn eval_incr( - db: &RelationalDB, - metrics: &mut ExecutionMetrics, - plan: &SubscriptionPlan, - ops: Vec<(TableId, ProductValue, bool)>, - ) -> ResultTest { - let mut tx = begin_mut_tx(db); - - for (table_id, row, insert) in ops { - if insert { - insert_row(db, &mut tx, table_id, row)?; - } else { - delete_row(db, &mut tx, table_id, row); - } - } - - let (data, _, tx) = db.commit_tx_downgrade(tx, Workload::ForTests); - let table_id = plan.subscribed_table_id(); - let table_name = plan.subscribed_table_name().clone(); - let tx = DeltaTx::new(&tx, &data, &QueriedTableIndexIds::from_iter(plan.index_ids())); - - // IMPORTANT: FOR TESTING ONLY! - // - // This utility implements set semantics for incremental updates. - // This is safe because we are only testing PK/FK joins, - // and we don't have to track row multiplicities for PK/FK joins. - // But in general we must assume bag semantics for server side tests. - let mut eval_delta = || { - // Note, we can't determine apriori what capacity to allocate - let mut inserts = HashMap::new(); - let mut deletes = vec![]; - - plan.for_each_insert(&tx, metrics, &mut |row| { - inserts - .entry(RelValue::from(row)) - // Row already inserted? - // Increment its multiplicity. - .and_modify(|n| *n += 1) - .or_insert(1); - Ok(()) - }) - .unwrap(); - - plan.for_each_delete(&tx, metrics, &mut |row| { - let row = RelValue::from(row); - match inserts.get_mut(&row) { - // This row was not inserted. - // Add it to the delete set. - None => { - deletes.push(row); - } - // This row was inserted. - // Decrement the multiplicity. - Some(1) => { - inserts.remove(&row); - } - // This row was inserted. - // Decrement the multiplicity. - Some(n) => { - *n -= 1; - } - } - Ok(()) - }) - .unwrap(); - - UpdatesRelValue { - inserts: inserts.into_keys().collect(), - deletes, - } - }; - - let updates = eval_delta(); - - let inserts = updates - .inserts - .into_iter() - .map(RelValue::into_product_value) - .collect::>(); - let deletes = updates - .deletes - .into_iter() - .map(RelValue::into_product_value) - .collect::>(); - - let tables = if inserts.is_empty() && deletes.is_empty() { - smallvec::smallvec![] - } else { - smallvec::smallvec![DatabaseTableUpdate { - table_id, - table_name, - inserts, - deletes, - }] - }; - Ok(DatabaseUpdate { tables }) - } - - // Case 1: - // Delete a row inside the region of rhs, - // Insert a row inside the region of rhs. - fn index_join_case_1(db: &RelationalDB) -> ResultTest<()> { - let _ = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(10, 0, 2); - let r2 = product!(10, 0, 3); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // No updates to report - assert!(result.is_empty()); - - // The lhs row must always probe the rhs index. - // The rhs row passes the rhs filter, - // resulting in a probe of the rhs index. - assert_eq!(metrics.index_seeks, 2); - Ok(()) - } - - // Case 2: - // Delete a row outside the region of rhs, - // Insert a row outside the region of rhs. - fn index_join_case_2(db: &RelationalDB) -> ResultTest<()> { - let _ = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(13, 3, 5); - let r2 = product!(13, 3, 6); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // No updates to report - assert!(result.is_empty()); - - // The lhs row must always probe the rhs index. - // The rhs row doesn't pass the rhs filter, - // hence it doesn't survive to probe the lhs index. - assert_eq!(metrics.index_seeks, 0); - Ok(()) - } - - // Case 3: - // Delete a row inside the region of rhs, - // Insert a row outside the region of rhs. - fn index_join_case_3(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(10, 0, 2); - let r2 = product!(10, 0, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // A single delete from lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], delete_op(lhs_id, "lhs", product!(0, 5))); - - // One row passes the rhs filter, the other does not. - // This results in a single probe of the lhs index. - assert_eq!(metrics.index_seeks, 1); - Ok(()) - } - - // Case 4: - // Delete a row outside the region of rhs, - // Insert a row inside the region of rhs. - fn index_join_case_4(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let r1 = product!(13, 3, 5); - let r2 = product!(13, 3, 4); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr(db, &mut metrics, &query, vec![(rhs_id, r1, false), (rhs_id, r2, true)])?; - - // A single insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], insert_op(lhs_id, "lhs", product!(3, 8))); - - // One row passes the rhs filter, the other does not. - // This results in a single probe of the lhs index. - assert_eq!(metrics.index_seeks, 1); - Ok(()) - } - - // Case 5: - // Insert row into lhs, - // Insert matching row inside the region of rhs. - fn index_join_case_5(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(5, 10); - let rhs_row = product!(20, 5, 3); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, true), (rhs_id, rhs_row, true)], - )?; - - // A single insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], insert_op(lhs_id, "lhs", product!(5, 10))); - - // Because we only have inserts, only 3 delta queries are evaluated, - // each one an index join, and each one probing the join index exactly once. - assert_eq!(metrics.index_seeks, 3); - Ok(()) - } - - // Case 6: - // Insert row into lhs, - // Insert matching row outside the region of rhs. - fn index_join_case_6(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(5, 10); - let rhs_row = product!(20, 5, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, true), (rhs_id, rhs_row, true)], - )?; - - // No updates to report - assert_eq!(result.tables.len(), 0); - - // Because we only have inserts, only 3 delta queries are evaluated, - // each one an index join, and each one probing the join index at most once. - // - // The lhs row always probes the rhs index, - // but the rhs row doesn't pass the rhs filter, - // hence it doesn't survive to probe the lhs index. - assert_eq!(metrics.index_seeks, 2); - Ok(()) - } - - // Case 7: - // Delete row from lhs, - // Delete matching row inside the region of rhs. - fn index_join_case_7(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(0, 5); - let rhs_row = product!(10, 0, 2); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, false), (rhs_id, rhs_row, false)], - )?; - - // A single delete from lhs - assert_eq!(result.tables.len(), 1); - assert_eq!(result.tables[0], delete_op(lhs_id, "lhs", product!(0, 5))); - - // Because we only have inserts, only 3 delta queries are evaluated, - // each one an index join, and each one probing the join index exactly once. - assert_eq!(metrics.index_seeks, 3); - Ok(()) - } - - // Case 8: - // Delete row from lhs, - // Delete matching row outside the region of rhs. - fn index_join_case_8(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_row = product!(3, 8); - let rhs_row = product!(13, 3, 5); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![(lhs_id, lhs_row, false), (rhs_id, rhs_row, false)], - )?; - - // No updates to report - assert_eq!(result.tables.len(), 0); - - // Because we only have inserts, only 3 delta queries are evaluated, - // each one an index join, and each one probing the join index at most once. - // - // The lhs row always probes the rhs index, - // but the rhs row doesn't pass the rhs filter, - // hence it doesn't survive to probe the lhs index. - assert_eq!(metrics.index_seeks, 2); - Ok(()) - } - - // Case 9: - // Update row from lhs, - // Update matching row inside the region of rhs. - fn index_join_case_9(db: &RelationalDB) -> ResultTest<()> { - let lhs_id = create_lhs_table_for_eval_incr(db)?; - let rhs_id = create_rhs_table_for_eval_incr(db)?; - let query = compile_query(db)?; - - let lhs_old = product!(1, 6); - let lhs_new = product!(1, 7); - let rhs_old = product!(11, 1, 3); - let rhs_new = product!(11, 1, 4); - - let mut metrics = ExecutionMetrics::default(); - - let result = eval_incr( - db, - &mut metrics, - &query, - vec![ - (lhs_id, lhs_old, false), - (rhs_id, rhs_old, false), - (lhs_id, lhs_new, true), - (rhs_id, rhs_new, true), - ], - )?; - - let lhs_old = product!(1, 6); - let lhs_new = product!(1, 7); - - // A delete and an insert into lhs - assert_eq!(result.tables.len(), 1); - assert_eq!( - result.tables[0], - DatabaseTableUpdate { - table_id: lhs_id, - table_name: TableName::for_test("lhs"), - deletes: [lhs_old].into(), - inserts: [lhs_new].into(), - }, - ); - - // Because we have deletes and inserts for both tables, - // all 8 delta queries are evaluated, - // each one probing the join index exactly once. - assert_eq!(metrics.index_seeks, 8); - Ok(()) - } -} diff --git a/crates/core/src/subscription/subscription.rs b/crates/core/src/subscription/subscription.rs index 528aae82033..a9f3bd12f62 100644 --- a/crates/core/src/subscription/subscription.rs +++ b/crates/core/src/subscription/subscription.rs @@ -1,617 +1,16 @@ -//! # Subscription Evaluation -//! -//! This module defines how subscription queries are evaluated. -//! -//! A subscription query returns rows matching one or more SQL SELECT statements -//! alongside information about the affected table and an operation identifier -//! (insert or delete) -- a [`DatabaseUpdate`]. This allows subscribers to -//! maintain their own view of (virtual) tables matching the statements. -//! -//! When the [`Subscription`] is first established, all its queries are -//! evaluated against the database and the results are sent back to the -//! subscriber (see [`QuerySet::eval`]). Afterwards, the [`QuerySet`] is -//! evaluated [incrementally][`QuerySet::eval_incr`] whenever a transaction -//! commits updates to the database. -//! -//! Incremental evaluation is straightforward if a query selects from a single -//! table (`SELECT * FROM table WHERE ...`). For join queries, however, it is -//! not obvious how to compute the minimal set of operations for the client to -//! synchronize its state. In general, we conjecture that server-side -//! materialized views are necessary. We find, however, that a particular kind -//! of join query _can_ be evaluated incrementally without materialized views. - -use super::execution_unit::{ExecutionUnit, QueryHash}; +use super::execution_unit::QueryHash; use super::module_subscription_manager::Plan; -use super::query; -use crate::db::relational_db::{RelationalDB, Tx}; -use crate::error::{DBError, SubscriptionError}; -use crate::host::module_host::{DatabaseTableUpdate, DatabaseUpdateRelValue, UpdatesRelValue}; +use crate::db::relational_db::RelationalDB; +use crate::error::DBError; use crate::sql::ast::SchemaViewer; -use crate::subscription::websocket_building::{BuildableWebsocketFormat, RowListBuilderSource}; -use crate::vm::{build_query, TxMode}; -use anyhow::Context; -use itertools::Either; -use spacetimedb_client_api_messages::websocket::v1 as ws_v1; -use spacetimedb_data_structures::map::HashSet; use spacetimedb_datastore::locking_tx_datastore::state_view::StateView; -use spacetimedb_datastore::locking_tx_datastore::TxId; use spacetimedb_lib::db::auth::StTableType; use spacetimedb_lib::identity::AuthCtx; -use spacetimedb_primitives::TableId; -use spacetimedb_sats::ProductValue; -use spacetimedb_schema::def::error::AuthError; -use spacetimedb_schema::relation::DbTable; use spacetimedb_schema::schema::TableSchema; -use spacetimedb_schema::table_name::TableName; use spacetimedb_subscription::SubscriptionPlan; -use spacetimedb_vm::expr::{self, AuthAccess, IndexJoin, Query, QueryExpr, SourceExpr, SourceProvider, SourceSet}; -use spacetimedb_vm::rel_ops::RelOps; -use spacetimedb_vm::relation::{MemTable, RelValue}; -use std::hash::Hash; -use std::iter; use std::sync::Arc; -use std::time::Duration; - -/// A [`QueryExpr`] tagged with [`query::Supported`]. -/// -/// Constructed via `TryFrom`, which rejects unsupported queries. -#[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub struct SupportedQuery { - pub kind: query::Supported, - pub expr: QueryExpr, - pub sql: String, -} - -impl SupportedQuery { - pub fn new(expr: QueryExpr, sql: String) -> Result { - let kind = query::classify(&expr).ok_or_else(|| SubscriptionError::Unsupported(sql.clone()))?; - Ok(Self { kind, expr, sql }) - } - - pub fn kind(&self) -> query::Supported { - self.kind - } - - pub fn as_expr(&self) -> &QueryExpr { - self.as_ref() - } - - /// The table whose rows are being returned. - pub fn return_table(&self) -> TableId { - self.expr.source.get_db_table().unwrap().table_id - } - - pub fn return_name(&self) -> &TableName { - self.expr.source.table_name() - } - - /// This is the same as the return table unless this is a join. - /// For joins this is the table whose rows are not being returned. - pub fn filter_table(&self) -> TableId { - let return_table = self.return_table(); - self.expr - .query - .first() - .and_then(|op| { - if let Query::IndexJoin(join) = op { - Some(join) - } else { - None - } - }) - .and_then(|join| { - join.index_side - .get_db_table() - .filter(|t| t.table_id != return_table) - .or_else(|| join.probe_side.source.get_db_table()) - .filter(|t| t.table_id != return_table) - .map(|t| t.table_id) - }) - .unwrap_or(return_table) - } -} - -#[cfg(test)] -impl TryFrom<(QueryExpr, String)> for SupportedQuery { - type Error = DBError; - - fn try_from((expr, sql): (QueryExpr, String)) -> Result { - let kind = query::classify(&expr).context("Unsupported query expression")?; - Ok(Self { kind, expr, sql }) - } -} - -impl AsRef for SupportedQuery { - fn as_ref(&self) -> &QueryExpr { - &self.expr - } -} - -/// Evaluates `query` and returns all the updates. -fn eval_updates<'a>( - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - query: &'a QueryExpr, - mut sources: impl SourceProvider<'a>, -) -> impl 'a + Iterator> { - let mut query = build_query(db, tx, query, &mut sources); - iter::from_fn(move || query.next()) -} - -/// A [`query::Supported::Semijoin`] compiled for incremental evaluations. -/// -/// The following assumptions are made for the incremental evaluation to be -/// correct without maintaining a materialized view: -/// -/// * The join is a primary foreign key semijoin, i.e. one row from the -/// right table joins with at most one row from the left table. -/// * The rows in the [`DatabaseTableUpdate`]s on either side of the join -/// are already committed to the underlying "physical" tables. -/// * We maintain set semantics, i.e. no two rows with the same value can appear in the result. -/// -/// See [IncrementalJoin::eval] for a detailed algorithmic explanation. -/// However note that there are at most three distinct plans that we must evaluate. -/// They are: -/// -/// 1. A(+|-) x B -/// 2. A x B(+|-) -/// 3. A(+|-) x B(+|-) -/// -/// All three of these plans are compiled ahead of time, -/// before the evaluation of any row updates. -/// -/// For a more in-depth discussion, see the [module-level documentation](./index.html). -#[derive(Debug)] -pub struct IncrementalJoin { - /// The lhs table which may be the index side or the probe side. - lhs: DbTable, - /// The rhs table which may be the index side or the probe side. - rhs: DbTable, - /// This determines which side is the index side and which is the probe side. - return_index_rows: bool, - /// A(+|-) join B - virtual_index_plan: QueryExpr, - /// A join B(+|-) - virtual_probe_plan: QueryExpr, - /// A(+|-) join B(+|-) - virtual_plan: QueryExpr, -} - -impl IncrementalJoin { - fn optimize_query(join: IndexJoin) -> QueryExpr { - let expr = QueryExpr::from(join); - // Because (at least) one of the two tables will be a `MemTable`, - // and therefore not have indexes, - // the `row_count` function we pass to `optimize` is useless; - // either the `DbTable` must be used as the index side, - // or for the `A- join B-` case, the join must be rewritten to not use indexes. - expr.optimize(&|_, _| 0) - } - - /// Return the query plan where the lhs is a delta table. - fn plan_for_delta_lhs(&self) -> &QueryExpr { - if self.return_index_rows { - &self.virtual_index_plan - } else { - &self.virtual_probe_plan - } - } - - /// Return the query plan where the rhs is a delta table. - fn plan_for_delta_rhs(&self) -> &QueryExpr { - if self.return_index_rows { - &self.virtual_probe_plan - } else { - &self.virtual_index_plan - } - } - - /// Construct an [`IncrementalJoin`] from a [`QueryExpr`]. - /// - /// - /// An error is returned if the expression is not well-formed. - pub fn new(expr: &QueryExpr) -> anyhow::Result { - if expr.query.len() != 1 { - return Err(anyhow::anyhow!("expected a single index join, but got {expr:#?}")); - } - let expr::Query::IndexJoin(ref join) = expr.query[0] else { - return Err(anyhow::anyhow!("expected a single index join, but got {expr:#?}")); - }; - - let index_table = join - .index_side - .get_db_table() - .context("expected a physical database table")? - .clone(); - let probe_table = join - .probe_side - .source - .get_db_table() - .context("expected a physical database table")? - .clone(); - - let (virtual_index_plan, _sources) = with_delta_table(join.clone(), Some(Vec::new()), None); - debug_assert_eq!(_sources.len(), 1); - let virtual_index_plan = Self::optimize_query(virtual_index_plan); - - let (virtual_probe_plan, _sources) = with_delta_table(join.clone(), None, Some(Vec::new())); - debug_assert_eq!(_sources.len(), 1); - let virtual_probe_plan = Self::optimize_query(virtual_probe_plan); - - let (virtual_plan, _sources) = with_delta_table(join.clone(), Some(Vec::new()), Some(Vec::new())); - debug_assert_eq!(_sources.len(), 2); - let virtual_plan = virtual_plan.to_inner_join(); - - let return_index_rows = join.return_index_rows; - - let (lhs, rhs) = if return_index_rows { - (index_table, probe_table) - } else { - (probe_table, index_table) - }; - - Ok(Self { - lhs, - rhs, - return_index_rows, - virtual_index_plan, - virtual_probe_plan, - virtual_plan, - }) - } - - /// Evaluate join plan for lhs updates. - fn eval_lhs<'a>( - &'a self, - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - lhs: impl 'a + Iterator, - ) -> impl Iterator> { - eval_updates(db, tx, self.plan_for_delta_lhs(), Some(lhs.map(RelValue::ProjRef))) - } - - /// Evaluate join plan for rhs updates. - fn eval_rhs<'a>( - &'a self, - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - rhs: impl 'a + Iterator, - ) -> impl Iterator> { - eval_updates(db, tx, self.plan_for_delta_rhs(), Some(rhs.map(RelValue::ProjRef))) - } - - /// Evaluate join plan for both lhs and rhs updates. - fn eval_all<'a>( - &'a self, - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - lhs: impl 'a + Iterator, - rhs: impl 'a + Iterator, - ) -> impl Iterator> { - let is = Either::Left(lhs.map(RelValue::ProjRef)); - let ps = Either::Right(rhs.map(RelValue::ProjRef)); - let sources: SourceSet<_, 2> = if self.return_index_rows { [is, ps] } else { [ps, is] }.into(); - eval_updates(db, tx, &self.virtual_plan, sources) - } - - /// Evaluate this [`IncrementalJoin`] over the row updates of a transaction t. - /// - /// In the comments that follow, - /// B(t) refers to the state of table B as of transaction t. - /// In particular, B(t) includes all of the changes from t. - /// B(s) refers to the state of table B as of transaction s, - /// where s is the transaction immediately preceding t. - /// - /// Now we may ask, - /// given a set of updates to tables A and/or B, - /// how to efficiently compute the semijoin A(t) x B(t)? - /// - /// First consider newly inserted rows of A. - /// We want to know if they join with any newly inserted rows of B, - /// or if they join with any previously existing rows of B. - /// That is: - /// - /// A+ x B(t) - /// - /// Note that we don't need to consider deleted rows from B. - /// Because they have no bearing on newly inserted rows of A. - /// - /// Now consider rows that were deleted from A. - /// Similarly we want to know if they join with any deleted rows of B, - /// or if they join with any previously existing rows of B. - /// That is: - /// - /// A- x B(s) U A- x B- = A- x B(t) \ A- x B+ U A- x B- - /// - /// Note that we don't necessarily care about newly inserted rows of B in this case. - /// Because even if they join with deleted rows of A, - /// they were never included in the results to begin with. - /// However, during this evaluation, we no longer have direct access to B(s). - /// Hence we must derive it by subtracting A- x B+ from A- x B(t). - /// - /// Finally we must consider previously existing rows of A. - /// That is: - /// - /// A(s) x B+ = A(t) x B+ \ A+ x B+ - /// A(s) x B- = A(t) x B- \ A+ x B- - /// - /// In total we must consider 8 distinct joins. - /// They are: - /// - /// (1) A+ x B(t) - /// (2) A- x B(t) - /// (3) A- x B+ - /// (4) A- x B- - /// (5) A(t) x B+ - /// (6) A(t) x B- - /// (7) A+ x B+ - /// (8) A+ x B- - pub fn eval<'a>( - &'a self, - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - updates: impl 'a + Clone + Iterator, - ) -> UpdatesRelValue<'a> { - // Find any updates to the tables mentioned by `self` and group them into [`JoinSide`]s. - // - // The supplied updates are assumed to be the full set of updates from a single transaction. - // - // If neither side of the join is modified by any of the updates, `None` is returned. - // Otherwise, `Some((index_table, probe_table))` is returned - // with the updates partitioned into the respective [`JoinSide`]. - // ===================================================================== - - // Partitions `updates` into `deletes` and `inserts` for `lhs` and `rhs`. - let mut lhs_deletes = updates - .clone() - .filter(|u| u.table_id == self.lhs.table_id) - .flat_map(|u| u.deletes.iter()) - .peekable(); - let mut lhs_inserts = updates - .clone() - .filter(|u| u.table_id == self.lhs.table_id) - .flat_map(|u| u.inserts.iter()) - .peekable(); - let mut rhs_deletes = updates - .clone() - .filter(|u| u.table_id == self.rhs.table_id) - .flat_map(|u| u.deletes.iter()) - .peekable(); - let mut rhs_inserts = updates - .filter(|u| u.table_id == self.rhs.table_id) - .flat_map(|u| u.inserts.iter()) - .peekable(); - - // No updates at all? Return `None`. - let has_lhs_deletes = lhs_deletes.peek().is_some(); - let has_lhs_inserts = lhs_inserts.peek().is_some(); - let has_rhs_deletes = rhs_deletes.peek().is_some(); - let has_rhs_inserts = rhs_inserts.peek().is_some(); - if !has_lhs_deletes && !has_lhs_inserts && !has_rhs_deletes && !has_rhs_inserts { - return <_>::default(); - } - - // Compute the incremental join - // ===================================================================== - - fn collect_set>( - produce_if: bool, - producer: impl FnOnce() -> I, - ) -> HashSet { - if produce_if { - producer().collect() - } else { - HashSet::default() - } - } - - fn make_iter>( - produce_if: bool, - producer: impl FnOnce() -> I, - ) -> impl Iterator { - if produce_if { - Either::Left(producer()) - } else { - Either::Right(iter::empty()) - } - } - - // (1) A+ x B(t) - let j1_lhs_ins = lhs_inserts.clone(); - let join_1 = make_iter(has_lhs_inserts, || self.eval_lhs(db, tx, j1_lhs_ins)); - // (2) A- x B(t) - let j2_lhs_del = lhs_deletes.clone(); - let mut join_2 = collect_set(has_lhs_deletes, || self.eval_lhs(db, tx, j2_lhs_del)); - // (3) A- x B+ - let j3_lhs_del = lhs_deletes.clone(); - let j3_rhs_ins = rhs_inserts.clone(); - let join_3 = make_iter(has_lhs_deletes && has_rhs_inserts, || { - self.eval_all(db, tx, j3_lhs_del, j3_rhs_ins) - }); - // (4) A- x B- - let j4_rhs_del = rhs_deletes.clone(); - let join_4 = make_iter(has_lhs_deletes && has_rhs_deletes, || { - self.eval_all(db, tx, lhs_deletes, j4_rhs_del) - }); - // (5) A(t) x B+ - let j5_rhs_ins = rhs_inserts.clone(); - let mut join_5 = collect_set(has_rhs_inserts, || self.eval_rhs(db, tx, j5_rhs_ins)); - // (6) A(t) x B- - let j6_rhs_del = rhs_deletes.clone(); - let mut join_6 = collect_set(has_rhs_deletes, || self.eval_rhs(db, tx, j6_rhs_del)); - // (7) A+ x B+ - let j7_lhs_ins = lhs_inserts.clone(); - let join_7 = make_iter(has_lhs_inserts && has_rhs_inserts, || { - self.eval_all(db, tx, j7_lhs_ins, rhs_inserts) - }); - // (8) A+ x B- - let join_8 = make_iter(has_lhs_inserts && has_rhs_deletes, || { - self.eval_all(db, tx, lhs_inserts, rhs_deletes) - }); - - // A- x B(s) = A- x B(t) \ A- x B+ - for row in join_3 { - join_2.remove(&row); - } - // A(s) x B+ = A(t) x B+ \ A+ x B+ - for row in join_7 { - join_5.remove(&row); - } - // A(s) x B- = A(t) x B- \ A+ x B- - for row in join_8 { - join_6.remove(&row); - } - - join_5.retain(|row| !join_6.remove(row)); - - // Collect deletes: - let mut deletes = Vec::new(); - deletes.extend(join_2); - for row in join_4 { - deletes.push(row); - } - deletes.extend(join_6); - - // Collect inserts: - let mut inserts = Vec::new(); - for row in join_1 { - inserts.push(row); - } - inserts.extend(join_5); - - UpdatesRelValue { deletes, inserts } - } -} - -/// Replace an [IndexJoin]'s scan or fetch operation with a delta table. -/// A delta table consists purely of updates or changes to the base table. -fn with_delta_table( - mut join: IndexJoin, - index_side: Option>, - probe_side: Option>, -) -> (IndexJoin, SourceSet, 2>) { - let mut sources = SourceSet::empty(); - - let mut add_mem_table = - |side: SourceExpr, data| sources.add_mem_table(MemTable::new(side.head().clone(), side.table_access(), data)); - - if let Some(index_side) = index_side { - join.index_side = add_mem_table(join.index_side, index_side); - } - - if let Some(probe_side) = probe_side { - join.probe_side.source = add_mem_table(join.probe_side.source, probe_side); - } - - (join, sources) -} - -/// A set of independent single or multi-query execution units. -#[derive(Debug, PartialEq, Eq)] -pub struct ExecutionSet { - exec_units: Vec>, -} - -impl ExecutionSet { - pub fn eval( - &self, - db: &RelationalDB, - tx: &Tx, - rlb_pool: &impl RowListBuilderSource, - slow_query_threshold: Option, - compression: ws_v1::Compression, - ) -> ws_v1::DatabaseUpdate { - // evaluate each of the execution units in this ExecutionSet in parallel - let tables = self - .exec_units - // if you need eval to run single-threaded for debugging, change this to .iter() - .iter() - .filter_map(|unit| unit.eval(db, tx, rlb_pool, &unit.sql, slow_query_threshold, compression)) - .collect(); - ws_v1::DatabaseUpdate { tables } - } - - #[tracing::instrument(level = "trace", skip_all)] - pub fn eval_incr_for_test<'a>( - &'a self, - - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - database_update: &'a [&'a DatabaseTableUpdate], - slow_query_threshold: Option, - ) -> DatabaseUpdateRelValue<'a> { - let mut tables = Vec::new(); - for unit in &self.exec_units { - if let Some(table) = - unit.eval_incr(db, tx, &unit.sql, database_update.iter().copied(), slow_query_threshold) - { - tables.push(table); - } - } - - DatabaseUpdateRelValue { tables } - } - - /// The estimated number of rows returned by this execution set. - pub fn row_estimate(&self, tx: &TxId) -> u64 { - self.exec_units - .iter() - .map(|unit| unit.row_estimate(tx)) - .fold(0, |acc, est| acc.saturating_add(est)) - } - /// Return an iterator over the execution units - pub fn iter(&self) -> impl Iterator { - self.exec_units.iter().map(|arc| &**arc) - } -} - -impl FromIterator for ExecutionSet { - fn from_iter>(iter: T) -> Self { - ExecutionSet { - exec_units: iter.into_iter().map(|plan| Arc::new(plan.into())).collect(), - } - } -} - -impl IntoIterator for ExecutionSet { - type Item = Arc; - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.exec_units.into_iter() - } -} - -impl FromIterator> for ExecutionSet { - fn from_iter>>(iter: T) -> Self { - ExecutionSet { - exec_units: iter.into_iter().collect(), - } - } -} - -impl From>> for ExecutionSet { - fn from(value: Vec>) -> Self { - ExecutionSet::from_iter(value) - } -} - -impl From> for ExecutionSet { - fn from(value: Vec) -> Self { - ExecutionSet::from_iter(value) - } -} - -impl AuthAccess for ExecutionSet { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - self.exec_units.iter().try_for_each(|eu| eu.check_auth(auth)) - } -} - -/// Querieshttps://github.com/clockworklabs/SpacetimeDBPrivate/pull/2207 all the [`StTableType::User`] tables *right now* -/// and turns them into [`QueryExpr`], -/// the moral equivalent of `SELECT * FROM table`. +/// Queries all visible user tables right now and turns them into subscription plans. pub(crate) fn get_all( get_all_tables: F, relational_db: &RelationalDB, @@ -631,277 +30,10 @@ where SubscriptionPlan::compile(&sql, &tx, auth).map(|(plans, has_param)| { Plan::new( plans, - QueryHash::from_string( - &sql, - auth.caller(), - // Note that when generating hashes for queries from owners, - // we always treat them as if they were parameterized by :sender. - // This is because RLS is not applicable to owners. - // Hence owner hashes must never overlap with client hashes. - auth.bypass_rls() || has_param, - ), + QueryHash::from_string(&sql, auth.caller(), auth.bypass_rls() || has_param), sql, ) }) }) .collect::>()?) } - -/// Queries all the [`StTableType::User`] tables *right now* -/// and turns them into [`QueryExpr`], -/// the moral equivalent of `SELECT * FROM table`. -#[cfg(test)] -pub(crate) fn legacy_get_all( - relational_db: &RelationalDB, - tx: &Tx, - auth: &AuthCtx, -) -> Result, DBError> { - use std::ops::Deref; - - Ok(relational_db - .get_all_tables(tx)? - .iter() - .map(Deref::deref) - .filter(|t| t.table_type == StTableType::User && auth.has_read_access(t.table_access) && !t.is_event) - .map(|src| SupportedQuery { - kind: query::Supported::Select, - expr: QueryExpr::new(src), - sql: format!("SELECT * FROM {}", src.table_name), - }) - .collect()) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::db::relational_db::tests_utils::{begin_tx, TestDB}; - use crate::sql::compiler::compile_sql; - use spacetimedb_lib::{error::ResultTest, identity::AuthCtx}; - use spacetimedb_sats::{product, AlgebraicType}; - use spacetimedb_schema::relation::DbTable; - use spacetimedb_vm::expr::{CrudExpr, IndexJoin, Query, SourceExpr}; - - #[test] - // Compile an index join after replacing the index side with a virtual table. - // The original index and probe sides should be swapped after introducing the delta table. - fn compile_incremental_index_join_index_side() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let _ = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with index on [b, c] - let schema = &[ - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - let indexes = &[0.into(), 1.into()]; - let rhs_id = db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should generate an index join since there is an index on `lhs.b`. - // Should push the sargable range condition into the index join's probe side. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c > 2 and rhs.c < 4 and rhs.d = 3"; - let exp = compile_sql(&db, &AuthCtx::for_testing(), &tx, sql)?.remove(0); - - let CrudExpr::Query(mut expr) = exp else { - panic!("unexpected result from compilation: {exp:#?}"); - }; - - assert_eq!(&**expr.source.table_name(), "lhs"); - assert_eq!(expr.query.len(), 1); - - let join = expr.query.pop().unwrap(); - let Query::IndexJoin(join) = join else { - panic!("expected an index join, but got {join:#?}"); - }; - - // Create an insert for an incremental update. - let delta = vec![product![0u64, 0u64]]; - - // Optimize the query plan for the incremental update. - let (expr, _sources) = with_delta_table(join, Some(delta), None); - let expr: QueryExpr = expr.into(); - let mut expr = expr.optimize(&|_, _| i64::MAX); - assert_eq!(&**expr.source.table_name(), "lhs"); - assert_eq!(expr.query.len(), 1); - - let join = expr.query.pop().unwrap(); - let Query::IndexJoin(join) = join else { - panic!("expected an index join, but got {join:#?}"); - }; - - let IndexJoin { - probe_side: - QueryExpr { - source: SourceExpr::InMemory { .. }, - query: ref lhs, - }, - probe_col, - index_side: SourceExpr::DbTable(DbTable { - table_id: index_table, .. - }), - index_select: Some(_), - index_col, - return_index_rows: false, - } = join - else { - panic!("unexpected index join {join:#?}"); - }; - - assert!(lhs.is_empty()); - - // Assert that original index and probe tables have been swapped. - assert_eq!(index_table, rhs_id); - assert_eq!(index_col, 0.into()); - assert_eq!(probe_col, 1.into()); - Ok(()) - } - - #[test] - // Compile an index join after replacing the probe side with a virtual table. - // The original index and probe sides should remain after introducing the virtual table. - fn compile_incremental_index_join_probe_side() -> ResultTest<()> { - let db = TestDB::durable()?; - - // Create table [lhs] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let lhs_id = db.create_table_for_test("lhs", schema, indexes)?; - - // Create table [rhs] with index on [b, c] - let schema = &[ - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - let indexes = &[0.into(), 1.into()]; - let _ = db.create_table_for_test("rhs", schema, indexes)?; - - let tx = begin_tx(&db); - // Should generate an index join since there is an index on `lhs.b`. - // Should push the sargable range condition into the index join's probe side. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c > 2 and rhs.c < 4 and rhs.d = 3"; - let exp = compile_sql(&db, &AuthCtx::for_testing(), &tx, sql)?.remove(0); - - let CrudExpr::Query(mut expr) = exp else { - panic!("unexpected result from compilation: {exp:#?}"); - }; - - assert_eq!(&**expr.source.table_name(), "lhs"); - assert_eq!(expr.query.len(), 1); - - let join = expr.query.pop().unwrap(); - let Query::IndexJoin(join) = join else { - panic!("expected an index join, but got {join:#?}"); - }; - - // Create an insert for an incremental update. - let delta = vec![product![0u64, 0u64, 0u64]]; - - // Optimize the query plan for the incremental update. - let (expr, _sources) = with_delta_table(join, None, Some(delta)); - let expr = QueryExpr::from(expr); - let mut expr = expr.optimize(&|_, _| i64::MAX); - - assert_eq!(&**expr.source.table_name(), "lhs"); - assert_eq!(expr.query.len(), 1); - assert!(expr.source.is_db_table()); - - let join = expr.query.pop().unwrap(); - let Query::IndexJoin(join) = join else { - panic!("expected an index join, but got {join:#?}"); - }; - - let IndexJoin { - probe_side: - QueryExpr { - source: SourceExpr::InMemory { .. }, - query: ref rhs, - }, - probe_col, - index_side: SourceExpr::DbTable(DbTable { - table_id: index_table, .. - }), - index_select: None, - index_col, - return_index_rows: true, - } = join - else { - panic!("unexpected index join {join:#?}"); - }; - - assert!(!rhs.is_empty()); - - // Assert that original index and probe tables have not been swapped. - assert_eq!(index_table, lhs_id); - assert_eq!(index_col, 1.into()); - assert_eq!(probe_col, 0.into()); - Ok(()) - } - - #[test] - fn compile_incremental_join_unindexed_semi_join() { - let db = TestDB::durable().expect("failed to make test db"); - - // Create table [lhs] with index on [b] - let schema = &[("a", AlgebraicType::U64), ("b", AlgebraicType::U64)]; - let indexes = &[1.into()]; - let _lhs_id = db - .create_table_for_test("lhs", schema, indexes) - .expect("Failed to create_table_for_test lhs"); - - // Create table [rhs] with index on [b, c] - let schema = &[ - ("b", AlgebraicType::U64), - ("c", AlgebraicType::U64), - ("d", AlgebraicType::U64), - ]; - let indexes = &[0.into(), 1.into()]; - let _rhs_id = db - .create_table_for_test("rhs", schema, indexes) - .expect("Failed to create_table_for_test rhs"); - - let tx = begin_tx(&db); - - // Should generate an index join since there is an index on `lhs.b`. - // Should push the sargable range condition into the index join's probe side. - let sql = "select lhs.* from lhs join rhs on lhs.b = rhs.b where rhs.c > 2 and rhs.c < 4 and rhs.d = 3"; - let exp = compile_sql(&db, &AuthCtx::for_testing(), &tx, sql) - .expect("Failed to compile_sql") - .remove(0); - - let CrudExpr::Query(expr) = exp else { - panic!("unexpected result from compilation: {exp:#?}"); - }; - - assert_eq!(&**expr.source.table_name(), "lhs"); - assert_eq!(expr.query.len(), 1); - - let src_join = &expr.query[0]; - assert!( - matches!(src_join, Query::IndexJoin(_)), - "expected an index join, but got {src_join:#?}" - ); - - let incr = IncrementalJoin::new(&expr).expect("Failed to construct IncrementalJoin"); - - let virtual_plan = &incr.virtual_plan; - - assert!(virtual_plan.source.is_mem_table()); - assert_eq!(virtual_plan.source.head(), expr.source.head()); - assert_eq!(virtual_plan.head(), expr.head()); - assert_eq!(virtual_plan.query.len(), 1); - let incr_join = &virtual_plan.query[0]; - let Query::JoinInner(incr_join) = incr_join else { - panic!("expected an inner semijoin, but got {incr_join:#?}"); - }; - assert!(incr_join.rhs.source.is_mem_table()); - assert_ne!(incr_join.rhs.source.head(), expr.source.head()); - assert_ne!(incr_join.rhs.head(), expr.head()); - assert_eq!(incr_join.inner, None); - } -} diff --git a/crates/core/src/util/mod.rs b/crates/core/src/util/mod.rs index 5a5c1fb8420..32902924195 100644 --- a/crates/core/src/util/mod.rs +++ b/crates/core/src/util/mod.rs @@ -8,7 +8,6 @@ pub mod prometheus_handle; pub mod jobs; pub mod notify_once; -pub mod slow; // TODO: use String::from_utf8_lossy_owned once stabilized pub(crate) fn string_from_utf8_lossy_owned(v: Vec) -> String { diff --git a/crates/core/src/util/slow.rs b/crates/core/src/util/slow.rs deleted file mode 100644 index ea6701658cc..00000000000 --- a/crates/core/src/util/slow.rs +++ /dev/null @@ -1,154 +0,0 @@ -use std::time::{Duration, Instant}; - -use spacetimedb_datastore::execution_context::WorkloadType; - -/// Records the execution time of some `sql` -/// and logs when the duration goes above a specific one. -pub struct SlowQueryLogger<'a> { - /// The SQL statement of the query. - sql: &'a str, - /// The start time of the query execution. - start: Option, - /// The threshold, if any, over which execution duration would result in logging. - threshold: Option, - /// The context the query is being run in. - workload: WorkloadType, -} - -impl<'a> SlowQueryLogger<'a> { - pub fn new(sql: &'a str, threshold: Option, workload: WorkloadType) -> Self { - Self { - sql, - start: threshold.map(|_| Instant::now()), - threshold, - workload, - } - } - - pub fn log_guard(self) -> impl Drop + 'a { - scopeguard::guard(self, |logger| { - logger.log(); - }) - } - - /// Log as `tracing::warn!` the query if it exceeds the threshold. - pub fn log(&self) -> Option { - if let Some((start, threshold)) = self.start.zip(self.threshold) { - let elapsed = start.elapsed(); - if elapsed > threshold { - tracing::warn!(workload = %self.workload, ?threshold, ?elapsed, sql = ?self.sql, "SLOW QUERY"); - return Some(elapsed); - } - } - None - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use super::*; - - use crate::sql::compiler::compile_sql; - use crate::sql::execute::tests::execute_for_testing; - use spacetimedb_datastore::system_tables::ST_VARNAME_SLOW_QRY; - use spacetimedb_datastore::system_tables::{StVarName, ST_VARNAME_SLOW_INC, ST_VARNAME_SLOW_SUB}; - use spacetimedb_lib::error::ResultTest; - use spacetimedb_lib::identity::AuthCtx; - use spacetimedb_lib::st_var::StVarValue; - use spacetimedb_lib::ProductValue; - - use crate::db::relational_db::tests_utils::{begin_tx, insert, with_auto_commit, TestDB}; - use crate::db::relational_db::RelationalDB; - use spacetimedb_sats::{product, AlgebraicType}; - use spacetimedb_vm::relation::MemTable; - - fn run_query(db: &Arc, sql: String) -> ResultTest { - let tx = begin_tx(db); - let q = compile_sql(db, &AuthCtx::for_testing(), &tx, &sql)?; - Ok(execute_for_testing(db, &sql, q)?.pop().unwrap()) - } - - fn run_query_write(db: &Arc, sql: String) -> ResultTest<()> { - let tx = begin_tx(db); - let q = compile_sql(db, &AuthCtx::for_testing(), &tx, &sql)?; - drop(tx); - - execute_for_testing(db, &sql, q)?; - Ok(()) - } - - #[test] - fn test_slow_queries() -> ResultTest<()> { - let db = TestDB::in_memory()?.db; - - let table_id = - db.create_table_for_test("test", &[("x", AlgebraicType::I32), ("y", AlgebraicType::I32)], &[])?; - - with_auto_commit(&db, |tx| -> ResultTest<_> { - for i in 0..100_000 { - insert(&db, tx, table_id, &product![i, i * 2])?; - } - Ok(()) - })?; - let tx = begin_tx(&db); - - let sql = "select * from test where x > 0"; - let q = compile_sql(&db, &AuthCtx::for_testing(), &tx, sql)?; - - let slow = SlowQueryLogger::new(sql, Some(Duration::from_millis(1)), tx.ctx.workload()); - - let result = execute_for_testing(&db, sql, q)?; - assert_eq!(result[0].data[0], product![1, 2]); - assert!(slow.log().is_some()); - - Ok(()) - } - - // Verify we can change the threshold at runtime - #[test] - fn test_runtime_config() -> ResultTest<()> { - let db = TestDB::in_memory()?.db; - - fn fetch_row(table: MemTable) -> Option { - table.data.into_iter().next() - } - - // Check we can read the default config - let row1 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_QRY}"))?); - let row2 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_SUB}"))?); - let row3 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_INC}"))?); - - assert_eq!(row1, None); - assert_eq!(row2, None); - assert_eq!(row3, None); - - // Check we can write a new config - run_query_write(&db, format!("SET {ST_VARNAME_SLOW_QRY} TO 1"))?; - run_query_write(&db, format!("SET {ST_VARNAME_SLOW_SUB} TO 1"))?; - run_query_write(&db, format!("SET {ST_VARNAME_SLOW_INC} TO 1"))?; - - let row1 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_QRY}"))?); - let row2 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_SUB}"))?); - let row3 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_INC}"))?); - - assert_eq!(row1, Some(product!(StVarName::SlowQryThreshold, StVarValue::U64(1)))); - assert_eq!(row2, Some(product!(StVarName::SlowSubThreshold, StVarValue::U64(1)))); - assert_eq!(row3, Some(product!(StVarName::SlowIncThreshold, StVarValue::U64(1)))); - - // And disable the config - run_query_write(&db, format!("DELETE FROM st_var WHERE name = '{ST_VARNAME_SLOW_QRY}'"))?; - run_query_write(&db, format!("DELETE FROM st_var WHERE name = '{ST_VARNAME_SLOW_SUB}'"))?; - run_query_write(&db, format!("DELETE FROM st_var WHERE name = '{ST_VARNAME_SLOW_INC}'"))?; - - let row1 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_QRY}"))?); - let row2 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_SUB}"))?); - let row3 = fetch_row(run_query(&db, format!("SHOW {ST_VARNAME_SLOW_INC}"))?); - - assert_eq!(row1, None); - assert_eq!(row2, None); - assert_eq!(row3, None); - Ok(()) - } -} diff --git a/crates/core/src/vm.rs b/crates/core/src/vm.rs index 8f69ee76317..5273179c55f 100644 --- a/crates/core/src/vm.rs +++ b/crates/core/src/vm.rs @@ -1,469 +1,14 @@ -//! The [DbProgram] that execute arbitrary queries & code against the database. - -use crate::db::relational_db::{MutTx, RelationalDB, Tx}; +use crate::db::relational_db::{RelationalDB, Tx}; use crate::error::DBError; -use crate::estimation; -use core::ops::{Bound, Deref, RangeBounds}; -use itertools::Itertools; -use spacetimedb_data_structures::map::IntMap; -use spacetimedb_datastore::execution_context::ExecutionContext; -use spacetimedb_datastore::locking_tx_datastore::state_view::IterByColRangeMutTx; -use spacetimedb_datastore::locking_tx_datastore::IterByColRangeTx; -use spacetimedb_datastore::locking_tx_datastore::TxId; -use spacetimedb_datastore::system_tables::{st_var_schema, StVarName, StVarRow}; use spacetimedb_lib::identity::AuthCtx; -use spacetimedb_primitives::*; -use spacetimedb_sats::{AlgebraicValue, ProductValue}; -use spacetimedb_schema::relation::{ColExpr, DbTable}; -use spacetimedb_table::static_assert_size; -use spacetimedb_table::table::RowRef; -use spacetimedb_vm::errors::ErrorVm; -use spacetimedb_vm::eval::{box_iter, build_project, build_select, join_inner, IterRows}; -use spacetimedb_vm::expr::*; -use spacetimedb_vm::iterators::RelIter; -use spacetimedb_vm::program::{ProgramVm, Sources}; -use spacetimedb_vm::rel_ops::{EmptyRelOps, RelOps}; -use spacetimedb_vm::relation::{MemTable, RelValue}; -use std::str::FromStr; -use std::sync::Arc; - -pub enum TxMode<'a> { - MutTx(&'a mut MutTx), - Tx(&'a Tx), -} - -impl TxMode<'_> { - /// Unwraps `self`, ensuring we are in a mutable tx. - fn unwrap_mut(&mut self) -> &mut MutTx { - match self { - Self::MutTx(tx) => tx, - Self::Tx(_) => unreachable!("mutable operation is invalid with read tx"), - } - } - - pub(crate) fn ctx(&self) -> &ExecutionContext { - match self { - Self::MutTx(tx) => &tx.ctx, - Self::Tx(tx) => &tx.ctx, - } - } -} - -impl<'a> From<&'a mut MutTx> for TxMode<'a> { - fn from(tx: &'a mut MutTx) -> Self { - TxMode::MutTx(tx) - } -} - -impl<'a> From<&'a Tx> for TxMode<'a> { - fn from(tx: &'a Tx) -> Self { - TxMode::Tx(tx) - } -} - -impl<'a> From<&'a mut Tx> for TxMode<'a> { - fn from(tx: &'a mut Tx) -> Self { - TxMode::Tx(tx) - } -} - -fn bound_is_satisfiable(lower: &Bound, upper: &Bound) -> bool { - match (lower, upper) { - (Bound::Excluded(lower), Bound::Excluded(upper)) if lower >= upper => false, - (Bound::Included(lower), Bound::Excluded(upper)) | (Bound::Excluded(lower), Bound::Included(upper)) - if lower > upper => - { - false - } - _ => true, - } -} - -//TODO: This is partially duplicated from the `vm` crate to avoid borrow checker issues -//and pull all that crate in core. Will be revisited after trait refactor -pub fn build_query<'a>( - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - query: &'a QueryExpr, - sources: &mut impl SourceProvider<'a>, -) -> Box> { - let db_table = query.source.is_db_table(); - - // We're incrementally building a query iterator by applying each operation in the `query.query`. - // Most such operations will modify their parent, but certain operations (i.e. `IndexJoin`s) - // are only valid as the first operation in the list, - // and construct a new base query. - // - // Branches which use `result` will do `unwrap_or_else(|| get_table(ctx, db, tx, &query.table, sources))` - // to get an `IterRows` defaulting to the `query.table`. - // - // Branches which do not use the `result` will assert that it is `None`, - // i.e. that they are the first operator. - // - // TODO(bikeshedding): Avoid duplication of the ugly `result.take().map(...).unwrap_or_else(...)?` expr? - // TODO(bikeshedding): Refactor `QueryExpr` to separate `IndexJoin` from other `Query` variants, - // removing the need for this convoluted logic? - let mut result = None; - - let result_or_base = |sources: &mut _, result: &mut Option<_>| { - result - .take() - .unwrap_or_else(|| get_table(db, tx, &query.source, sources)) - }; - - for op in &query.query { - result = Some(match op { - Query::IndexScan(IndexScan { table, columns, bounds }) if db_table => { - if !bound_is_satisfiable(&bounds.0, &bounds.1) { - // If the bound is impossible to satisfy - // because the lower bound is greater than the upper bound, or both bounds are excluded and equal, - // return an empty iterator. - // This avoids a panic in `BTreeMap`'s `NodeRef::search_tree_for_bifurcation`, - // which is very unhappy about unsatisfiable bounds. - Box::new(EmptyRelOps) as Box> - } else { - let bounds = (bounds.start_bound(), bounds.end_bound()); - iter_by_col_range(db, tx, table, columns.clone(), bounds) - } - } - Query::IndexScan(index_scan) => { - let result = result_or_base(sources, &mut result); - let cols = &index_scan.columns; - let bounds = &index_scan.bounds; - - if !bound_is_satisfiable(&bounds.0, &bounds.1) { - // If the bound is impossible to satisfy - // because the lower bound is greater than the upper bound, or both bounds are excluded and equal, - // return an empty iterator. - // Unlike the above case, this is not necessary, as the below `select` will never panic, - // but it's still nice to avoid needlessly traversing a bunch of rows. - // TODO: We should change the compiler to not emit an `IndexScan` in this case, - // so that this branch is unreachable. - // The current behavior is a hack - // because this patch was written (2024-04-01 pgoldman) a short time before the BitCraft alpha, - // and a more invasive change was infeasible. - Box::new(EmptyRelOps) as Box> - } else if let Some(head) = cols.as_singleton() { - // For singleton constraints, we compare the column directly against `bounds`. - let head = head.idx(); - let iter = result.select(move |row| bounds.contains(&*row.read_column(head).unwrap())); - Box::new(iter) as Box> - } else { - // For multi-col constraints, these are stored as bounds of product values, - // so we need to project these into single-col bounds and compare against the column. - // Project start/end `Bound`s to `Bound>`s. - let start_bound = bounds.0.as_ref().map(|av| &av.as_product().unwrap().elements); - let end_bound = bounds.1.as_ref().map(|av| &av.as_product().unwrap().elements); - // Construct the query: - Box::new(result.select(move |row| { - // Go through each column position, - // project to a `Bound` for the position, - // and compare against the column in the row. - // All columns must match to include the row, - // which is essentially the same as a big `AND` of `ColumnOp`s. - cols.iter().enumerate().all(|(idx, col)| { - let start_bound = start_bound.map(|pv| &pv[idx]); - let end_bound = end_bound.map(|pv| &pv[idx]); - let read_col = row.read_column(col.idx()).unwrap(); - (start_bound, end_bound).contains(&*read_col) - }) - })) - } - } - Query::IndexJoin(_) if result.is_some() => panic!("Invalid query: `IndexJoin` must be the first operator"), - Query::IndexJoin(IndexJoin { - probe_side, - probe_col, - index_side, - index_select, - index_col, - return_index_rows, - }) => { - let probe_side = build_query(db, tx, probe_side, sources); - // The compiler guarantees that the index side is a db table, - // and therefore this unwrap is always safe. - let index_table = index_side.table_id().unwrap(); - - if *return_index_rows { - index_semi_join_left(db, tx, probe_side, *probe_col, index_select, index_table, *index_col) - } else { - index_semi_join_right(db, tx, probe_side, *probe_col, index_select, index_table, *index_col) - } - } - Query::Select(cmp) => build_select(result_or_base(sources, &mut result), cmp), - Query::Project(proj) => build_project(result_or_base(sources, &mut result), proj), - Query::JoinInner(join) => join_inner( - result_or_base(sources, &mut result), - build_query(db, tx, &join.rhs, sources), - join, - ), - }) - } - result_or_base(sources, &mut result) -} - -/// Resolve `query` to a table iterator, -/// either taken from an in-memory table, in the case of [`SourceExpr::InMemory`], -/// or from a physical table, in the case of [`SourceExpr::DbTable`]. -/// -/// If `query` refers to an in memory table, -/// `sources` will be used to fetch the table `I`. -/// Examples of `I` could be derived from `MemTable` or `&'a [ProductValue]` -/// whereas `sources` could a [`SourceSet`]. -/// -/// On the other hand, if the `query` is a `SourceExpr::DbTable`, `sources` is unused. -fn get_table<'a>( - stdb: &'a RelationalDB, - tx: &'a TxMode, - query: &'a SourceExpr, - sources: &mut impl SourceProvider<'a>, -) -> Box> { - match query { - // Extracts an in-memory table with `source_id` from `sources` and builds a query for the table. - SourceExpr::InMemory { source_id, .. } => build_iter( - sources - .take_source(*source_id) - .unwrap_or_else(|| { - panic!("Query plan specifies in-mem table for {source_id:?}, but found a `DbTable` or nothing") - }) - .into_iter(), - ), - SourceExpr::DbTable(db_table) => build_iter_from_db(match tx { - TxMode::MutTx(tx) => stdb.iter_mut(tx, db_table.table_id).map(box_iter), - TxMode::Tx(tx) => stdb.iter(tx, db_table.table_id).map(box_iter), - }), - } -} - -fn iter_by_col_range<'a>( - db: &'a RelationalDB, - tx: &'a TxMode, - table: &'a DbTable, - columns: ColList, - range: impl RangeBounds + 'a, -) -> Box> { - build_iter_from_db(match tx { - TxMode::MutTx(tx) => db - .iter_by_col_range_mut(tx, table.table_id, columns, range) - .map(box_iter), - TxMode::Tx(tx) => db.iter_by_col_range(tx, table.table_id, columns, range).map(box_iter), - }) -} - -fn build_iter_from_db<'a>(iter: Result>, DBError>) -> Box> { - build_iter(iter.expect(TABLE_ID_EXPECTED_VALID).map(RelValue::Row)) -} - -fn build_iter<'a>(iter: impl 'a + Iterator>) -> Box> { - Box::new(RelIter::new(iter)) as Box> -} - -const TABLE_ID_EXPECTED_VALID: &str = "all `table_id`s in compiled query should be valid"; - -/// An index join operator that returns matching rows from the index side. -pub struct IndexSemiJoinLeft<'c, Rhs, IndexIter, F> { - /// An iterator for the probe side. - /// The values returned will be used to probe the index. - probe_side: Rhs, - /// The column whose value will be used to probe the index. - probe_col: ColId, - /// An optional predicate to evaluate over the matching rows of the index. - index_select: &'c Option, - /// An iterator for the index side. - /// A new iterator will be instantiated for each row on the probe side. - index_iter: Option, - /// The function that returns an iterator for the index side. - index_function: F, -} - -impl<'a, Rhs, IndexIter, F> IndexSemiJoinLeft<'_, Rhs, IndexIter, F> -where - F: Fn(AlgebraicValue) -> Result, - IndexIter: Iterator>, - Rhs: RelOps<'a>, -{ - fn filter(&self, index_row: &RelValue<'_>) -> bool { - self.index_select.as_ref().is_none_or(|op| op.eval_bool(index_row)) - } -} - -impl<'a, Rhs, IndexIter, F> RelOps<'a> for IndexSemiJoinLeft<'_, Rhs, IndexIter, F> -where - F: Fn(AlgebraicValue) -> Result, - IndexIter: Iterator>, - Rhs: RelOps<'a>, -{ - fn next(&mut self) -> Option> { - // Return a value from the current index iterator, if not exhausted. - while let Some(index_row) = self.index_iter.as_mut().and_then(|iter| iter.next()).map(RelValue::Row) { - if self.filter(&index_row) { - return Some(index_row); - } - } - - // Otherwise probe the index with a row from the probe side. - let probe_col = self.probe_col.idx(); - while let Some(mut row) = self.probe_side.next() { - if let Some(value) = row.read_or_take_column(probe_col) { - let mut index_iter = (self.index_function)(value).expect(TABLE_ID_EXPECTED_VALID); - while let Some(index_row) = index_iter.next().map(RelValue::Row) { - if self.filter(&index_row) { - self.index_iter = Some(index_iter); - return Some(index_row); - } - } - } - } - None - } -} - -/// Return an iterator index join operator that returns matching rows from the index side. -pub fn index_semi_join_left<'a>( - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - probe_side: Box>, - probe_col: ColId, - index_select: &'a Option, - index_table: TableId, - index_col: ColId, -) -> Box> { - match tx { - TxMode::MutTx(tx) => Box::new(IndexSemiJoinLeft { - probe_side, - probe_col, - index_select, - index_iter: None, - index_function: move |value| db.iter_by_col_range_mut(tx, index_table, index_col, value), - }), - TxMode::Tx(tx) => Box::new(IndexSemiJoinLeft { - probe_side, - probe_col, - index_select, - index_iter: None, - index_function: move |value| db.iter_by_col_range(tx, index_table, index_col, value), - }), - } -} - -static_assert_size!( - IndexSemiJoinLeft< - Box>, - fn(AlgebraicValue) -> Result, DBError>, - IterByColRangeTx<'static, AlgebraicValue>, - >, - 144 -); -static_assert_size!( - IndexSemiJoinLeft< - Box>, - fn(AlgebraicValue) -> Result, DBError>, - IterByColRangeMutTx<'static, AlgebraicValue>, - >, - 240 -); - -/// An index join operator that returns matching rows from the probe side. -pub struct IndexSemiJoinRight<'c, Rhs: RelOps<'c>, F> { - /// An iterator for the probe side. - /// The values returned will be used to probe the index. - probe_side: Rhs, - /// The column whose value will be used to probe the index. - probe_col: ColId, - /// An optional predicate to evaluate over the matching rows of the index. - index_select: &'c Option, - /// A function that returns an iterator for the index side. - index_function: F, -} - -impl<'a, Rhs: RelOps<'a>, F, IndexIter> IndexSemiJoinRight<'a, Rhs, F> -where - F: Fn(AlgebraicValue) -> Result, - IndexIter: Iterator>, -{ - fn filter(&self, index_row: &RelValue<'_>) -> bool { - self.index_select.as_ref().is_none_or(|op| op.eval_bool(index_row)) - } -} - -impl<'a, Rhs: RelOps<'a>, F, IndexIter> RelOps<'a> for IndexSemiJoinRight<'a, Rhs, F> -where - F: Fn(AlgebraicValue) -> Result, - IndexIter: Iterator>, -{ - fn next(&mut self) -> Option> { - // Otherwise probe the index with a row from the probe side. - let probe_col = self.probe_col.idx(); - while let Some(mut row) = self.probe_side.next() { - if let Some(value) = row.read_or_take_column(probe_col) { - let mut index_iter = (self.index_function)(value).expect(TABLE_ID_EXPECTED_VALID); - while let Some(index_row) = index_iter.next().map(RelValue::Row) { - if self.filter(&index_row) { - return Some(row); - } - } - } - } - None - } -} - -/// Return an iterator index join operator that returns matching rows from the probe side. -pub fn index_semi_join_right<'a>( - db: &'a RelationalDB, - tx: &'a TxMode<'a>, - probe_side: Box>, - probe_col: ColId, - index_select: &'a Option, - index_table: TableId, - index_col: ColId, -) -> Box> { - match tx { - TxMode::MutTx(tx) => Box::new(IndexSemiJoinRight { - probe_side, - probe_col, - index_select, - index_function: move |value| db.iter_by_col_range_mut(tx, index_table, index_col, value), - }), - TxMode::Tx(tx) => Box::new(IndexSemiJoinRight { - probe_side, - probe_col, - index_select, - index_function: move |value| db.iter_by_col_range(tx, index_table, index_col, value), - }), - } -} -static_assert_size!( - IndexSemiJoinRight< - Box>, - fn(AlgebraicValue) -> Result, DBError>, - >, - 40 -); -static_assert_size!( - IndexSemiJoinRight< - Box>, - fn(AlgebraicValue) -> Result, DBError>, - >, - 40 -); - -/// A [ProgramVm] implementation that carry a [RelationalDB] for it -/// query execution -pub struct DbProgram<'db, 'tx> { - pub(crate) db: &'db RelationalDB, - pub(crate) tx: &'tx mut TxMode<'tx>, - pub(crate) auth: AuthCtx, -} - -/// If the subscriber is not the database owner, +/// If the caller is not allowed to exceed the row limit, /// reject the request if the estimated cardinality exceeds the limit. pub fn check_row_limit( queries: &[Query], db: &RelationalDB, - tx: &TxId, - row_est: impl Fn(&Query, &TxId) -> u64, + tx: &Tx, + row_est: impl Fn(&Query, &Tx) -> u64, auth: &AuthCtx, ) -> Result<(), DBError> { if !auth.exceed_row_limit() @@ -481,439 +26,3 @@ pub fn check_row_limit( } Ok(()) } - -impl<'db, 'tx> DbProgram<'db, 'tx> { - pub fn new(db: &'db RelationalDB, tx: &'tx mut TxMode<'tx>, auth: AuthCtx) -> Self { - Self { db, tx, auth } - } - - fn _eval_query(&mut self, query: &QueryExpr, sources: Sources<'_, N>) -> Result { - if let TxMode::Tx(tx) = self.tx { - check_row_limit( - &[query], - self.db, - tx, - |expr, tx| estimation::num_rows(tx, expr), - &self.auth, - )?; - } - - let table_access = query.source.table_access(); - tracing::trace!(table = query.source.table_name().deref()); - - let head = query.head().clone(); - let rows = build_query(self.db, self.tx, query, &mut |id| { - sources.take(id).map(|mt| mt.into_iter().map(RelValue::Projection)) - }) - .collect_vec(|row| row.into_product_value()); - - Ok(Code::Table(MemTable::new(head, table_access, rows))) - } - - // TODO(centril): investigate taking bsatn as input instead. - fn _execute_insert(&mut self, table: &DbTable, inserts: Vec) -> Result { - let tx = self.tx.unwrap_mut(); - let mut scratch = Vec::new(); - for row in &inserts { - row.encode(&mut scratch); - self.db.insert(tx, table.table_id, &scratch)?; - scratch.clear(); - } - Ok(Code::Pass(Some(Update { - table_id: table.table_id, - table_name: table.head.table_name.clone(), - inserts, - deletes: Vec::default(), - }))) - } - - fn _execute_update( - &mut self, - delete: &QueryExpr, - mut assigns: IntMap, - sources: Sources<'_, N>, - ) -> Result { - let result = self._eval_query(delete, sources)?; - let Code::Table(deleted) = result else { - return Ok(result); - }; - - let table = delete - .source - .get_db_table() - .expect("source for Update should be a DbTable"); - - self._execute_delete(table, deleted.data.clone())?; - - // Replace the columns in the matched rows with the assigned - // values. No typechecking is performed here, nor that all - // assignments are consumed. - let deletes = deleted.data.clone(); - let exprs: Vec> = (0..table.head.fields.len()) - .map(ColId::from) - .map(|c| assigns.remove(&c)) - .collect(); - - let insert_rows = deleted - .data - .into_iter() - .map(|row| { - let elements = row - .into_iter() - .zip(&exprs) - .map(|(val, expr)| { - if let Some(ColExpr::Value(assigned)) = expr { - assigned.clone() - } else { - val - } - }) - .collect(); - - ProductValue { elements } - }) - .collect_vec(); - - let result = self._execute_insert(table, insert_rows); - let Ok(Code::Pass(Some(insert))) = result else { - return result; - }; - - Ok(Code::Pass(Some(Update { deletes, ..insert }))) - } - - fn _execute_delete(&mut self, table: &DbTable, rows: Vec) -> Result { - let deletes = rows.clone(); - self.db.delete_by_rel(self.tx.unwrap_mut(), table.table_id, rows); - - Ok(Code::Pass(Some(Update { - table_id: table.table_id, - table_name: table.head.table_name.clone(), - inserts: Vec::default(), - deletes, - }))) - } - - fn _delete_query(&mut self, query: &QueryExpr, sources: Sources<'_, N>) -> Result { - match self._eval_query(query, sources)? { - Code::Table(result) => self._execute_delete(query.source.get_db_table().unwrap(), result.data), - r => Ok(r), - } - } - - fn _set_var(&mut self, name: String, literal: String) -> Result { - let tx = self.tx.unwrap_mut(); - self.db.write_var(tx, StVarName::from_str(&name)?, &literal)?; - Ok(Code::Pass(None)) - } - - fn _read_var(&self, name: String) -> Result { - fn read_key_into_table(env: &DbProgram, name: &str) -> Result { - if let TxMode::Tx(tx) = &env.tx { - let name = StVarName::from_str(name)?; - if let Some(value) = env.db.read_var(tx, name)? { - return Ok(MemTable::from_iter( - Arc::new(st_var_schema().into()), - [ProductValue::from(StVarRow { name, value })], - )); - } - } - Ok(MemTable::from_iter(Arc::new(st_var_schema().into()), [])) - } - Ok(Code::Table(read_key_into_table(self, &name)?)) - } -} - -impl ProgramVm for DbProgram<'_, '_> { - // Safety: For DbProgram with tx = TxMode::Tx variant, all queries must match to CrudCode::Query and no other branch. - fn eval_query(&mut self, query: CrudExpr, sources: Sources<'_, N>) -> Result { - query.check_auth(&self.auth)?; - - match query { - CrudExpr::Query(query) => self._eval_query(&query, sources), - CrudExpr::Insert { table, rows } => self._execute_insert(&table, rows), - CrudExpr::Update { delete, assignments } => self._execute_update(&delete, assignments, sources), - CrudExpr::Delete { query } => self._delete_query(&query, sources), - CrudExpr::SetVar { name, literal } => self._set_var(name, literal), - CrudExpr::ReadVar { name } => self._read_var(name), - } - } -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - use crate::db::relational_db::tests_utils::{begin_tx, insert, with_auto_commit, with_read_only, TestDB}; - use pretty_assertions::assert_eq; - use spacetimedb_datastore::system_tables::{ - StColumnFields, StColumnRow, StFields as _, StIndexAlgorithm, StIndexFields, StIndexRow, StSequenceFields, - StSequenceRow, StTableFields, StTableRow, ST_COLUMN_ID, ST_COLUMN_NAME, ST_INDEX_ID, ST_INDEX_NAME, - ST_RESERVED_SEQUENCE_RANGE, ST_SEQUENCE_ID, ST_SEQUENCE_NAME, ST_TABLE_ID, ST_TABLE_NAME, - }; - use spacetimedb_lib::db::auth::{StAccess, StTableType}; - use spacetimedb_lib::error::ResultTest; - use spacetimedb_sats::raw_identifier::RawIdentifier; - use spacetimedb_sats::{product, AlgebraicType, ProductType, ProductValue}; - use spacetimedb_schema::def::{BTreeAlgorithm, IndexAlgorithm}; - use spacetimedb_schema::identifier::Identifier; - use spacetimedb_schema::relation::{FieldName, Header}; - use spacetimedb_schema::schema::{ColumnSchema, IndexSchema, TableSchema}; - use spacetimedb_schema::table_name::TableName; - use spacetimedb_vm::eval::run_ast; - use spacetimedb_vm::eval::test_helpers::{mem_table, mem_table_one_u64, scalar}; - use spacetimedb_vm::operator::OpCmp; - use std::sync::Arc; - - pub(crate) fn create_table_with_rows( - db: &RelationalDB, - tx: &mut MutTx, - table_name: &str, - schema: ProductType, - rows: &[ProductValue], - access: StAccess, - ) -> ResultTest> { - let columns = schema - .elements - .iter() - .cloned() - .enumerate() - .map(|(i, element)| ColumnSchema { - table_id: TableId::SENTINEL, - col_name: Identifier::new(element.name.unwrap()).unwrap(), - col_type: element.algebraic_type, - col_pos: ColId(i as _), - alias: None, - }) - .collect(); - - let table_id = db.create_table( - tx, - TableSchema::new( - TableId::SENTINEL, - TableName::for_test(table_name), - None, - columns, - vec![], - vec![], - vec![], - StTableType::User, - access, - None, - None, - false, - None, - ), - )?; - let schema = db.schema_for_table_mut(tx, table_id)?; - - for row in rows { - insert(db, tx, table_id, &row)?; - } - - Ok(schema) - } - - /// Creates a table "inventory" with `(inventory_id: u64, name : String)` as columns. - fn create_inv_table(db: &RelationalDB, tx: &mut MutTx) -> ResultTest<(Arc, ProductValue)> { - let schema_ty = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let row = product!(1u64, "health"); - let schema = create_table_with_rows( - db, - tx, - "inventory", - schema_ty.clone(), - std::slice::from_ref(&row), - StAccess::Public, - )?; - Ok((schema, row)) - } - - fn run_query( - db: &RelationalDB, - q: QueryExpr, - sources: SourceSet, N>, - ) -> MemTable { - with_read_only(db, |tx| { - let mut tx_mode = (&*tx).into(); - let p = &mut DbProgram::new(db, &mut tx_mode, AuthCtx::for_testing()); - match run_ast(p, q.into(), sources) { - Code::Table(x) => x, - x => panic!("invalid result {x}"), - } - }) - } - - #[test] - fn test_db_query_inner_join() -> ResultTest<()> { - let stdb = TestDB::durable()?; - - let (schema, _) = with_auto_commit(&stdb, |tx| create_inv_table(&stdb, tx))?; - let table_id = schema.table_id; - - let data = mem_table_one_u64(u32::MAX.into()); - let mut sources = SourceSet::<_, 1>::empty(); - let rhs_source_expr = sources.add_mem_table(data); - let q = QueryExpr::new(&*schema).with_join_inner(rhs_source_expr, 0.into(), 0.into(), false); - let result = run_query(&stdb, q, sources); - - // The expected result. - let inv = ProductType::from([AlgebraicType::U64, AlgebraicType::String, AlgebraicType::U64]); - let row = product![1u64, "health", 1u64]; - let input = mem_table(table_id, inv, vec![row]); - - assert_eq!(result.data, input.data, "Inventory"); - - Ok(()) - } - - #[test] - fn test_db_query_semijoin() -> ResultTest<()> { - let stdb = TestDB::durable()?; - - let (schema, row) = with_auto_commit(&stdb, |tx| create_inv_table(&stdb, tx))?; - - let data = mem_table_one_u64(u32::MAX.into()); - let mut sources = SourceSet::<_, 1>::empty(); - let rhs_source_expr = sources.add_mem_table(data); - let q = QueryExpr::new(&*schema).with_join_inner(rhs_source_expr, 0.into(), 0.into(), true); - let result = run_query(&stdb, q, sources); - - // The expected result. - let input = mem_table(schema.table_id, schema.get_row_type().clone(), vec![row]); - assert_eq!(result.data, input.data, "Inventory"); - - Ok(()) - } - - fn check_catalog(db: &RelationalDB, name: &str, row: ProductValue, q: QueryExpr, schema: &TableSchema) { - let result = run_query(db, q, [].into()); - let input = MemTable::from_iter(Header::from(schema).into(), [row]); - assert_eq!(result, input, "{}", name); - } - - #[test] - fn test_query_catalog_tables() -> ResultTest<()> { - let stdb = TestDB::durable()?; - let schema = &*stdb.schema_for_table(&begin_tx(&stdb), ST_TABLE_ID).unwrap(); - - let q = QueryExpr::new(schema) - .with_select_cmp( - OpCmp::Eq, - FieldName::new(ST_TABLE_ID, StTableFields::TableName.into()), - scalar(ST_TABLE_NAME), - ) - .unwrap(); - let st_table_row = StTableRow { - table_id: ST_TABLE_ID, - table_name: TableName::for_test(ST_TABLE_NAME), - table_type: StTableType::System, - table_access: StAccess::Public, - table_primary_key: Some(StTableFields::TableId.into()), - } - .into(); - check_catalog(&stdb, ST_TABLE_NAME, st_table_row, q, schema); - - Ok(()) - } - - #[test] - fn test_query_catalog_columns() -> ResultTest<()> { - let stdb = TestDB::durable()?; - let schema = &*stdb.schema_for_table(&begin_tx(&stdb), ST_COLUMN_ID).unwrap(); - - let q = QueryExpr::new(schema) - .with_select_cmp( - OpCmp::Eq, - FieldName::new(ST_COLUMN_ID, StColumnFields::TableId.into()), - scalar(ST_COLUMN_ID), - ) - .unwrap() - .with_select_cmp( - OpCmp::Eq, - FieldName::new(ST_COLUMN_ID, StColumnFields::ColPos.into()), - scalar(StColumnFields::TableId as u16), - ) - .unwrap(); - let st_column_row = StColumnRow { - table_id: ST_COLUMN_ID, - col_pos: StColumnFields::TableId.col_id(), - col_name: StColumnFields::TableId.col_name(), - col_type: AlgebraicType::U32.into(), - } - .into(); - check_catalog(&stdb, ST_COLUMN_NAME, st_column_row, q, schema); - - Ok(()) - } - - #[test] - fn test_query_catalog_indexes() -> ResultTest<()> { - let db = TestDB::durable()?; - - let (schema, _) = with_auto_commit(&db, |tx| create_inv_table(&db, tx))?; - let table_id = schema.table_id; - let columns = ColList::from(ColId(0)); - let index_name: RawIdentifier = "idx_1".into(); - let is_unique = false; - - let index = IndexSchema { - table_id, - index_id: IndexId::SENTINEL, - index_name: index_name.clone(), - index_algorithm: IndexAlgorithm::BTree(BTreeAlgorithm { - columns: columns.clone(), - }), - alias: None, - }; - let index_id = with_auto_commit(&db, |tx| db.create_index(tx, index, is_unique))?; - - let indexes_schema = &*db.schema_for_table(&begin_tx(&db), ST_INDEX_ID).unwrap(); - let q = QueryExpr::new(indexes_schema) - .with_select_cmp( - OpCmp::Eq, - FieldName::new(ST_INDEX_ID, StIndexFields::IndexName.into()), - scalar(&*index_name), - ) - .unwrap(); - - let st_index_row = StIndexRow { - index_id, - index_name: index_name.clone(), - table_id, - index_algorithm: StIndexAlgorithm::BTree { columns }, - } - .into(); - check_catalog(&db, ST_INDEX_NAME, st_index_row, q, indexes_schema); - - Ok(()) - } - - #[test] - fn test_query_catalog_sequences() -> ResultTest<()> { - let db = TestDB::durable()?; - - let schema = &*db.schema_for_table(&begin_tx(&db), ST_SEQUENCE_ID).unwrap(); - let q = QueryExpr::new(schema) - .with_select_cmp( - OpCmp::Eq, - FieldName::new(ST_SEQUENCE_ID, StSequenceFields::TableId.into()), - scalar(ST_SEQUENCE_ID), - ) - .unwrap(); - let st_sequence_row = StSequenceRow { - sequence_id: 5.into(), - sequence_name: "st_sequence_sequence_id_seq".into(), - table_id: ST_SEQUENCE_ID, - col_pos: 0.into(), - increment: 1, - start: ST_RESERVED_SEQUENCE_RANGE as i128 + 1, - min_value: 1, - max_value: i128::MAX, - allocated: ST_RESERVED_SEQUENCE_RANGE as i128, - } - .into(); - check_catalog(&db, ST_SEQUENCE_NAME, st_sequence_row, q, schema); - - Ok(()) - } -} diff --git a/crates/datastore/src/system_tables.rs b/crates/datastore/src/system_tables.rs index 9978be4f0b3..e75cc76b365 100644 --- a/crates/datastore/src/system_tables.rs +++ b/crates/datastore/src/system_tables.rs @@ -1655,28 +1655,16 @@ impl From for AlgebraicValue { /// If the cardinality of a query is estimated to exceed this limit, /// it will be rejected before being executed. pub const ST_VARNAME_ROW_LIMIT: &str = "row_limit"; -/// A system variable that defines a threshold for logging slow queries. -pub const ST_VARNAME_SLOW_QRY: &str = "slow_ad_hoc_query_ms"; -/// A system variable that defines a threshold for logging slow subscriptions. -pub const ST_VARNAME_SLOW_SUB: &str = "slow_subscription_query_ms"; -/// A system variable that defines a threshold for logging slow tx updates. -pub const ST_VARNAME_SLOW_INC: &str = "slow_tx_update_ms"; /// The name of a system variable in `st_var` #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum StVarName { RowLimit, - SlowQryThreshold, - SlowSubThreshold, - SlowIncThreshold, } impl From for &'static str { fn from(value: StVarName) -> Self { match value { StVarName::RowLimit => ST_VARNAME_ROW_LIMIT, - StVarName::SlowQryThreshold => ST_VARNAME_SLOW_QRY, - StVarName::SlowSubThreshold => ST_VARNAME_SLOW_SUB, - StVarName::SlowIncThreshold => ST_VARNAME_SLOW_INC, } } } @@ -1692,9 +1680,6 @@ impl FromStr for StVarName { fn from_str(s: &str) -> Result { match s { ST_VARNAME_ROW_LIMIT => Ok(StVarName::RowLimit), - ST_VARNAME_SLOW_QRY => Ok(StVarName::SlowQryThreshold), - ST_VARNAME_SLOW_SUB => Ok(StVarName::SlowSubThreshold), - ST_VARNAME_SLOW_INC => Ok(StVarName::SlowIncThreshold), _ => Err(anyhow::anyhow!("Invalid system variable {s}")), } } @@ -1711,10 +1696,7 @@ impl<'de> Deserialize<'de> for StVarName { impl StVarName { pub fn type_of(&self) -> AlgebraicType { match self { - StVarName::RowLimit - | StVarName::SlowQryThreshold - | StVarName::SlowSubThreshold - | StVarName::SlowIncThreshold => AlgebraicType::U64, + StVarName::RowLimit => AlgebraicType::U64, } } } diff --git a/crates/execution/src/lib.rs b/crates/execution/src/lib.rs index f239d165771..a4f259b6885 100644 --- a/crates/execution/src/lib.rs +++ b/crates/execution/src/lib.rs @@ -95,7 +95,7 @@ pub trait DeltaStore { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum Row<'a> { Ptr(RowRef<'a>), Ref(&'a ProductValue), @@ -167,6 +167,73 @@ impl ToBsatn for Row<'_> { } } +#[derive(Clone, Debug)] +pub enum RelValue<'a> { + Row(Row<'a>), + Projection(ProductValue), +} + +impl<'a> From> for RelValue<'a> { + fn from(value: Row<'a>) -> Self { + Self::Row(value) + } +} + +impl From for RelValue<'_> { + fn from(value: ProductValue) -> Self { + Self::Projection(value) + } +} + +impl PartialEq for RelValue<'_> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Row(x), Self::Row(y)) => x == y, + (Self::Projection(x), Self::Projection(y)) => x == y, + (Self::Row(x), Self::Projection(y)) | (Self::Projection(y), Self::Row(x)) => x.to_product_value() == *y, + } + } +} + +impl Eq for RelValue<'_> {} + +impl Hash for RelValue<'_> { + fn hash(&self, state: &mut H) { + match self { + Self::Row(x) => x.hash(state), + Self::Projection(x) => x.hash(state), + } + } +} + +impl_serialize!(['a] RelValue<'a>, (self, ser) => match self { + Self::Row(row) => row.serialize(ser), + Self::Projection(row) => row.serialize(ser), +}); + +impl ToBsatn for RelValue<'_> { + fn static_bsatn_size(&self) -> Option { + match self { + Self::Row(row) => row.static_bsatn_size(), + Self::Projection(row) => row.static_bsatn_size(), + } + } + + fn to_bsatn_extend(&self, buf: &mut (impl BufWriter + BufReservedFill)) -> std::result::Result<(), EncodeError> { + match self { + Self::Row(row) => row.to_bsatn_extend(buf), + Self::Projection(row) => row.to_bsatn_extend(buf), + } + } + + fn to_bsatn_vec(&self) -> std::result::Result, EncodeError> { + match self { + Self::Row(row) => row.to_bsatn_vec(), + Self::Projection(row) => row.to_bsatn_vec(), + } + } +} + impl ProjectField for Row<'_> { fn project(&self, field: &TupleField) -> AlgebraicValue { match self { diff --git a/crates/expr/src/statement.rs b/crates/expr/src/statement.rs index 127876fdd74..b7422dd031c 100644 --- a/crates/expr/src/statement.rs +++ b/crates/expr/src/statement.rs @@ -263,12 +263,9 @@ pub struct InvalidVar { } const VAR_ROW_LIMIT: &str = "row_limit"; -const VAR_SLOW_QUERY: &str = "slow_ad_hoc_query_ms"; -const VAR_SLOW_UPDATE: &str = "slow_tx_update_ms"; -const VAR_SLOW_SUB: &str = "slow_subscription_query_ms"; fn is_var_valid(var: &str) -> bool { - var == VAR_ROW_LIMIT || var == VAR_SLOW_QUERY || var == VAR_SLOW_UPDATE || var == VAR_SLOW_SUB + var == VAR_ROW_LIMIT } const ST_VAR_NAME: &str = "st_var"; diff --git a/crates/sqltest/Cargo.toml b/crates/sqltest/Cargo.toml index b21bce249bd..d6b1e2c8a69 100644 --- a/crates/sqltest/Cargo.toml +++ b/crates/sqltest/Cargo.toml @@ -9,9 +9,7 @@ publish = false [dependencies] spacetimedb-lib.workspace = true spacetimedb-core = { workspace = true, features = ["test"] } -spacetimedb-datastore = { workspace = true, features = ["test"] } spacetimedb-sats.workspace = true -spacetimedb-vm.workspace = true anyhow.workspace = true async-trait.workspace = true diff --git a/crates/sqltest/src/space.rs b/crates/sqltest/src/space.rs index 00c37c35ad9..8e44a73cc05 100644 --- a/crates/sqltest/src/space.rs +++ b/crates/sqltest/src/space.rs @@ -2,16 +2,13 @@ use crate::db::DBRunner; use async_trait::async_trait; use spacetimedb::db::relational_db::tests_utils::TestDB; use spacetimedb::error::DBError; -use spacetimedb::sql::compiler::compile_sql; -use spacetimedb::sql::execute::execute_sql; +use spacetimedb::sql::execute::{run, SqlResult}; use spacetimedb::subscription::module_subscription_actor::ModuleSubscriptions; -use spacetimedb_datastore::execution_context::Workload; use spacetimedb_lib::identity::AuthCtx; use spacetimedb_sats::algebraic_value::Packed; use spacetimedb_sats::meta_type::MetaType; use spacetimedb_sats::satn::Satn; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -use spacetimedb_vm::relation::MemTable; +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; use sqllogictest::{AsyncDB, ColumnType, DBOutput}; use std::fs; use std::io::Write; @@ -67,15 +64,24 @@ impl SpaceDb { }) } - pub(crate) fn run_sql(&self, sql: &str) -> anyhow::Result> { - self.conn.with_read_only(Workload::Sql, |tx| { - let ast = compile_sql(&self.conn, &AuthCtx::for_testing(), tx, sql)?; - let (subs, _runtime) = ModuleSubscriptions::for_test_new_runtime(Arc::clone(&self.conn.db)); - let result = execute_sql(&self.conn, sql, ast, self.auth.clone(), Some(&subs))?; - //remove comments to see which SQL worked. Can't collect it outside from lack of a hook in the external `sqllogictest` crate... :( - //append_file(&std::path::PathBuf::from(".ok.sql"), sql)?; - Ok(result) - }) + pub(crate) fn run_sql(&self, sql: &str) -> anyhow::Result<(Vec, Vec)> { + let (subs, runtime) = ModuleSubscriptions::for_test_new_runtime(Arc::clone(&self.conn.db)); + let mut head = Vec::new(); + let SqlResult { rows, .. } = runtime.block_on(run( + Arc::clone(&self.conn.db), + sql.to_string(), + self.auth.clone(), + Some(subs), + None, + &mut head, + ))?; + + let header = head.into_iter().map(|(_, ty)| Kind(ty)).collect(); + + // Remove comments to see which SQL worked. Can't collect it outside from lack of a hook in + // the external `sqllogictest` crate. :( + // append_file(&std::path::PathBuf::from(".ok.sql"), sql)?; + Ok((header, rows)) } pub fn into_db(self) -> DBRunner { @@ -89,20 +95,12 @@ impl AsyncDB for SpaceDb { type ColumnType = Kind; async fn run(&mut self, sql: &str) -> Result, Self::Error> { - let is_query_sql = { - let lower_sql = sql.trim_start().to_ascii_lowercase(); - lower_sql.starts_with("select") - }; - let r = self.run_sql(sql)?; - if !is_query_sql { + let (header, rows) = self.run_sql(sql)?; + if header.is_empty() { return Ok(DBOutput::StatementComplete(0)); } - let r = r.into_iter().next().unwrap(); - - let header = r.head.fields.iter().map(|x| Kind(x.algebraic_type.clone())).collect(); - let output: Vec> = r - .data + let output: Vec> = rows .into_iter() .map(|row| { row.into_iter() diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml deleted file mode 100644 index e0851c967f9..00000000000 --- a/crates/vm/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -[package] -name = "spacetimedb-vm" -version.workspace = true -edition.workspace = true -license-file = "LICENSE" -description = "A VM for SpacetimeDB" -rust-version.workspace = true - -[features] -test = ["spacetimedb-schema/test"] - -[dependencies] -spacetimedb-data-structures.workspace = true -spacetimedb-execution.workspace = true -spacetimedb-sats.workspace = true -spacetimedb-lib.workspace = true -spacetimedb-primitives.workspace = true -spacetimedb-table.workspace = true -spacetimedb-schema.workspace = true - -anyhow.workspace = true -arrayvec.workspace = true -derive_more.workspace = true -itertools.workspace = true -log.workspace = true -smallvec.workspace = true -thiserror.workspace = true -tracing.workspace = true - -[dev-dependencies] -spacetimedb-schema = { path = "../schema", features = ["test"] } -tempfile.workspace = true -typed-arena.workspace = true - -[lints] -workspace = true diff --git a/crates/vm/LICENSE b/crates/vm/LICENSE deleted file mode 120000 index 8540cf8a991..00000000000 --- a/crates/vm/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../licenses/BSL.txt \ No newline at end of file diff --git a/crates/vm/README.md b/crates/vm/README.md deleted file mode 100644 index fc5b684dd86..00000000000 --- a/crates/vm/README.md +++ /dev/null @@ -1,3 +0,0 @@ -> ⚠️ **Internal Crate** ⚠️ -> -> This crate is intended for internal use only. It is **not** stable and may change without notice. diff --git a/crates/vm/src/errors.rs b/crates/vm/src/errors.rs deleted file mode 100644 index 60c0dfa89dc..00000000000 --- a/crates/vm/src/errors.rs +++ /dev/null @@ -1,161 +0,0 @@ -use spacetimedb_lib::operator::OpLogic; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -use spacetimedb_schema::def::error::{AuthError, RelationError}; -use std::fmt; -use thiserror::Error; - -use crate::expr::SourceId; - -#[derive(Error, Debug)] -pub enum ConfigError { - #[error("Config parameter `{0}` not found.")] - NotFound(String), - #[error("Value for config parameter `{0}` is invalid: `{1:?}`. Expected: `{2:?}`")] - TypeError(String, AlgebraicValue, AlgebraicType), -} - -/// Typing Errors -#[derive(Error, Debug)] -pub enum ErrorType { - #[error("Error Parsing `{value}` into type [{ty}]: {err}")] - Parse { value: String, ty: String, err: String }, - #[error("Type Mismatch Join: `{lhs}` != `{rhs}`")] - TypeMismatchJoin { lhs: String, rhs: String }, - #[error("Type Mismatch: `{lhs}` != `{rhs}`")] - TypeMismatch { lhs: String, rhs: String }, - #[error("Type Mismatch: `{lhs}` {op} `{rhs}`, both sides must be an `{expected}` expression")] - TypeMismatchLogic { - op: OpLogic, - lhs: String, - rhs: String, - expected: String, - }, -} - -/// Vm Errors -#[derive(Error, Debug)] -pub enum ErrorVm { - #[error("TypeError {0}")] - Type(#[from] ErrorType), - #[error("ErrorLang {0}")] - Lang(#[from] ErrorLang), - #[error("RelationError {0}")] - Rel(#[from] RelationError), - #[error("AuthError {0}")] - Auth(#[from] AuthError), - #[error("Unsupported: {0}")] - Unsupported(String), - #[error("No source table with index {0:?}")] - NoSuchSource(SourceId), - #[error("ConfigError: {0}")] - Config(#[from] ConfigError), - #[error("{0}")] - Other(#[from] anyhow::Error), -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum ErrorKind { - Custom(String), - Compiler, - TypeMismatch, - Db, - Query, - Duplicated, - Invalid, - NotFound, - Params, - OutOfBounds, - Timeout, - Unauthorized, -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct ErrorCtx { - key: String, - value: String, -} - -impl ErrorCtx { - pub fn new(key: &str, value: &str) -> Self { - Self { - key: key.into(), - value: value.into(), - } - } -} - -/// Define the main User Error type for the VM -#[derive(Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct ErrorLang { - pub kind: ErrorKind, - pub msg: Option, - /// Optional context for the Error: Which record was not found, what value was invalid, etc. - pub context: Option>, -} - -impl ErrorLang { - pub fn new(kind: ErrorKind, msg: Option<&str>) -> Self { - Self { - kind, - msg: msg.map(|x| x.to_string()), - context: None, - } - } - - pub fn with_ctx(self, of: ErrorCtx) -> Self { - let mut x = self; - if let Some(ref mut s) = x.context { - s.push(of) - } else { - x.context = Some(vec![of]) - } - x - } -} - -impl fmt::Display for ErrorLang { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}Error", self.kind)?; - if let Some(msg) = &self.msg { - writeln!(f, ": \"{msg}\"")?; - } - if let Some(err) = self.context.as_deref() { - writeln!(f, " Context:")?; - for e in err { - writeln!(f, " {}: {}", e.key, e.value)?; - } - } - Ok(()) - } -} - -impl From for ErrorLang { - fn from(x: ErrorType) -> Self { - ErrorLang::new(ErrorKind::TypeMismatch, Some(&x.to_string())) - } -} - -impl From for ErrorLang { - fn from(err: ErrorVm) -> Self { - match err { - ErrorVm::Type(err) => err.into(), - ErrorVm::Other(err) => ErrorLang::new(ErrorKind::Db, Some(&err.to_string())), - ErrorVm::Rel(err) => ErrorLang::new(ErrorKind::Db, Some(&err.to_string())), - ErrorVm::Unsupported(err) => ErrorLang::new(ErrorKind::Compiler, Some(&err)), - ErrorVm::Lang(err) => err, - ErrorVm::Auth(err) => ErrorLang::new(ErrorKind::Unauthorized, Some(&err.to_string())), - ErrorVm::Config(err) => ErrorLang::new(ErrorKind::Db, Some(&err.to_string())), - err @ ErrorVm::NoSuchSource(_) => ErrorLang { - kind: ErrorKind::Invalid, - msg: Some(format!("{err:?}")), - context: None, - }, - } - } -} - -impl From for ErrorLang { - fn from(err: RelationError) -> Self { - ErrorVm::Rel(err).into() - } -} diff --git a/crates/vm/src/eval.rs b/crates/vm/src/eval.rs deleted file mode 100644 index d30c15358a4..00000000000 --- a/crates/vm/src/eval.rs +++ /dev/null @@ -1,555 +0,0 @@ -use crate::errors::ErrorVm; -use crate::expr::{Code, ColumnOp, Expr, JoinExpr, ProjectExpr, SourceSet}; -use crate::program::{ProgramVm, Sources}; -use crate::rel_ops::RelOps; -use crate::relation::RelValue; -use spacetimedb_sats::ProductValue; -use spacetimedb_table::table::RowRef; - -pub type IterRows<'a> = dyn RelOps<'a> + 'a; - -/// Utility to simplify the creation of a boxed iterator. -pub fn box_iter<'a, T: Iterator> + 'a>(iter: T) -> Box> + 'a> { - Box::new(iter) -} - -pub fn build_select<'a>(base: impl RelOps<'a> + 'a, cmp: &'a ColumnOp) -> Box> { - Box::new(base.select(move |row| cmp.eval_bool(row))) -} - -pub fn build_project<'a>(base: impl RelOps<'a> + 'a, proj: &'a ProjectExpr) -> Box> { - Box::new(base.project(&proj.cols, move |cols, row| { - RelValue::Projection(row.project_owned(cols)) - })) -} - -pub fn join_inner<'a>(lhs: impl RelOps<'a> + 'a, rhs: impl RelOps<'a> + 'a, q: &'a JoinExpr) -> Box> { - let col_lhs = q.col_lhs.idx(); - let col_rhs = q.col_rhs.idx(); - let key_lhs = move |row: &RelValue<'_>| row.read_column(col_lhs).unwrap().into_owned(); - let key_rhs = move |row: &RelValue<'_>| row.read_column(col_rhs).unwrap().into_owned(); - let pred = move |l: &RelValue<'_>, r: &RelValue<'_>| l.read_column(col_lhs) == r.read_column(col_rhs); - - if q.inner.is_some() { - Box::new(lhs.join_inner(rhs, key_lhs, key_rhs, pred, move |l, r| l.extend(r))) - } else { - Box::new(lhs.join_inner(rhs, key_lhs, key_rhs, pred, move |l, _| l)) - } -} - -/// Execute the code -pub fn eval(p: &mut P, code: Code, sources: Sources<'_, N>) -> Code { - match code { - c @ (Code::Value(_) | Code::Halt(_) | Code::Table(_)) => c, - Code::Block(lines) => { - let mut result = Vec::with_capacity(lines.len()); - for x in lines { - match eval(p, x, sources) { - Code::Pass(None) => {} - r => result.push(r), - }; - } - - match result.len() { - 0 => Code::Pass(None), - 1 => result.pop().unwrap(), - _ => Code::Block(result), - } - } - Code::Crud(q) => p.eval_query(q, sources).unwrap_or_else(|err| Code::Halt(err.into())), - Code::Pass(x) => Code::Pass(x), - } -} - -fn to_vec(of: Vec) -> Code { - let mut new = Vec::with_capacity(of.len()); - for ast in of { - let code = match ast { - Expr::Block(x) => to_vec(x), - Expr::Crud(x) => Code::Crud(*x), - x => Code::Halt(ErrorVm::Unsupported(format!("{x:?}")).into()), - }; - new.push(code); - } - Code::Block(new) -} - -/// Optimize, compile & run the [Expr] -pub fn run_ast( - p: &mut P, - ast: Expr, - mut sources: SourceSet, N>, -) -> Code { - let code = match ast { - Expr::Block(x) => to_vec(x), - Expr::Crud(x) => Code::Crud(*x), - Expr::Value(x) => Code::Value(x), - Expr::Halt(err) => Code::Halt(err), - Expr::Ident(x) => Code::Halt(ErrorVm::Unsupported(format!("Ident {x}")).into()), - }; - eval(p, code, &mut sources) -} - -/// Used internally for testing SQL JOINS. -#[doc(hidden)] -#[cfg(any(test, feature = "test"))] -pub mod test_helpers { - use crate::relation::MemTable; - use core::hash::BuildHasher as _; - use spacetimedb_data_structures::map::DefaultHashBuilder; - use spacetimedb_primitives::TableId; - use spacetimedb_sats::{product, AlgebraicType, AlgebraicValue, ProductType, ProductValue}; - use spacetimedb_schema::{ - relation::{Column, FieldName, Header}, - table_name::TableName, - }; - use std::sync::Arc; - - pub fn mem_table_without_table_name(mem: &MemTable) -> (&[Column], &[ProductValue]) { - (&mem.head.fields, &mem.data) - } - - pub fn header_for_mem_table(table_id: TableId, fields: ProductType) -> Header { - let hash = DefaultHashBuilder::default().hash_one(&fields); - let table_name = TableName::for_test(&format!("mem_{hash:x}")); - - let cols = Vec::from(fields.elements) - .into_iter() - .enumerate() - .map(|(pos, f)| Column::new(FieldName::new(table_id, pos.into()), f.algebraic_type)) - .collect(); - - Header::new(table_id, table_name, cols, Vec::new()) - } - - pub fn mem_table_one_u64(table_id: TableId) -> MemTable { - let ty = ProductType::from([AlgebraicType::U64]); - mem_table(table_id, ty, product![1u64]) - } - - pub fn mem_table>( - table_id: TableId, - ty: impl Into, - iter: impl IntoIterator, - ) -> MemTable { - let head = header_for_mem_table(table_id, ty.into()); - MemTable::from_iter(Arc::new(head), iter.into_iter().map(Into::into)) - } - - pub fn scalar(of: impl Into) -> AlgebraicValue { - of.into() - } - - pub struct GameData { - pub location: MemTable, - pub inv: MemTable, - pub player: MemTable, - pub location_ty: ProductType, - pub inv_ty: ProductType, - pub player_ty: ProductType, - } - - pub fn create_game_data() -> GameData { - let inv_ty = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let row = product!(1u64, "health"); - let inv = mem_table(0.into(), inv_ty.clone(), [row]); - - let player_ty = ProductType::from([("entity_id", AlgebraicType::U64), ("inventory_id", AlgebraicType::U64)]); - let row1 = product!(100u64, 1u64); - let row2 = product!(200u64, 1u64); - let row3 = product!(300u64, 1u64); - let player = mem_table(1.into(), player_ty.clone(), [row1, row2, row3]); - - let location_ty = ProductType::from([ - ("entity_id", AlgebraicType::U64), - ("x", AlgebraicType::F32), - ("z", AlgebraicType::F32), - ]); - let row1 = product!(100u64, 0.0f32, 32.0f32); - let row2 = product!(100u64, 1.0f32, 31.0f32); - let location = mem_table(2.into(), location_ty.clone(), [row1, row2]); - - GameData { - location, - inv, - player, - inv_ty, - player_ty, - location_ty, - } - } -} - -#[cfg(test)] -pub mod tests { - #![allow(clippy::disallowed_macros)] - - use super::test_helpers::*; - use super::*; - use crate::expr::{CrudExpr, Query, QueryExpr, SourceExpr, SourceSet}; - use crate::iterators::RelIter; - use crate::relation::MemTable; - use spacetimedb_lib::operator::{OpCmp, OpLogic}; - use spacetimedb_primitives::ColId; - use spacetimedb_sats::{product, AlgebraicType, ProductType}; - use spacetimedb_schema::def::error::RelationError; - use spacetimedb_schema::relation::{FieldName, Header}; - - /// From an original source of `result`s, applies `queries` and returns a final set of results. - fn build_query<'a, const N: usize>( - mut result: Box>, - queries: &'a [Query], - sources: Sources<'_, N>, - ) -> Box> { - for q in queries { - result = match q { - Query::IndexScan(_) | Query::IndexJoin(_) => panic!("unsupported on memory tables"), - Query::Select(cmp) => build_select(result, cmp), - Query::Project(proj) => build_project(result, proj), - Query::JoinInner(q) => { - let rhs = build_source_expr_query(sources, &q.rhs.source); - let rhs = build_query(rhs, &q.rhs.query, sources); - join_inner(result, rhs, q) - } - }; - } - result - } - - fn build_source_expr_query<'a, const N: usize>(sources: Sources<'_, N>, source: &SourceExpr) -> Box> { - let source_id = source.source_id().unwrap(); - let table = sources.take(source_id).unwrap(); - Box::new(RelIter::new(table.into_iter().map(RelValue::Projection))) - } - - /// A default program that run in-memory without a database - struct Program; - - impl ProgramVm for Program { - fn eval_query(&mut self, query: CrudExpr, sources: Sources<'_, N>) -> Result { - match query { - CrudExpr::Query(query) => { - let result = build_source_expr_query(sources, &query.source); - let rows = build_query(result, &query.query, sources).collect_vec(|row| row.into_product_value()); - - let head = query.head().clone(); - - Ok(Code::Table(MemTable::new(head, query.source.table_access(), rows))) - } - _ => todo!(), - } - } - } - - fn run_query(ast: Expr, sources: SourceSet, N>) -> MemTable { - match run_ast(&mut Program, ast, sources) { - Code::Table(x) => x, - x => panic!("Unexpected result on query: {x}"), - } - } - - fn get_field_pos(table: &MemTable, pos: usize) -> FieldName { - *table.head.fields.get(pos).map(|x| &x.field).unwrap() - } - - #[test] - fn test_select() { - let input = mem_table_one_u64(0.into()); - let field = get_field_pos(&input, 0); - let mut sources = SourceSet::<_, 1>::empty(); - let source_expr = sources.add_mem_table(input); - - let q = QueryExpr::new(source_expr) - .with_select_cmp(OpCmp::Eq, field, scalar(1u64)) - .unwrap(); - - let head = q.head().clone(); - - let result = run_query(q.into(), sources); - let row = product![1u64]; - assert_eq!(result, MemTable::from_iter(head, [row]), "Query"); - } - - #[test] - fn test_project() { - let p = &mut Program; - let table = mem_table_one_u64(0.into()); - - let mut sources = SourceSet::<_, 1>::empty(); - let source_expr = sources.add_mem_table(table.clone()); - - let source = QueryExpr::new(source_expr); - let field = get_field_pos(&table, 0); - let q = source.clone().with_project([field.into()].into(), None).unwrap(); - let head = q.head().clone(); - - let result = run_ast(p, q.into(), sources); - let row = product![1u64]; - assert_eq!(result, Code::Table(MemTable::from_iter(head.clone(), [row])), "Project"); - } - - #[test] - fn test_project_out_of_bounds() { - let table = mem_table_one_u64(0.into()); - - let mut sources = SourceSet::<_, 1>::empty(); - let source_expr = sources.add_mem_table(table.clone()); - - let source = QueryExpr::new(source_expr); - // This field is out of bounds of `table`'s header, so `run_ast` will panic. - let field = FieldName::new(table.head.table_id, 1.into()); - assert!(matches!( - source.with_project([field.into()].into(), None).unwrap_err(), - RelationError::FieldNotFound(_, f) if f == field, - )); - } - - #[test] - fn test_join_inner() { - let table_id = 0.into(); - let table = mem_table_one_u64(table_id); - let table_name = table.head.table_name.clone(); - let col: ColId = 0.into(); - let field = table.head.fields[col.idx()].clone(); - - let mut sources = SourceSet::<_, 2>::empty(); - let source_expr = sources.add_mem_table(table.clone()); - let second_source_expr = sources.add_mem_table(table); - - let q = QueryExpr::new(source_expr).with_join_inner(second_source_expr, col, col, false); - let result = run_query(q.into(), sources); - - // The expected result. - let head = Header::new(table_id, table_name, [field.clone(), field].into(), Vec::new()); - let input = MemTable::from_iter(head.into(), [product!(1u64, 1u64)]); - - println!("{}", &result.head); - println!("{}", &input.head); - - assert_eq!( - mem_table_without_table_name(&result), - mem_table_without_table_name(&input), - "Project" - ); - } - - #[test] - fn test_semijoin() { - let table_id = 0.into(); - let table = mem_table_one_u64(table_id); - let col = 0.into(); - - let mut sources = SourceSet::<_, 2>::empty(); - let source_expr = sources.add_mem_table(table.clone()); - let second_source_expr = sources.add_mem_table(table); - - let q = QueryExpr::new(source_expr).with_join_inner(second_source_expr, col, col, true); - let result = run_query(q.into(), sources); - - // The expected result. - let inv = ProductType::from([(None, AlgebraicType::U64)]); - let input = mem_table(table_id, inv, [product![1u64]]); - - println!("{}", &result.head); - println!("{}", &input.head); - - assert_eq!( - mem_table_without_table_name(&result), - mem_table_without_table_name(&input), - "Semijoin should not be projected", - ); - } - - #[test] - fn test_query_logic() { - let inv = ProductType::from([("id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - - let row = product![1u64, "health"]; - - let input = mem_table(0.into(), inv, vec![row]); - let inv = input.clone(); - - let mut sources = SourceSet::<_, 1>::empty(); - let source_expr = sources.add_mem_table(input.clone()); - - let q = QueryExpr::new(source_expr.clone()) - .with_select_cmp(OpLogic::And, scalar(true), scalar(true)) - .unwrap(); - - let result = run_query(q.into(), sources); - - assert_eq!(result, inv.clone(), "Query And"); - - let mut sources = SourceSet::<_, 1>::empty(); - let source_expr = sources.add_mem_table(input); - - let q = QueryExpr::new(source_expr) - .with_select_cmp(OpLogic::Or, scalar(true), scalar(false)) - .unwrap(); - - let result = run_query(q.into(), sources); - - assert_eq!(result, inv, "Query Or"); - } - - #[test] - /// Inventory - /// | id: u64 | name : String | - fn test_query_inner_join() { - let inv = ProductType::from([("id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - - let row = product![1u64, "health"]; - - let table_id = 0.into(); - let input = mem_table(table_id, inv, [row]); - let col = 0.into(); - - let mut sources = SourceSet::<_, 2>::empty(); - let source_expr = sources.add_mem_table(input.clone()); - let second_source_expr = sources.add_mem_table(input); - - let q = QueryExpr::new(source_expr).with_join_inner(second_source_expr, col, col, false); - - let result = run_query(q.into(), sources); - - //The expected result - let inv = ProductType::from([ - (None, AlgebraicType::U64), - (Some("id"), AlgebraicType::U64), - (Some("name"), AlgebraicType::String), - ]); - let row = product![1u64, "health", 1u64, "health"]; - let input = mem_table(table_id, inv, vec![row]); - assert_eq!(result.data, input.data, "Project"); - } - - #[test] - /// Inventory - /// | id: u64 | name : String | - fn test_query_semijoin() { - let inv = ProductType::from([("id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - - let row = product![1u64, "health"]; - - let table_id = 0.into(); - let input = mem_table(table_id, inv, [row]); - let col = 0.into(); - - let mut sources = SourceSet::<_, 2>::empty(); - let source_expr = sources.add_mem_table(input.clone()); - let second_source_expr = sources.add_mem_table(input); - - let q = QueryExpr::new(source_expr).with_join_inner(second_source_expr, col, col, true); - - let result = run_query(q.into(), sources); - - // The expected result. - let inv = ProductType::from([(None, AlgebraicType::U64), (Some("name"), AlgebraicType::String)]); - let row = product![1u64, "health"]; - let input = mem_table(table_id, inv, vec![row]); - assert_eq!(result.data, input.data, "Semijoin should not project"); - } - - #[test] - /// Inventory - /// | inventory_id: u64 | name : String | - /// Player - /// | entity_id: u64 | inventory_id : u64 | - /// Location - /// | entity_id: u64 | x : f32 | z : f32 | - fn test_query_game() { - // See table above. - let data = create_game_data(); - let inv @ [inv_inventory_id, _] = [0, 1].map(|c| c.into()); - let inv_head = data.inv.head.clone(); - let inv_expr = |col: ColId| inv_head.fields[col.idx()].field.into(); - let [location_entity_id, location_x, location_z] = [0, 1, 2].map(|c| c.into()); - let [player_entity_id, player_inventory_id] = [0, 1].map(|c| c.into()); - let loc_head = data.location.head.clone(); - let loc_field = |col: ColId| loc_head.fields[col.idx()].field; - let inv_table_id = data.inv.head.table_id; - let player_table_id = data.player.head.table_id; - - let mut sources = SourceSet::<_, 2>::empty(); - let player_source_expr = sources.add_mem_table(data.player.clone()); - let location_source_expr = sources.add_mem_table(data.location.clone()); - - // SELECT - // Player.* - // FROM - // Player - // JOIN Location - // ON Location.entity_id = Player.entity_id - // WHERE x > 0 AND x <= 32 AND z > 0 AND z <= 32 - let q = QueryExpr::new(player_source_expr) - .with_join_inner(location_source_expr, player_entity_id, location_entity_id, true) - .with_select_cmp(OpCmp::Gt, loc_field(location_x), scalar(0.0f32)) - .unwrap() - .with_select_cmp(OpCmp::LtEq, loc_field(location_x), scalar(32.0f32)) - .unwrap() - .with_select_cmp(OpCmp::Gt, loc_field(location_z), scalar(0.0f32)) - .unwrap() - .with_select_cmp(OpCmp::LtEq, loc_field(location_z), scalar(32.0f32)) - .unwrap(); - - let result = run_query(q.into(), sources); - - let ty = ProductType::from([("entity_id", AlgebraicType::U64), ("inventory_id", AlgebraicType::U64)]); - let row1 = product!(100u64, 1u64); - let input = mem_table(player_table_id, ty, [row1]); - - assert_eq!( - mem_table_without_table_name(&result), - mem_table_without_table_name(&input), - "Player" - ); - - let mut sources = SourceSet::<_, 3>::empty(); - let player_source_expr = sources.add_mem_table(data.player); - let location_source_expr = sources.add_mem_table(data.location); - let inventory_source_expr = sources.add_mem_table(data.inv); - - // SELECT - // Inventory.* - // FROM - // Inventory - // JOIN Player - // ON Inventory.inventory_id = Player.inventory_id - // JOIN Location - // ON Player.entity_id = Location.entity_id - // WHERE x > 0 AND x <= 32 AND z > 0 AND z <= 32 - let q = QueryExpr::new(inventory_source_expr) - // NOTE: The way this query is set up, the first join must be an inner join, not a semijoin, - // so that the second join has access to the `Player.entity_id` field. - // This necessitates a trailing `project` to get just `Inventory.*`. - .with_join_inner(player_source_expr, inv_inventory_id, player_inventory_id, false) - .with_join_inner( - location_source_expr, - (inv_head.fields.len() + player_entity_id.idx()).into(), - location_entity_id, - true, - ) - .with_select_cmp(OpCmp::Gt, loc_field(location_x), scalar(0.0f32)) - .unwrap() - .with_select_cmp(OpCmp::LtEq, loc_field(location_x), scalar(32.0f32)) - .unwrap() - .with_select_cmp(OpCmp::Gt, loc_field(location_z), scalar(0.0f32)) - .unwrap() - .with_select_cmp(OpCmp::LtEq, loc_field(location_z), scalar(32.0f32)) - .unwrap() - .with_project(inv.map(inv_expr).into(), Some(inv_table_id)) - .unwrap(); - - let result = run_query(q.into(), sources); - - let ty = ProductType::from([("inventory_id", AlgebraicType::U64), ("name", AlgebraicType::String)]); - let row1 = product!(1u64, "health"); - let input = mem_table(inv_table_id, ty, [row1]); - - assert_eq!( - mem_table_without_table_name(&result), - mem_table_without_table_name(&input), - "Inventory" - ); - } -} diff --git a/crates/vm/src/expr.rs b/crates/vm/src/expr.rs deleted file mode 100644 index b3d4ef5020f..00000000000 --- a/crates/vm/src/expr.rs +++ /dev/null @@ -1,2666 +0,0 @@ -use crate::errors::{ErrorKind, ErrorLang}; -use crate::operator::{OpCmp, OpLogic, OpQuery}; -use crate::relation::{MemTable, RelValue}; -use arrayvec::ArrayVec; -use core::slice::from_ref; -use derive_more::From; -use itertools::Itertools; -use smallvec::SmallVec; -use spacetimedb_data_structures::map::{HashSet, IntMap}; -use spacetimedb_lib::db::auth::{StAccess, StTableType}; -use spacetimedb_lib::identity::AuthCtx; -use spacetimedb_primitives::*; -use spacetimedb_sats::satn::Satn; -use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; -use spacetimedb_schema::def::error::{AuthError, RelationError}; -use spacetimedb_schema::relation::{ColExpr, DbTable, FieldName, Header}; -use spacetimedb_schema::schema::TableSchema; -use spacetimedb_schema::table_name::TableName; -use std::borrow::Cow; -use std::cmp::Reverse; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; -use std::ops::Bound; -use std::sync::Arc; -use std::{fmt, iter, mem}; - -/// Trait for checking if the `caller` have access to `Self` -pub trait AuthAccess { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError>; -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)] -pub enum FieldExpr { - Name(FieldName), - Value(AlgebraicValue), -} - -impl FieldExpr { - pub fn strip_table(self) -> ColExpr { - match self { - Self::Name(field) => ColExpr::Col(field.col), - Self::Value(value) => ColExpr::Value(value), - } - } - - pub fn name_to_col(self, head: &Header) -> Result { - match self { - Self::Value(val) => Ok(ColExpr::Value(val)), - Self::Name(field) => head.column_pos_or_err(field).map(ColExpr::Col), - } - } -} - -impl fmt::Display for FieldExpr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - FieldExpr::Name(x) => write!(f, "{x}"), - FieldExpr::Value(x) => write!(f, "{}", x.to_satn()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)] -pub enum FieldOp { - #[from] - Field(FieldExpr), - Cmp { - op: OpQuery, - lhs: Box, - rhs: Box, - }, -} - -type FieldOpFlat = SmallVec<[FieldOp; 1]>; - -impl FieldOp { - pub fn new(op: OpQuery, lhs: Self, rhs: Self) -> Self { - Self::Cmp { - op, - lhs: Box::new(lhs), - rhs: Box::new(rhs), - } - } - - pub fn cmp(field: impl Into, op: OpCmp, value: impl Into) -> Self { - Self::new( - OpQuery::Cmp(op), - Self::Field(FieldExpr::Name(field.into())), - Self::Field(FieldExpr::Value(value.into())), - ) - } - - pub fn names_to_cols(self, head: &Header) -> Result { - match self { - Self::Field(field) => field.name_to_col(head).map(ColumnOp::from), - Self::Cmp { op, lhs, rhs } => { - let lhs = lhs.names_to_cols(head)?; - let rhs = rhs.names_to_cols(head)?; - Ok(ColumnOp::new(op, lhs, rhs)) - } - } - } - - /// Flattens a nested conjunction of AND expressions. - /// - /// For example, `a = 1 AND b = 2 AND c = 3` becomes `[a = 1, b = 2, c = 3]`. - /// - /// This helps with splitting the kinds of `queries`, - /// that *could* be answered by a `index`, - /// from the ones that need to be executed with a `scan`. - pub fn flatten_ands(self) -> FieldOpFlat { - fn fill_vec(buf: &mut FieldOpFlat, op: FieldOp) { - match op { - FieldOp::Cmp { - op: OpQuery::Logic(OpLogic::And), - lhs, - rhs, - } => { - fill_vec(buf, *lhs); - fill_vec(buf, *rhs); - } - op => buf.push(op), - } - } - let mut buf = SmallVec::new(); - fill_vec(&mut buf, self); - buf - } -} - -impl fmt::Display for FieldOp { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Field(x) => { - write!(f, "{x}") - } - Self::Cmp { op, lhs, rhs } => { - write!(f, "{lhs} {op} {rhs}") - } - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)] -pub enum ColumnOp { - /// The value is the the column at `to_index(col)` in the row, i.e., `row.read_column(to_index(col))`. - #[from] - Col(ColId), - /// The value is the embedded value. - #[from] - Val(AlgebraicValue), - /// The value is `eval_cmp(cmp, row.read_column(to_index(lhs)), rhs)`. - /// This is an optimized version of `Cmp`, avoiding one depth of nesting. - ColCmpVal { - lhs: ColId, - cmp: OpCmp, - rhs: AlgebraicValue, - }, - /// The value is `eval_cmp(cmp, eval(row, lhs), eval(row, rhs))`. - Cmp { - lhs: Box, - cmp: OpCmp, - rhs: Box, - }, - /// Let `conds = eval(row, operands_i)`. - /// For `op = OpLogic::And`, the value is `all(conds)`. - /// For `op = OpLogic::Or`, the value is `any(conds)`. - Log { op: OpLogic, operands: Box<[ColumnOp]> }, -} - -impl ColumnOp { - pub fn new(op: OpQuery, lhs: Self, rhs: Self) -> Self { - match op { - OpQuery::Cmp(cmp) => match (lhs, rhs) { - (ColumnOp::Col(lhs), ColumnOp::Val(rhs)) => Self::cmp(lhs, cmp, rhs), - (lhs, rhs) => Self::Cmp { - lhs: Box::new(lhs), - cmp, - rhs: Box::new(rhs), - }, - }, - OpQuery::Logic(op) => Self::Log { - op, - operands: [lhs, rhs].into(), - }, - } - } - - pub fn cmp(col: impl Into, cmp: OpCmp, val: impl Into) -> Self { - let lhs = col.into(); - let rhs = val.into(); - Self::ColCmpVal { lhs, cmp, rhs } - } - - /// Returns a new op where `lhs` and `rhs` are logically AND-ed together. - fn and(lhs: Self, rhs: Self) -> Self { - let ands = |operands| { - let op = OpLogic::And; - Self::Log { op, operands } - }; - - match (lhs, rhs) { - // Merge a pair of ⋀ into a single ⋀. - ( - Self::Log { - op: OpLogic::And, - operands: lhs, - }, - Self::Log { - op: OpLogic::And, - operands: rhs, - }, - ) => { - let mut operands = Vec::from(lhs); - operands.append(&mut Vec::from(rhs)); - ands(operands.into()) - } - // Merge ⋀ with a single operand. - ( - Self::Log { - op: OpLogic::And, - operands: lhs, - }, - rhs, - ) => { - let mut operands = Vec::from(lhs); - operands.push(rhs); - ands(operands.into()) - } - // And together lhs and rhs. - (lhs, rhs) => ands([lhs, rhs].into()), - } - } - - /// Returns an op where `col_i op value_i` are all `AND`ed together. - fn and_cmp(op: OpCmp, cols: &ColList, value: AlgebraicValue) -> Self { - let cmp = |(col, value): (ColId, _)| Self::cmp(col, op, value); - - // For singleton constraints, the `value` must be used directly. - if let Some(head) = cols.as_singleton() { - return cmp((head, value)); - } - - // Otherwise, pair column ids and product fields together. - let operands = cols.iter().zip(value.into_product().unwrap()).map(cmp).collect(); - Self::Log { - op: OpLogic::And, - operands, - } - } - - /// Returns an op where `cols` must be within bounds. - /// This handles both the case of single-col bounds and multi-col bounds. - fn from_op_col_bounds(cols: &ColList, bounds: (Bound, Bound)) -> Self { - let (cmp, value) = match bounds { - // Equality; field <= value && field >= value <=> field = value - (Bound::Included(a), Bound::Included(b)) if a == b => (OpCmp::Eq, a), - // Inclusive lower bound => field >= value - (Bound::Included(value), Bound::Unbounded) => (OpCmp::GtEq, value), - // Exclusive lower bound => field > value - (Bound::Excluded(value), Bound::Unbounded) => (OpCmp::Gt, value), - // Inclusive upper bound => field <= value - (Bound::Unbounded, Bound::Included(value)) => (OpCmp::LtEq, value), - // Exclusive upper bound => field < value - (Bound::Unbounded, Bound::Excluded(value)) => (OpCmp::Lt, value), - (Bound::Unbounded, Bound::Unbounded) => unreachable!(), - (lower_bound, upper_bound) => { - let lhs = Self::from_op_col_bounds(cols, (lower_bound, Bound::Unbounded)); - let rhs = Self::from_op_col_bounds(cols, (Bound::Unbounded, upper_bound)); - return ColumnOp::and(lhs, rhs); - } - }; - ColumnOp::and_cmp(cmp, cols, value) - } - - /// Converts `self` to the lhs `ColId` and the `OpCmp` if this is a comparison. - fn as_col_cmp(&self) -> Option<(ColId, OpCmp)> { - match self { - Self::ColCmpVal { lhs, cmp, rhs: _ } => Some((*lhs, *cmp)), - Self::Cmp { lhs, cmp, rhs: _ } => match &**lhs { - ColumnOp::Col(col) => Some((*col, *cmp)), - _ => None, - }, - _ => None, - } - } - - /// Evaluate `self` where `ColId`s are translated to values by indexing into `row`. - fn eval<'a>(&'a self, row: &'a RelValue<'_>) -> Cow<'a, AlgebraicValue> { - let into = |b| Cow::Owned(AlgebraicValue::Bool(b)); - - match self { - Self::Col(col) => row.read_column(col.idx()).unwrap(), - Self::Val(val) => Cow::Borrowed(val), - Self::ColCmpVal { lhs, cmp, rhs } => into(Self::eval_cmp_col_val(row, *cmp, *lhs, rhs)), - Self::Cmp { lhs, cmp, rhs } => into(Self::eval_cmp(row, *cmp, lhs, rhs)), - Self::Log { op, operands } => into(Self::eval_log(row, *op, operands)), - } - } - - /// Evaluate `self` to a `bool` where `ColId`s are translated to values by indexing into `row`. - pub fn eval_bool(&self, row: &RelValue<'_>) -> bool { - match self { - Self::Col(col) => *row.read_column(col.idx()).unwrap().as_bool().unwrap(), - Self::Val(val) => *val.as_bool().unwrap(), - Self::ColCmpVal { lhs, cmp, rhs } => Self::eval_cmp_col_val(row, *cmp, *lhs, rhs), - Self::Cmp { lhs, cmp, rhs } => Self::eval_cmp(row, *cmp, lhs, rhs), - Self::Log { op, operands } => Self::eval_log(row, *op, operands), - } - } - - /// Evaluates `lhs cmp rhs` according to `Ord for AlgebraicValue`. - fn eval_op_cmp(cmp: OpCmp, lhs: &AlgebraicValue, rhs: &AlgebraicValue) -> bool { - match cmp { - OpCmp::Eq => lhs == rhs, - OpCmp::NotEq => lhs != rhs, - OpCmp::Lt => lhs < rhs, - OpCmp::LtEq => lhs <= rhs, - OpCmp::Gt => lhs > rhs, - OpCmp::GtEq => lhs >= rhs, - } - } - - /// Evaluates `lhs` to an [`AlgebraicValue`] and runs the comparison `lhs_av op rhs`. - fn eval_cmp_col_val(row: &RelValue<'_>, cmp: OpCmp, lhs: ColId, rhs: &AlgebraicValue) -> bool { - let lhs = row.read_column(lhs.idx()).unwrap(); - Self::eval_op_cmp(cmp, &lhs, rhs) - } - - /// Evaluates `lhs` and `rhs` to [`AlgebraicValue`]s - /// and then runs the comparison `cmp` on them, - /// returning the final `bool` result. - fn eval_cmp(row: &RelValue<'_>, cmp: OpCmp, lhs: &Self, rhs: &Self) -> bool { - let lhs = lhs.eval(row); - let rhs = rhs.eval(row); - Self::eval_op_cmp(cmp, &lhs, &rhs) - } - - /// Evaluates if - /// - `op = OpLogic::And` the conjunctions (`⋀`) of `opers` - /// - `op = OpLogic::Or` the disjunctions (`⋁`) of `opers` - fn eval_log(row: &RelValue<'_>, op: OpLogic, opers: &[ColumnOp]) -> bool { - match op { - OpLogic::And => opers.iter().all(|o| o.eval_bool(row)), - OpLogic::Or => opers.iter().any(|o| o.eval_bool(row)), - } - } -} - -impl fmt::Display for ColumnOp { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Col(col) => write!(f, "{col}"), - Self::Val(val) => write!(f, "{}", val.to_satn()), - Self::ColCmpVal { lhs, cmp, rhs } => write!(f, "{lhs} {cmp} {}", rhs.to_satn()), - Self::Cmp { cmp, lhs, rhs } => write!(f, "{lhs} {cmp} {rhs}"), - Self::Log { op, operands } => write!(f, "{}", operands.iter().format((*op).into())), - } - } -} - -impl From for ColumnOp { - fn from(ce: ColExpr) -> Self { - match ce { - ColExpr::Col(c) => c.into(), - ColExpr::Value(v) => v.into(), - } - } -} - -impl From for Option { - fn from(value: Query) -> Self { - match value { - Query::IndexScan(op) => Some(ColumnOp::from_op_col_bounds(&op.columns, op.bounds)), - Query::Select(op) => Some(op), - _ => None, - } - } -} - -/// An identifier for a data source (i.e. a table) in a query plan. -/// -/// When compiling a query plan, rather than embedding the inputs in the plan, -/// we annotate each input with a `SourceId`, and the compiled plan refers to its inputs by id. -/// This allows the plan to be re-used with distinct inputs, -/// assuming the inputs obey the same schema. -/// -/// Note that re-using a query plan is only a good idea -/// if the new inputs are similar to those used for compilation -/// in terms of cardinality and distribution. -#[derive(Debug, Copy, Clone, PartialEq, Eq, From, Hash)] -pub struct SourceId(pub usize); - -/// Types that relate [`SourceId`]s to their in-memory tables. -/// -/// Rather than embedding tables in query plans, we store a [`SourceExpr::InMemory`], -/// which contains the information necessary for optimization along with a `SourceId`. -/// Query execution then executes the plan, and when it encounters a `SourceExpr::InMemory`, -/// retrieves the `Self::Source` table from the corresponding provider. -/// This allows query plans to be re-used, though each execution might require a new provider. -/// -/// An in-memory table `Self::Source` is a type capable of producing [`RelValue<'a>`]s. -/// The general form of this is `Iterator>`. -/// Depending on the situation, this could be e.g., -/// - [`MemTable`], producing [`RelValue::Projection`], -/// - `&'a [ProductValue]` producing [`RelValue::ProjRef`]. -pub trait SourceProvider<'a> { - /// The type of in-memory tables that this provider uses. - type Source: 'a + IntoIterator>; - - /// Retrieve the `Self::Source` associated with `id`, if any. - /// - /// Taking the same `id` a second time may or may not yield the same source. - /// Callers should not assume that a generic provider will yield it more than once. - /// This means that a query plan may not include multiple references to the same [`SourceId`]. - /// - /// Implementations are also not obligated to inspect `id`, e.g., if there's only one option. - fn take_source(&mut self, id: SourceId) -> Option; -} - -impl<'a, I: 'a + IntoIterator>, F: FnMut(SourceId) -> Option> SourceProvider<'a> for F { - type Source = I; - fn take_source(&mut self, id: SourceId) -> Option { - self(id) - } -} - -impl<'a, I: 'a + IntoIterator>> SourceProvider<'a> for Option { - type Source = I; - fn take_source(&mut self, _: SourceId) -> Option { - self.take() - } -} - -pub struct NoInMemUsed; - -impl<'a> SourceProvider<'a> for NoInMemUsed { - type Source = iter::Empty>; - fn take_source(&mut self, _: SourceId) -> Option { - None - } -} - -/// A [`SourceProvider`] backed by an `ArrayVec`. -/// -/// Internally, the `SourceSet` stores an `Option` for each planned [`SourceId`] -/// which are [`Option::take`]n out of the set. -#[derive(Debug, PartialEq, Eq, Clone)] -#[repr(transparent)] -pub struct SourceSet( - // Benchmarks showed an improvement in performance - // on incr-select by ~10% by not using `Vec>`. - ArrayVec, N>, -); - -impl<'a, T: 'a + IntoIterator>, const N: usize> SourceProvider<'a> for SourceSet { - type Source = T; - fn take_source(&mut self, id: SourceId) -> Option { - self.take(id) - } -} - -impl From<[T; N]> for SourceSet { - #[inline] - fn from(sources: [T; N]) -> Self { - Self(sources.map(Some).into()) - } -} - -impl SourceSet { - /// Returns an empty source set. - pub fn empty() -> Self { - Self(ArrayVec::new()) - } - - /// Get a fresh `SourceId` which can be used as the id for a new entry. - fn next_id(&self) -> SourceId { - SourceId(self.0.len()) - } - - /// Insert an entry into this `SourceSet` so it can be used in a query plan, - /// and return a [`SourceId`] which can be embedded in that plan. - pub fn add(&mut self, table: T) -> SourceId { - let source_id = self.next_id(); - self.0.push(Some(table)); - source_id - } - - /// Extract the entry referred to by `id` from this `SourceSet`, - /// leaving a "gap" in its place. - /// - /// Subsequent calls to `take` on the same `id` will return `None`. - pub fn take(&mut self, id: SourceId) -> Option { - self.0.get_mut(id.0).map(mem::take).unwrap_or_default() - } - - /// Returns the number of slots for [`MemTable`]s in this set. - /// - /// Calling `self.take_mem_table(...)` or `self.take_table(...)` won't affect this number. - pub fn len(&self) -> usize { - self.0.len() - } - - /// Returns whether this set has any slots for [`MemTable`]s. - /// - /// Calling `self.take_mem_table(...)` or `self.take_table(...)` won't affect whether the set is empty. - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -impl std::ops::Index for SourceSet { - type Output = Option; - - fn index(&self, idx: SourceId) -> &Self::Output { - &self.0[idx.0] - } -} - -impl std::ops::IndexMut for SourceSet { - fn index_mut(&mut self, idx: SourceId) -> &mut Self::Output { - &mut self.0[idx.0] - } -} - -impl SourceSet, N> { - /// Insert a [`MemTable`] into this `SourceSet` so it can be used in a query plan, - /// and return a [`SourceExpr`] which can be embedded in that plan. - pub fn add_mem_table(&mut self, table: MemTable) -> SourceExpr { - let id = self.add(table.data); - SourceExpr::from_mem_table(table.head, table.table_access, id) - } -} - -/// A reference to a table within a query plan, -/// used as the source for selections, scans, filters and joins. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub enum SourceExpr { - /// A plan for a "virtual" or projected table. - /// - /// The actual in-memory table, e.g., [`MemTable`] or `&'a [ProductValue]` - /// is not stored within the query plan; - /// rather, the `source_id` is an index which corresponds to the table in e.g., a [`SourceSet`]. - /// - /// This allows query plans to be reused by supplying e.g., a new [`SourceSet`]. - InMemory { - source_id: SourceId, - header: Arc

, - table_type: StTableType, - table_access: StAccess, - }, - /// A plan for a database table. Because [`DbTable`] is small and efficiently cloneable, - /// no indirection into a [`SourceSet`] is required. - DbTable(DbTable), -} - -impl SourceExpr { - /// If `self` refers to a [`MemTable`], returns the [`SourceId`] for its location in the plan's [`SourceSet`]. - /// - /// Returns `None` if `self` refers to a [`DbTable`], as [`DbTable`]s are stored directly in the `SourceExpr`, - /// rather than indirected through the [`SourceSet`]. - pub fn source_id(&self) -> Option { - if let SourceExpr::InMemory { source_id, .. } = self { - Some(*source_id) - } else { - None - } - } - - pub fn table_name(&self) -> &TableName { - &self.head().table_name - } - - pub fn table_type(&self) -> StTableType { - match self { - SourceExpr::InMemory { table_type, .. } => *table_type, - SourceExpr::DbTable(db_table) => db_table.table_type, - } - } - - pub fn table_access(&self) -> StAccess { - match self { - SourceExpr::InMemory { table_access, .. } => *table_access, - SourceExpr::DbTable(db_table) => db_table.table_access, - } - } - - pub fn head(&self) -> &Arc
{ - match self { - SourceExpr::InMemory { header, .. } => header, - SourceExpr::DbTable(db_table) => &db_table.head, - } - } - - pub fn is_mem_table(&self) -> bool { - matches!(self, SourceExpr::InMemory { .. }) - } - - pub fn is_db_table(&self) -> bool { - matches!(self, SourceExpr::DbTable(_)) - } - - pub fn from_mem_table(header: Arc
, table_access: StAccess, id: SourceId) -> Self { - SourceExpr::InMemory { - source_id: id, - header, - table_type: StTableType::User, - table_access, - } - } - - pub fn table_id(&self) -> Option { - if let SourceExpr::DbTable(db_table) = self { - Some(db_table.table_id) - } else { - None - } - } - - /// If `self` refers to a [`DbTable`], get a reference to it. - /// - /// Returns `None` if `self` refers to a [`MemTable`]. - /// In that case, retrieving the [`MemTable`] requires inspecting the plan's corresponding [`SourceSet`] - /// via [`SourceSet::take_mem_table`] or [`SourceSet::take_table`]. - pub fn get_db_table(&self) -> Option<&DbTable> { - if let SourceExpr::DbTable(db_table) = self { - Some(db_table) - } else { - None - } - } -} - -impl From<&TableSchema> for SourceExpr { - fn from(value: &TableSchema) -> Self { - SourceExpr::DbTable(value.into()) - } -} - -/// A descriptor for an index semi join operation. -/// -/// The semantics are those of a semijoin with rows from the index or the probe side being returned. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct IndexJoin { - pub probe_side: QueryExpr, - pub probe_col: ColId, - pub index_side: SourceExpr, - pub index_select: Option, - pub index_col: ColId, - /// If true, returns rows from the `index_side`. - /// Otherwise, returns rows from the `probe_side`. - pub return_index_rows: bool, -} - -impl From for QueryExpr { - fn from(join: IndexJoin) -> Self { - let source: SourceExpr = if join.return_index_rows { - join.index_side.clone() - } else { - join.probe_side.source.clone() - }; - QueryExpr { - source, - query: vec![Query::IndexJoin(join)], - } - } -} - -impl IndexJoin { - // Reorder the index and probe sides of an index join. - // This is necessary if the indexed table has been replaced by a delta table. - // A delta table is a virtual table consisting of changes or updates to a physical table. - pub fn reorder(self, row_count: impl Fn(TableId, &str) -> i64) -> Self { - // The probe table must be a physical table. - if self.probe_side.source.is_mem_table() { - return self; - } - // It must have an index defined on the join field. - if !self - .probe_side - .source - .head() - .has_constraint(self.probe_col, Constraints::indexed()) - { - return self; - } - // It must be a linear pipeline of selections. - if !self - .probe_side - .query - .iter() - .all(|op| matches!(op, Query::Select(_) | Query::IndexScan(_))) - { - return self; - } - match self.index_side.get_db_table() { - // If the size of the indexed table is sufficiently large, - // do not reorder. - // - // TODO: This determination is quite arbitrary. - // Ultimately we should be using cardinality estimation. - Some(DbTable { head, table_id, .. }) if row_count(*table_id, &head.table_name) > 500 => self, - // If this is a delta table, we must reorder. - // If this is a sufficiently small physical table, we should reorder. - _ => { - // Merge all selections from the original probe side into a single predicate. - // This includes an index scan if present. - let predicate = self - .probe_side - .query - .into_iter() - .filter_map(>>::into) - .reduce(ColumnOp::and); - // Push any selections on the index side to the probe side. - let probe_side = if let Some(predicate) = self.index_select { - QueryExpr { - source: self.index_side, - query: vec![predicate.into()], - } - } else { - self.index_side.into() - }; - IndexJoin { - // The new probe side consists of the updated rows. - // Plus any selections from the original index probe. - probe_side, - // The new probe field is the previous index field. - probe_col: self.index_col, - // The original probe table is now the table that is being probed. - index_side: self.probe_side.source, - // Any selections from the original probe side are pulled above the index lookup. - index_select: predicate, - // The new index field is the previous probe field. - index_col: self.probe_col, - // Because we have swapped the original index and probe sides of the join, - // the new index join needs to return rows from the opposite side. - return_index_rows: !self.return_index_rows, - } - } - } - } - - // Convert this index join to an inner join, followed by a projection. - // This is needed for incremental evaluation of index joins. - // In particular when there are updates to both the left and right tables. - // In other words, when an index join has two delta tables. - pub fn to_inner_join(self) -> QueryExpr { - if self.return_index_rows { - let (col_lhs, col_rhs) = (self.index_col, self.probe_col); - let rhs = self.probe_side; - - let source = self.index_side; - let inner_join = Query::JoinInner(JoinExpr::new(rhs, col_lhs, col_rhs, None)); - let query = if let Some(predicate) = self.index_select { - vec![predicate.into(), inner_join] - } else { - vec![inner_join] - }; - QueryExpr { source, query } - } else { - let (col_lhs, col_rhs) = (self.probe_col, self.index_col); - let mut rhs: QueryExpr = self.index_side.into(); - - if let Some(predicate) = self.index_select { - rhs.query.push(predicate.into()); - } - - let source = self.probe_side.source; - let inner_join = Query::JoinInner(JoinExpr::new(rhs, col_lhs, col_rhs, None)); - let query = vec![inner_join]; - QueryExpr { source, query } - } - } -} - -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct JoinExpr { - pub rhs: QueryExpr, - pub col_lhs: ColId, - pub col_rhs: ColId, - /// If None, this is a left semi-join, returning rows only from the source table, - /// using the `rhs` as a filter. - /// - /// If Some(_), this is an inner join, returning the concatenation of the matching rows. - pub inner: Option>, -} - -impl JoinExpr { - pub fn new(rhs: QueryExpr, col_lhs: ColId, col_rhs: ColId, inner: Option>) -> Self { - Self { - rhs, - col_lhs, - col_rhs, - inner, - } - } -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum DbType { - Table, - Index, - Sequence, - Constraint, -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum Crud { - Query, - Insert, - Update, - Delete, - Create(DbType), - Drop(DbType), - Config, -} - -#[derive(Debug, Eq, PartialEq)] -pub enum CrudExpr { - Query(QueryExpr), - Insert { - table: DbTable, - rows: Vec, - }, - Update { - delete: QueryExpr, - assignments: IntMap, - }, - Delete { - query: QueryExpr, - }, - SetVar { - name: String, - literal: String, - }, - ReadVar { - name: String, - }, -} - -impl CrudExpr { - pub fn optimize(self, row_count: &impl Fn(TableId, &str) -> i64) -> Self { - match self { - CrudExpr::Query(x) => CrudExpr::Query(x.optimize(row_count)), - _ => self, - } - } - - pub fn is_reads<'a>(exprs: impl IntoIterator) -> bool { - exprs - .into_iter() - .all(|expr| matches!(expr, CrudExpr::Query(_) | CrudExpr::ReadVar { .. })) - } -} - -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct IndexScan { - pub table: DbTable, - pub columns: ColList, - pub bounds: (Bound, Bound), -} - -impl IndexScan { - /// Returns whether this is a point range. - pub fn is_point(&self) -> bool { - match &self.bounds { - (Bound::Included(lower), Bound::Included(upper)) => lower == upper, - _ => false, - } - } -} - -/// A projection operation in a query. -#[derive(Debug, Clone, Eq, PartialEq, From, Hash)] -pub struct ProjectExpr { - pub cols: Vec, - // The table id for a qualified wildcard project, if any. - // If present, further optimizations are possible. - pub wildcard_table: Option, - pub header_after: Arc
, -} - -// An individual operation in a query. -#[derive(Debug, Clone, Eq, PartialEq, From, Hash)] -pub enum Query { - // Fetching rows via an index. - IndexScan(IndexScan), - // Joining rows via an index. - // Equivalent to Index Nested Loop Join. - IndexJoin(IndexJoin), - // A filter over an intermediate relation. - // In particular it does not utilize any indexes. - // If it could it would have already been transformed into an IndexScan. - Select(ColumnOp), - // Projects a set of columns. - Project(ProjectExpr), - // A join of two relations (base or intermediate) based on equality. - // Equivalent to a Nested Loop Join. - // Its operands my use indexes but the join itself does not. - JoinInner(JoinExpr), -} - -impl Query { - /// Iterate over all [`SourceExpr`]s involved in the [`Query`]. - /// - /// Sources are yielded from left to right. Duplicates are not filtered out. - pub fn walk_sources(&self, on_source: &mut impl FnMut(&SourceExpr) -> Result<(), E>) -> Result<(), E> { - match self { - Self::Select(..) | Self::Project(..) => Ok(()), - Self::IndexScan(scan) => on_source(&SourceExpr::DbTable(scan.table.clone())), - Self::IndexJoin(join) => join.probe_side.walk_sources(on_source), - Self::JoinInner(join) => join.rhs.walk_sources(on_source), - } - } -} - -// IndexArgument represents an equality or range predicate that can be answered -// using an index. -#[derive(Debug, PartialEq, Clone)] -enum IndexArgument<'a> { - Eq { - columns: &'a ColList, - value: AlgebraicValue, - }, - LowerBound { - columns: &'a ColList, - value: AlgebraicValue, - inclusive: bool, - }, - UpperBound { - columns: &'a ColList, - value: AlgebraicValue, - inclusive: bool, - }, -} - -#[derive(Debug, PartialEq, Clone)] -enum IndexColumnOp<'a> { - Index(IndexArgument<'a>), - Scan(&'a ColumnOp), -} - -fn make_index_arg(cmp: OpCmp, columns: &ColList, value: AlgebraicValue) -> IndexColumnOp<'_> { - let arg = match cmp { - OpCmp::Eq => IndexArgument::Eq { columns, value }, - OpCmp::NotEq => unreachable!("No IndexArgument for NotEq, caller should've filtered out"), - // a < 5 => exclusive upper bound - OpCmp::Lt => IndexArgument::UpperBound { - columns, - value, - inclusive: false, - }, - // a > 5 => exclusive lower bound - OpCmp::Gt => IndexArgument::LowerBound { - columns, - value, - inclusive: false, - }, - // a <= 5 => inclusive upper bound - OpCmp::LtEq => IndexArgument::UpperBound { - columns, - value, - inclusive: true, - }, - // a >= 5 => inclusive lower bound - OpCmp::GtEq => IndexArgument::LowerBound { - columns, - value, - inclusive: true, - }, - }; - IndexColumnOp::Index(arg) -} - -#[derive(Debug)] -struct ColValue<'a> { - parent: &'a ColumnOp, - col: ColId, - cmp: OpCmp, - value: &'a AlgebraicValue, -} - -impl<'a> ColValue<'a> { - pub fn new(parent: &'a ColumnOp, col: ColId, cmp: OpCmp, value: &'a AlgebraicValue) -> Self { - Self { - parent, - col, - cmp, - value, - } - } -} - -type IndexColumnOpSink<'a> = SmallVec<[IndexColumnOp<'a>; 1]>; -type ColsIndexed = HashSet<(ColId, OpCmp)>; - -/// Pick the best indices that can serve the constraints in `op` -/// where the indices are taken from `header`. -/// -/// This function is designed to handle complex scenarios when selecting the optimal index for a query. -/// The scenarios include: -/// -/// - Combinations of multi- and single-column indexes that could refer to the same column. -/// For example, the table could have indexes `[a]` and `[a, b]]` -/// and a user could query for `WHERE a = 1 AND b = 2 AND a = 3`. -/// -/// - Query constraints can be supplied in any order; -/// i.e., both `WHERE a = 1 AND b = 2` -/// and `WHERE b = 2 AND a = 1` are valid. -/// -/// - Queries against multi-col indices must use `=`, for now, in their constraints. -/// Otherwise, the index cannot be used. -/// -/// - The use of multiple tables could generate redundant/duplicate operations like -/// `[ScanOrIndex::Index(a = 1), ScanOrIndex::Index(a = 1), ScanOrIndex::Scan(a = 1)]`. -/// This *cannot* be handled here. -/// -/// # Returns -/// -/// - A vector of `ScanOrIndex` representing the selected `index` OR `scan` operations. -/// -/// - A HashSet of `(ColId, OpCmp)` representing the columns -/// and operators that can be served by an index. -/// -/// This is required to remove the redundant operation on e.g., -/// `[ScanOrIndex::Index(a = 1), ScanOrIndex::Index(a = 1), ScanOrIndex::Scan(a = 1)]`, -/// that could be generated by calling this function several times by using multiple `JOINS`. -/// -/// # Example -/// -/// If we have a table with `indexes`: `[a], [b], [b, c]` and then try to -/// optimize `WHERE a = 1 AND d > 2 AND c = 2 AND b = 1` we should return -/// -/// -`ScanOrIndex::Index([c, b] = [1, 2])` -/// -`ScanOrIndex::Index(a = 1)` -/// -`ScanOrIndex::Scan(c = 2)` -/// -/// # Note -/// -/// NOTE: For a query like `SELECT * FROM students WHERE age > 18 AND height < 180` -/// we cannot serve this with a single `IndexScan`, -/// but rather, `select_best_index` -/// would give us two separate `IndexScan`s. -/// However, the upper layers of `QueryExpr` building will convert both of those into `Select`s. -fn select_best_index<'a>( - cols_indexed: &mut ColsIndexed, - header: &'a Header, - op: &'a ColumnOp, -) -> IndexColumnOpSink<'a> { - // Collect and sort indices by their lengths, with longest first. - // We do this so that multi-col indices are used first, as they are more efficient. - // TODO(Centril): This could be computed when `Header` is constructed. - let mut indices = header - .constraints - .iter() - .filter(|(_, c)| c.has_indexed()) - .map(|(cl, _)| cl) - .collect::>(); - indices.sort_unstable_by_key(|cl| Reverse(cl.len())); - - let mut found: IndexColumnOpSink = IndexColumnOpSink::default(); - - // Collect fields into a multi-map `(col_id, cmp) -> [col value]`. - // This gives us `log(N)` seek + deletion. - // TODO(Centril): Consider https://docs.rs/small-map/0.1.3/small_map/enum.SmallMap.html - let mut col_map = BTreeMap::<_, SmallVec<[_; 1]>>::new(); - extract_cols(op, &mut col_map, &mut found); - - // Go through each index, - // consuming all column constraints that can be served by an index. - for col_list in indices { - // (1) No columns left? We're done. - if col_map.is_empty() { - break; - } - - if let Some(head) = col_list.as_singleton() { - // Go through each operator. - // NOTE: We do not consider `OpCmp::NotEq` at the moment - // since those are typically not answered using an index. - for cmp in [OpCmp::Eq, OpCmp::Lt, OpCmp::LtEq, OpCmp::Gt, OpCmp::GtEq] { - // For a single column index, - // we want to avoid the `ProductValue` indirection of below. - for ColValue { cmp, value, col, .. } in col_map.remove(&(head, cmp)).into_iter().flatten() { - found.push(make_index_arg(cmp, col_list, value.clone())); - cols_indexed.insert((col, cmp)); - } - } - } else { - // We have a multi column index. - // Try to fit constraints `c_0 = v_0, ..., c_n = v_n` to this index. - // - // For the time being, we restrict multi-col index scans to `=` only. - // This is what our infrastructure is set-up to handle soundly. - // To extend this support to ranges requires deeper changes. - // TODO(Centril, 2024-05-30): extend this support to ranges. - let cmp = OpCmp::Eq; - - // Compute the minimum number of `=` constraints that every column in the index has. - let mut min_all_cols_num_eq = col_list - .iter() - .map(|col| col_map.get(&(col, cmp)).map_or(0, |fs| fs.len())) - .min() - .unwrap_or_default(); - - // For all of these sets of constraints, - // construct the value to compare against. - while min_all_cols_num_eq > 0 { - let mut elems = Vec::with_capacity(col_list.len() as usize); - for col in col_list.iter() { - // Cannot panic as `min_all_cols_num_eq > 0`. - let col_val = pop_multimap(&mut col_map, (col, cmp)).unwrap(); - cols_indexed.insert((col_val.col, cmp)); - // Add the column value to the product value. - elems.push(col_val.value.clone()); - } - // Construct the index scan. - let value = AlgebraicValue::product(elems); - found.push(make_index_arg(cmp, col_list, value)); - min_all_cols_num_eq -= 1; - } - } - } - - // The remaining constraints must be served by a scan. - found.extend( - col_map - .into_iter() - .flat_map(|(_, fs)| fs) - .map(|f| IndexColumnOp::Scan(f.parent)), - ); - - found -} - -/// Pop an element from `map[key]` in the multimap `map`, -/// removing the entry entirely if there are no more elements left after popping. -fn pop_multimap(map: &mut BTreeMap>, key: K) -> Option { - let Entry::Occupied(mut entry) = map.entry(key) else { - return None; - }; - let fields = entry.get_mut(); - let val = fields.pop(); - if fields.is_empty() { - entry.remove(); - } - val -} - -/// Extracts a list of `col = val` constraints that *could* be answered by an index -/// and populates those into `col_map`. -/// The [`ColumnOp`]s that don't fit `col = val` -/// are made into [`IndexColumnOp::Scan`]s immediately which are added to `found`. -fn extract_cols<'a>( - op: &'a ColumnOp, - col_map: &mut BTreeMap<(ColId, OpCmp), SmallVec<[ColValue<'a>; 1]>>, - found: &mut IndexColumnOpSink<'a>, -) { - let mut add_field = |parent, op, col, val| { - let fv = ColValue::new(parent, col, op, val); - col_map.entry((col, op)).or_default().push(fv); - }; - - match op { - ColumnOp::Cmp { cmp, lhs, rhs } => { - if let (ColumnOp::Col(col), ColumnOp::Val(val)) = (&**lhs, &**rhs) { - // `lhs` must be a field that exists and `rhs` must be a value. - add_field(op, *cmp, *col, val); - } - } - ColumnOp::ColCmpVal { lhs, cmp, rhs } => add_field(op, *cmp, *lhs, rhs), - ColumnOp::Log { - op: OpLogic::And, - operands, - } => { - for oper in operands.iter() { - extract_cols(oper, col_map, found); - } - } - ColumnOp::Log { op: OpLogic::Or, .. } | ColumnOp::Col(_) | ColumnOp::Val(_) => { - found.push(IndexColumnOp::Scan(op)); - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -// TODO(bikeshedding): Refactor this struct so that `IndexJoin`s replace the `table`, -// rather than appearing as the first element of the `query`. -// -// `IndexJoin`s do not behave like filters; in fact they behave more like data sources. -// A query conceptually starts with either a single table or an `IndexJoin`, -// and then stacks a set of filters on top of that. -pub struct QueryExpr { - pub source: SourceExpr, - pub query: Vec, -} - -impl From for QueryExpr { - fn from(source: SourceExpr) -> Self { - QueryExpr { source, query: vec![] } - } -} - -impl QueryExpr { - pub fn new>(source: T) -> Self { - Self { - source: source.into(), - query: vec![], - } - } - - /// Iterate over all [`SourceExpr`]s involved in the [`QueryExpr`]. - /// - /// Sources are yielded from left to right. Duplicates are not filtered out. - pub fn walk_sources(&self, on_source: &mut impl FnMut(&SourceExpr) -> Result<(), E>) -> Result<(), E> { - on_source(&self.source)?; - self.query.iter().try_for_each(|q| q.walk_sources(on_source)) - } - - /// Returns the last [`Header`] of this query. - /// - /// Starts the scan from the back to the front, - /// looking for query operations that change the `Header`. - /// These are `JoinInner` and `Project`. - /// If there are no operations that alter the `Header`, - /// this falls back to the origin `self.source.head()`. - pub fn head(&self) -> &Arc
{ - self.query - .iter() - .rev() - .find_map(|op| match op { - Query::Select(_) => None, - Query::IndexScan(scan) => Some(&scan.table.head), - Query::IndexJoin(join) if join.return_index_rows => Some(join.index_side.head()), - Query::IndexJoin(join) => Some(join.probe_side.head()), - Query::Project(proj) => Some(&proj.header_after), - Query::JoinInner(join) => join.inner.as_ref(), - }) - .unwrap_or_else(|| self.source.head()) - } - - /// Does this query read from a given table? - pub fn reads_from_table(&self, id: &TableId) -> bool { - self.source.table_id() == Some(*id) - || self.query.iter().any(|q| match q { - Query::Select(_) | Query::Project(..) => false, - Query::IndexScan(scan) => scan.table.table_id == *id, - Query::JoinInner(join) => join.rhs.reads_from_table(id), - Query::IndexJoin(join) => { - join.index_side.table_id() == Some(*id) || join.probe_side.reads_from_table(id) - } - }) - } - - // Generate an index scan for an equality predicate if this is the first operator. - // Otherwise generate a select. - // TODO: Replace these methods with a proper query optimization pass. - pub fn with_index_eq(mut self, table: DbTable, columns: ColList, value: AlgebraicValue) -> Self { - let point = |v: AlgebraicValue| (Bound::Included(v.clone()), Bound::Included(v)); - - // if this is the first operator in the list, generate index scan - let Some(query) = self.query.pop() else { - let bounds = point(value); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - return self; - }; - match query { - // try to push below join's lhs - Query::JoinInner(JoinExpr { - rhs: - QueryExpr { - source: SourceExpr::DbTable(ref db_table), - .. - }, - .. - }) if table.table_id != db_table.table_id => { - self = self.with_index_eq(db_table.clone(), columns, value); - self.query.push(query); - self - } - // try to push below join's rhs - Query::JoinInner(JoinExpr { - rhs, - col_lhs, - col_rhs, - inner: semi, - }) => { - self.query.push(Query::JoinInner(JoinExpr { - rhs: rhs.with_index_eq(table, columns, value), - col_lhs, - col_rhs, - inner: semi, - })); - self - } - // merge with a preceding select - Query::Select(filter) => { - let op = ColumnOp::and_cmp(OpCmp::Eq, &columns, value); - self.query.push(Query::Select(ColumnOp::and(filter, op))); - self - } - // else generate a new select - query => { - self.query.push(query); - let op = ColumnOp::and_cmp(OpCmp::Eq, &columns, value); - self.query.push(Query::Select(op)); - self - } - } - } - - // Generate an index scan for a range predicate or try merging with a previous index scan. - // Otherwise generate a select. - // TODO: Replace these methods with a proper query optimization pass. - pub fn with_index_lower_bound( - mut self, - table: DbTable, - columns: ColList, - value: AlgebraicValue, - inclusive: bool, - ) -> Self { - // if this is the first operator in the list, generate an index scan - let Some(query) = self.query.pop() else { - let bounds = (Self::bound(value, inclusive), Bound::Unbounded); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - return self; - }; - match query { - // try to push below join's lhs - Query::JoinInner(JoinExpr { - rhs: - QueryExpr { - source: SourceExpr::DbTable(ref db_table), - .. - }, - .. - }) if table.table_id != db_table.table_id => { - self = self.with_index_lower_bound(table, columns, value, inclusive); - self.query.push(query); - self - } - // try to push below join's rhs - Query::JoinInner(JoinExpr { - rhs, - col_lhs, - col_rhs, - inner: semi, - }) => { - self.query.push(Query::JoinInner(JoinExpr { - rhs: rhs.with_index_lower_bound(table, columns, value, inclusive), - col_lhs, - col_rhs, - inner: semi, - })); - self - } - // merge with a preceding upper bounded index scan (inclusive) - Query::IndexScan(IndexScan { - columns: lhs_col_id, - bounds: (Bound::Unbounded, Bound::Included(upper)), - .. - }) if columns == lhs_col_id => { - let bounds = (Self::bound(value, inclusive), Bound::Included(upper)); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - self - } - // merge with a preceding upper bounded index scan (exclusive) - Query::IndexScan(IndexScan { - columns: lhs_col_id, - bounds: (Bound::Unbounded, Bound::Excluded(upper)), - .. - }) if columns == lhs_col_id => { - // Queries like `WHERE x < 5 AND x > 5` never return any rows and are likely mistakes. - // Detect such queries and log a warning. - // Compute this condition early, then compute the resulting query and log it. - // TODO: We should not emit an `IndexScan` in this case. - // Further design work is necessary to decide whether this should be an error at query compile time, - // or whether we should emit a query plan which explicitly says that it will return 0 rows. - // The current behavior is a hack - // because this patch was written (2024-04-01 pgoldman) a short time before the BitCraft alpha, - // and a more invasive change was infeasible. - let is_never = !inclusive && value == upper; - - let bounds = (Self::bound(value, inclusive), Bound::Excluded(upper)); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - - if is_never { - log::warn!("Query will select no rows due to equal excluded bounds: {self:?}") - } - - self - } - // merge with a preceding select - Query::Select(filter) => { - let bounds = (Self::bound(value, inclusive), Bound::Unbounded); - let op = ColumnOp::from_op_col_bounds(&columns, bounds); - self.query.push(Query::Select(ColumnOp::and(filter, op))); - self - } - // else generate a new select - query => { - self.query.push(query); - let bounds = (Self::bound(value, inclusive), Bound::Unbounded); - let op = ColumnOp::from_op_col_bounds(&columns, bounds); - self.query.push(Query::Select(op)); - self - } - } - } - - // Generate an index scan for a range predicate or try merging with a previous index scan. - // Otherwise generate a select. - // TODO: Replace these methods with a proper query optimization pass. - pub fn with_index_upper_bound( - mut self, - table: DbTable, - columns: ColList, - value: AlgebraicValue, - inclusive: bool, - ) -> Self { - // if this is the first operator in the list, generate an index scan - let Some(query) = self.query.pop() else { - self.query.push(Query::IndexScan(IndexScan { - table, - columns, - bounds: (Bound::Unbounded, Self::bound(value, inclusive)), - })); - return self; - }; - match query { - // try to push below join's lhs - Query::JoinInner(JoinExpr { - rhs: - QueryExpr { - source: SourceExpr::DbTable(ref db_table), - .. - }, - .. - }) if table.table_id != db_table.table_id => { - self = self.with_index_upper_bound(table, columns, value, inclusive); - self.query.push(query); - self - } - // try to push below join's rhs - Query::JoinInner(JoinExpr { - rhs, - col_lhs, - col_rhs, - inner: semi, - }) => { - self.query.push(Query::JoinInner(JoinExpr { - rhs: rhs.with_index_upper_bound(table, columns, value, inclusive), - col_lhs, - col_rhs, - inner: semi, - })); - self - } - // merge with a preceding lower bounded index scan (inclusive) - Query::IndexScan(IndexScan { - columns: lhs_col_id, - bounds: (Bound::Included(lower), Bound::Unbounded), - .. - }) if columns == lhs_col_id => { - let bounds = (Bound::Included(lower), Self::bound(value, inclusive)); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - self - } - // merge with a preceding lower bounded index scan (exclusive) - Query::IndexScan(IndexScan { - columns: lhs_col_id, - bounds: (Bound::Excluded(lower), Bound::Unbounded), - .. - }) if columns == lhs_col_id => { - // Queries like `WHERE x < 5 AND x > 5` never return any rows and are likely mistakes. - // Detect such queries and log a warning. - // Compute this condition early, then compute the resulting query and log it. - // TODO: We should not emit an `IndexScan` in this case. - // Further design work is necessary to decide whether this should be an error at query compile time, - // or whether we should emit a query plan which explicitly says that it will return 0 rows. - // The current behavior is a hack - // because this patch was written (2024-04-01 pgoldman) a short time before the BitCraft alpha, - // and a more invasive change was infeasible. - let is_never = !inclusive && value == lower; - - let bounds = (Bound::Excluded(lower), Self::bound(value, inclusive)); - self.query.push(Query::IndexScan(IndexScan { table, columns, bounds })); - - if is_never { - log::warn!("Query will select no rows due to equal excluded bounds: {self:?}") - } - - self - } - // merge with a preceding select - Query::Select(filter) => { - let bounds = (Bound::Unbounded, Self::bound(value, inclusive)); - let op = ColumnOp::from_op_col_bounds(&columns, bounds); - self.query.push(Query::Select(ColumnOp::and(filter, op))); - self - } - // else generate a new select - query => { - self.query.push(query); - let bounds = (Bound::Unbounded, Self::bound(value, inclusive)); - let op = ColumnOp::from_op_col_bounds(&columns, bounds); - self.query.push(Query::Select(op)); - self - } - } - } - - pub fn with_select(mut self, op: O) -> Result - where - O: Into, - { - let op = op.into(); - let Some(query) = self.query.pop() else { - return self.add_base_select(op); - }; - - match (query, op) { - ( - Query::JoinInner(JoinExpr { - rhs, - col_lhs, - col_rhs, - inner, - }), - FieldOp::Cmp { - op: OpQuery::Cmp(cmp), - lhs: field, - rhs: value, - }, - ) => match (*field, *value) { - (FieldOp::Field(FieldExpr::Name(field)), FieldOp::Field(FieldExpr::Value(value))) - // Field is from lhs, so push onto join's left arg - if self.head().column_pos(field).is_some() => - { - // No typing restrictions on `field cmp value`, - // and there are no binary operators to recurse into. - self = self.with_select(FieldOp::cmp(field, cmp, value))?; - self.query.push(Query::JoinInner(JoinExpr { rhs, col_lhs, col_rhs, inner })); - Ok(self) - } - (FieldOp::Field(FieldExpr::Name(field)), FieldOp::Field(FieldExpr::Value(value))) - // Field is from rhs, so push onto join's right arg - if rhs.head().column_pos(field).is_some() => - { - // No typing restrictions on `field cmp value`, - // and there are no binary operators to recurse into. - let rhs = rhs.with_select(FieldOp::cmp(field, cmp, value))?; - self.query.push(Query::JoinInner(JoinExpr { - rhs, - col_lhs, - col_rhs, - inner, - })); - Ok(self) - } - (field, value) => { - self.query.push(Query::JoinInner(JoinExpr { rhs, col_lhs, col_rhs, inner, })); - - // As we have `field op value` we need not demand `bool`, - // but we must still recuse into each side. - self.check_field_op_logics(&field)?; - self.check_field_op_logics(&value)?; - // Convert to `ColumnOp`. - let col = field.names_to_cols(self.head()).unwrap(); - let value = value.names_to_cols(self.head()).unwrap(); - // Add `col op value` filter to query. - self.query.push(Query::Select(ColumnOp::new(OpQuery::Cmp(cmp), col, value))); - Ok(self) - } - }, - // We have a previous filter `lhs`, so join with `rhs` forming `lhs AND rhs`. - (Query::Select(lhs), rhs) => { - // Type check `rhs`, demanding `bool`. - self.check_field_op(&rhs)?; - // Convert to `ColumnOp`. - let rhs = rhs.names_to_cols(self.head()).unwrap(); - // Add `lhs AND op` to query. - self.query.push(Query::Select(ColumnOp::and(lhs, rhs))); - Ok(self) - } - // No previous filter, so add a base one. - (query, op) => { - self.query.push(query); - self.add_base_select(op) - } - } - } - - /// Add a base `Select` query that filters according to `op`. - /// The `op` is checked to produce a `bool` value. - fn add_base_select(mut self, op: FieldOp) -> Result { - // Type check the filter, demanding `bool`. - self.check_field_op(&op)?; - // Convert to `ColumnOp`. - let op = op.names_to_cols(self.head()).unwrap(); - // Add the filter. - self.query.push(Query::Select(op)); - Ok(self) - } - - /// Type checks a `FieldOp` with respect to `self`, - /// ensuring that query evaluation cannot get stuck or panic due to `reduce_bool`. - fn check_field_op(&self, op: &FieldOp) -> Result<(), RelationError> { - use OpQuery::*; - match op { - // `lhs` and `rhs` must both be typed at `bool`. - FieldOp::Cmp { op: Logic(_), lhs, rhs } => { - self.check_field_op(lhs)?; - self.check_field_op(rhs)?; - Ok(()) - } - // `lhs` and `rhs` have no typing restrictions. - // The result of `lhs op rhs` will always be a `bool` - // either by `Eq` or `Ord` on `AlgebraicValue` (see `ColumnOp::compare_bin_op`). - // However, we still have to recurse into `lhs` and `rhs` - // in case we have e.g., `a == (b == c)`. - FieldOp::Cmp { op: Cmp(_), lhs, rhs } => { - self.check_field_op_logics(lhs)?; - self.check_field_op_logics(rhs)?; - Ok(()) - } - FieldOp::Field(FieldExpr::Value(AlgebraicValue::Bool(_))) => Ok(()), - FieldOp::Field(FieldExpr::Value(v)) => Err(RelationError::NotBoolValue { val: v.clone() }), - FieldOp::Field(FieldExpr::Name(field)) => { - let field = *field; - let head = self.head(); - let col_id = head.column_pos_or_err(field)?; - let col_ty = &head.fields[col_id.idx()].algebraic_type; - match col_ty { - &AlgebraicType::Bool => Ok(()), - ty => Err(RelationError::NotBoolType { field, ty: ty.clone() }), - } - } - } - } - - /// Traverses `op`, checking any logical operators for bool-typed operands. - fn check_field_op_logics(&self, op: &FieldOp) -> Result<(), RelationError> { - use OpQuery::*; - match op { - FieldOp::Field(_) => Ok(()), - FieldOp::Cmp { op: Cmp(_), lhs, rhs } => { - self.check_field_op_logics(lhs)?; - self.check_field_op_logics(rhs)?; - Ok(()) - } - FieldOp::Cmp { op: Logic(_), lhs, rhs } => { - self.check_field_op(lhs)?; - self.check_field_op(rhs)?; - Ok(()) - } - } - } - - pub fn with_select_cmp(self, op: O, lhs: LHS, rhs: RHS) -> Result - where - LHS: Into, - RHS: Into, - O: Into, - { - let op = FieldOp::new(op.into(), FieldOp::Field(lhs.into()), FieldOp::Field(rhs.into())); - self.with_select(op) - } - - // Appends a project operation to the query operator pipeline. - // The `wildcard_table_id` represents a projection of the form `table.*`. - // This is used to determine if an inner join can be rewritten as an index join. - pub fn with_project( - mut self, - fields: Vec, - wildcard_table: Option, - ) -> Result { - if !fields.is_empty() { - let header_before = self.head(); - - // Translate the field expressions to column expressions. - let mut cols = Vec::with_capacity(fields.len()); - for field in fields { - cols.push(field.name_to_col(header_before)?); - } - - // Project the header. - // We'll store that so subsequent operations use that as a base. - let header_after = Arc::new(header_before.project(&cols)?); - - // Add the projection. - self.query.push(Query::Project(ProjectExpr { - cols, - wildcard_table, - header_after, - })); - } - Ok(self) - } - - pub fn with_join_inner_raw( - mut self, - q_rhs: QueryExpr, - c_lhs: ColId, - c_rhs: ColId, - inner: Option>, - ) -> Self { - self.query - .push(Query::JoinInner(JoinExpr::new(q_rhs, c_lhs, c_rhs, inner))); - self - } - - pub fn with_join_inner(self, q_rhs: impl Into, c_lhs: ColId, c_rhs: ColId, semi: bool) -> Self { - let q_rhs = q_rhs.into(); - let inner = (!semi).then(|| Arc::new(self.head().extend(q_rhs.head()))); - self.with_join_inner_raw(q_rhs, c_lhs, c_rhs, inner) - } - - fn bound(value: AlgebraicValue, inclusive: bool) -> Bound { - if inclusive { - Bound::Included(value) - } else { - Bound::Excluded(value) - } - } - - /// Try to turn an inner join followed by a projection into a semijoin. - /// - /// This optimization recognizes queries of the form: - /// - /// ```ignore - /// QueryExpr { - /// source: LHS, - /// query: [ - /// JoinInner(JoinExpr { - /// rhs: RHS, - /// semi: false, - /// .. - /// }), - /// Project(LHS.*), - /// ... - /// ] - /// } - /// ``` - /// - /// And combines the `JoinInner` with the `Project` into a `JoinInner` with `semi: true`. - /// - /// Current limitations of this optimization: - /// - The `JoinInner` must be the first (0th) element of the `query`. - /// Future work could search through the `query` to find any applicable `JoinInner`s, - /// but the current implementation inspects only the first expr. - /// This is likely sufficient because this optimization is primarily useful for enabling `try_index_join`, - /// which is fundamentally limited to operate on the first expr. - /// Note that we still get to optimize incremental joins, because we first optimize the original query - /// with [`DbTable`] sources, which results in an [`IndexJoin`] - /// then we replace the sources with [`MemTable`]s and go back to a [`JoinInner`] with `semi: true`. - /// - The `Project` must immediately follow the `JoinInner`, with no intervening exprs. - /// Future work could search through intervening exprs to detect that the RHS table is unused. - /// - The LHS/source table must be a [`DbTable`], not a [`MemTable`]. - /// This is so we can recognize a wildcard project by its table id. - /// Future work could inspect the set of projected fields and compare them to the LHS table's header instead. - pub fn try_semi_join(self) -> QueryExpr { - let QueryExpr { source, query } = self; - - let Some(source_table_id) = source.table_id() else { - // Source is a `MemTable`, so we can't recognize a wildcard projection. Bail. - return QueryExpr { source, query }; - }; - - let mut exprs = query.into_iter(); - let Some(join_candidate) = exprs.next() else { - // No first (0th) expr to be the join; bail. - return QueryExpr { source, query: vec![] }; - }; - let Query::JoinInner(join) = join_candidate else { - // First (0th) expr is not an inner join. Bail. - return QueryExpr { - source, - query: itertools::chain![Some(join_candidate), exprs].collect(), - }; - }; - - let Some(project_candidate) = exprs.next() else { - // No second (1st) expr to be the project. Bail. - return QueryExpr { - source, - query: vec![Query::JoinInner(join)], - }; - }; - - let Query::Project(proj) = project_candidate else { - // Second (1st) expr is not a wildcard projection. Bail. - return QueryExpr { - source, - query: itertools::chain![Some(Query::JoinInner(join)), Some(project_candidate), exprs].collect(), - }; - }; - - if proj.wildcard_table != Some(source_table_id) { - // Projection is selecting the RHS table. Bail. - return QueryExpr { - source, - query: itertools::chain![Some(Query::JoinInner(join)), Some(Query::Project(proj)), exprs].collect(), - }; - }; - - // All conditions met; return a semijoin. - let semijoin = JoinExpr { inner: None, ..join }; - - QueryExpr { - source, - query: itertools::chain![Some(Query::JoinInner(semijoin)), exprs].collect(), - } - } - - // Try to turn an applicable join into an index join. - // An applicable join is one that can use an index to probe the lhs. - // It must also project only the columns from the lhs. - // - // Ex. SELECT Left.* FROM Left JOIN Right ON Left.id = Right.id ... - // where `Left` has an index defined on `id`. - fn try_index_join(self) -> QueryExpr { - let mut query = self; - // We expect a single operation - an inner join with `semi: true`. - // These can be transformed by `try_semi_join` from a sequence of two queries, an inner join followed by a wildcard project. - if query.query.len() != 1 { - return query; - } - - // If the source is a `MemTable`, it doesn't have any indexes, - // so we can't plan an index join. - if query.source.is_mem_table() { - return query; - } - let source = query.source; - let join = query.query.pop().unwrap(); - - match join { - Query::JoinInner(join @ JoinExpr { inner: None, .. }) => { - if !join.rhs.query.is_empty() { - // An applicable join must have an index defined on the correct field. - if source.head().has_constraint(join.col_lhs, Constraints::indexed()) { - let index_join = IndexJoin { - probe_side: join.rhs, - probe_col: join.col_rhs, - index_side: source.clone(), - index_select: None, - index_col: join.col_lhs, - return_index_rows: true, - }; - let query = [Query::IndexJoin(index_join)].into(); - return QueryExpr { source, query }; - } - } - QueryExpr { - source, - query: vec![Query::JoinInner(join)], - } - } - first => QueryExpr { - source, - query: vec![first], - }, - } - } - - /// Look for filters that could use indexes - fn optimize_select(mut q: QueryExpr, op: ColumnOp, tables: &[SourceExpr]) -> QueryExpr { - // Go through each table schema referenced in the query. - // Find the first sargable condition and short-circuit. - let mut fields_found = HashSet::default(); - for schema in tables { - for op in select_best_index(&mut fields_found, schema.head(), &op) { - if let IndexColumnOp::Scan(op) = &op { - // Remove a duplicated/redundant operation on the same `field` and `op` - // like `[Index(a = 1), Index(a = 1), Scan(a = 1)]` - if op.as_col_cmp().is_some_and(|cc| !fields_found.insert(cc)) { - continue; - } - } - - match op { - // A sargable condition for on one of the table schemas, - // either an equality or range condition. - IndexColumnOp::Index(idx) => { - let table = schema - .get_db_table() - .expect("find_sargable_ops(schema, op) implies `schema.is_db_table()`") - .clone(); - - q = match idx { - IndexArgument::Eq { columns, value } => q.with_index_eq(table, columns.clone(), value), - IndexArgument::LowerBound { - columns, - value, - inclusive, - } => q.with_index_lower_bound(table, columns.clone(), value, inclusive), - IndexArgument::UpperBound { - columns, - value, - inclusive, - } => q.with_index_upper_bound(table, columns.clone(), value, inclusive), - }; - } - // Filter condition cannot be answered using an index. - IndexColumnOp::Scan(rhs) => { - let rhs = rhs.clone(); - let op = match q.query.pop() { - // Merge condition into any pre-existing `Select`. - Some(Query::Select(lhs)) => ColumnOp::and(lhs, rhs), - None => rhs, - Some(other) => { - q.query.push(other); - rhs - } - }; - q.query.push(Query::Select(op)); - } - } - } - } - - q - } - - pub fn optimize(mut self, row_count: &impl Fn(TableId, &str) -> i64) -> Self { - let mut q = Self { - source: self.source.clone(), - query: Vec::with_capacity(self.query.len()), - }; - - if matches!(&*self.query, [Query::IndexJoin(_)]) - && let Some(Query::IndexJoin(join)) = self.query.pop() - { - q.query.push(Query::IndexJoin(join.reorder(row_count))); - return q; - } - - for query in self.query { - match query { - Query::Select(op) => { - q = Self::optimize_select(q, op, from_ref(&self.source)); - } - Query::JoinInner(join) => { - q = q.with_join_inner_raw(join.rhs.optimize(row_count), join.col_lhs, join.col_rhs, join.inner); - } - _ => q.query.push(query), - }; - } - - // Make sure to `try_semi_join` before `try_index_join`, as the latter depends on the former. - let q = q.try_semi_join(); - let q = q.try_index_join(); - if matches!(&*q.query, [Query::IndexJoin(_)]) { - return q.optimize(row_count); - } - q - } -} - -impl AuthAccess for Query { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - self.walk_sources(&mut |s| s.check_auth(auth)) - } -} - -#[derive(Debug, Eq, PartialEq, From)] -pub enum Expr { - #[from] - Value(AlgebraicValue), - Block(Vec), - Ident(String), - Crud(Box), - Halt(ErrorLang), -} - -impl From for Expr { - fn from(x: QueryExpr) -> Self { - Expr::Crud(Box::new(CrudExpr::Query(x))) - } -} - -impl fmt::Display for Query { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Query::IndexScan(op) => { - write!(f, "index_scan {op:?}") - } - Query::IndexJoin(op) => { - write!(f, "index_join {op:?}") - } - Query::Select(q) => { - write!(f, "select {q}") - } - Query::Project(proj) => { - let q = &proj.cols; - write!(f, "project")?; - if !q.is_empty() { - write!(f, " ")?; - } - for (pos, x) in q.iter().enumerate() { - write!(f, "{x}")?; - if pos + 1 < q.len() { - write!(f, ", ")?; - } - } - Ok(()) - } - Query::JoinInner(q) => { - write!(f, "&inner {:?} ON {} = {}", q.rhs, q.col_lhs, q.col_rhs) - } - } - } -} - -impl AuthAccess for SourceExpr { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - if auth.has_read_access(self.table_access()) { - return Ok(()); - } - - Err(AuthError::TablePrivate { - named: self.table_name().to_string(), - }) - } -} - -impl AuthAccess for QueryExpr { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - self.walk_sources(&mut |s| s.check_auth(auth)) - } -} - -impl AuthAccess for CrudExpr { - fn check_auth(&self, auth: &AuthCtx) -> Result<(), AuthError> { - // Anyone may query, so as long as the tables involved are public. - if let CrudExpr::Query(q) = self { - return q.check_auth(auth); - } - - // Mutating operations require `owner == caller`. - if !auth.has_write_access() { - return Err(AuthError::InsuffientPrivileges); - } - - Ok(()) - } -} - -#[derive(Debug, PartialEq)] -pub struct Update { - pub table_id: TableId, - pub table_name: TableName, - pub inserts: Vec, - pub deletes: Vec, -} - -#[derive(Debug, PartialEq)] -pub enum Code { - Value(AlgebraicValue), - Table(MemTable), - Halt(ErrorLang), - Block(Vec), - Crud(CrudExpr), - Pass(Option), -} - -impl fmt::Display for Code { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Code::Value(x) => { - write!(f, "{:?}", &x) - } - Code::Block(_) => write!(f, "Block"), - x => todo!("{:?}", x), - } - } -} - -#[derive(Debug, PartialEq)] -pub enum CodeResult { - Value(AlgebraicValue), - Table(MemTable), - Block(Vec), - Halt(ErrorLang), - Pass(Option), -} - -impl From for CodeResult { - fn from(code: Code) -> Self { - match code { - Code::Value(x) => Self::Value(x), - Code::Table(x) => Self::Table(x), - Code::Halt(x) => Self::Halt(x), - Code::Block(x) => { - if x.is_empty() { - Self::Pass(None) - } else { - Self::Block(x.into_iter().map(CodeResult::from).collect()) - } - } - Code::Pass(x) => Self::Pass(x), - x => Self::Halt(ErrorLang::new( - ErrorKind::Compiler, - Some(&format!("Invalid result: {x}")), - )), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use spacetimedb_lib::{db::raw_def::v9::RawModuleDefV9Builder, Identity}; - use spacetimedb_sats::{product, AlgebraicType, ProductType}; - use spacetimedb_schema::{def::ModuleDef, relation::Column, schema::Schema}; - use typed_arena::Arena; - - const ALICE: Identity = Identity::from_byte_array([1; 32]); - const BOB: Identity = Identity::from_byte_array([2; 32]); - - // TODO(kim): Should better do property testing here, but writing generators - // on recursive types (ie. `Query` and friends) is tricky. - - fn tables() -> [SourceExpr; 2] { - [ - SourceExpr::InMemory { - source_id: SourceId(0), - header: Arc::new(Header { - table_id: 42.into(), - table_name: TableName::for_test("foo"), - fields: vec![], - constraints: Default::default(), - }), - table_type: StTableType::User, - table_access: StAccess::Private, - }, - SourceExpr::DbTable(DbTable { - head: Arc::new(Header { - table_id: 42.into(), - table_name: TableName::for_test("foo"), - fields: vec![], - constraints: [(ColId(42).into(), Constraints::indexed())].into_iter().collect(), - }), - table_id: 42.into(), - table_type: StTableType::User, - table_access: StAccess::Private, - }), - ] - } - - fn queries() -> impl IntoIterator { - let [mem_table, db_table] = tables(); - // Skip `Query::Select` and `QueryProject` -- they don't have table - // information - [ - Query::IndexScan(IndexScan { - table: db_table.get_db_table().unwrap().clone(), - columns: ColList::new(42.into()), - bounds: (Bound::Included(22.into()), Bound::Unbounded), - }), - Query::IndexJoin(IndexJoin { - probe_side: mem_table.clone().into(), - probe_col: 0.into(), - index_side: SourceExpr::DbTable(DbTable { - head: Arc::new(Header { - table_id: db_table.head().table_id, - table_name: db_table.table_name().clone(), - fields: vec![], - constraints: Default::default(), - }), - table_id: db_table.head().table_id, - table_type: StTableType::User, - table_access: StAccess::Public, - }), - index_select: None, - index_col: 22.into(), - return_index_rows: true, - }), - Query::JoinInner(JoinExpr { - col_rhs: 1.into(), - rhs: mem_table.into(), - col_lhs: 1.into(), - inner: None, - }), - ] - } - - fn query_exprs() -> impl IntoIterator { - tables().map(|table| { - let mut expr = QueryExpr::from(table); - expr.query = queries().into_iter().collect(); - expr - }) - } - - fn assert_owner_private(auth: &T) { - assert!(auth.check_auth(&AuthCtx::new(ALICE, ALICE)).is_ok()); - assert!(matches!( - auth.check_auth(&AuthCtx::new(ALICE, BOB)), - Err(AuthError::TablePrivate { .. }) - )); - } - - fn assert_owner_required(auth: T) { - assert!(auth.check_auth(&AuthCtx::new(ALICE, ALICE)).is_ok()); - assert!(matches!( - auth.check_auth(&AuthCtx::new(ALICE, BOB)), - Err(AuthError::InsuffientPrivileges) - )); - } - - fn mem_table(id: TableId, name: &str, fields: &[(u16, AlgebraicType, bool)]) -> SourceExpr { - let table_access = StAccess::Public; - let head = Header::new( - id, - TableName::for_test(name), - fields - .iter() - .map(|(col, ty, _)| Column::new(FieldName::new(id, (*col).into()), ty.clone())) - .collect(), - fields - .iter() - .enumerate() - .filter(|(_, (_, _, indexed))| *indexed) - .map(|(i, _)| (ColId::from(i).into(), Constraints::indexed())), - ); - SourceExpr::InMemory { - source_id: SourceId(0), - header: Arc::new(head), - table_access, - table_type: StTableType::User, - } - } - - #[test] - fn test_index_to_inner_join() { - let index_side = mem_table( - 0.into(), - "index", - &[(0, AlgebraicType::U8, false), (1, AlgebraicType::U8, true)], - ); - let probe_side = mem_table( - 1.into(), - "probe", - &[(0, AlgebraicType::U8, false), (1, AlgebraicType::U8, true)], - ); - - let index_col = 1.into(); - let probe_col = 1.into(); - let index_select = ColumnOp::cmp(0, OpCmp::Eq, 0u8); - let join = IndexJoin { - probe_side: probe_side.clone().into(), - probe_col, - index_side: index_side.clone(), - index_select: Some(index_select.clone()), - index_col, - return_index_rows: false, - }; - - let expr = join.to_inner_join(); - - assert_eq!(expr.source, probe_side); - assert_eq!(expr.query.len(), 1); - - let Query::JoinInner(ref join) = expr.query[0] else { - panic!("expected an inner join, but got {:#?}", expr.query[0]); - }; - - assert_eq!(join.col_lhs, probe_col); - assert_eq!(join.col_rhs, index_col); - assert_eq!( - join.rhs, - QueryExpr { - source: index_side, - query: vec![index_select.into()] - } - ); - assert_eq!(join.inner, None); - } - - fn setup_best_index() -> (Header, [ColId; 5], [AlgebraicValue; 5]) { - let table_id = 0.into(); - - let vals = [1, 2, 3, 4, 5].map(AlgebraicValue::U64); - let col_ids = [0, 1, 2, 3, 4].map(ColId); - let [a, b, c, d, _] = col_ids; - let columns = col_ids.map(|c| Column::new(FieldName::new(table_id, c), AlgebraicType::I8)); - - let head1 = Header::new( - table_id, - TableName::for_test("t1"), - columns.to_vec(), - vec![ - // Index a - (a.into(), Constraints::primary_key()), - // Index b - (b.into(), Constraints::indexed()), - // Index b + c - (col_list![b, c], Constraints::unique()), - // Index a + b + c + d - (col_list![a, b, c, d], Constraints::indexed()), - ], - ); - - (head1, col_ids, vals) - } - - fn make_field_value((cmp, col, value): (OpCmp, ColId, &AlgebraicValue)) -> ColumnOp { - ColumnOp::cmp(col, cmp, value.clone()) - } - - fn scan_eq<'a>(arena: &'a Arena, col: ColId, val: &'a AlgebraicValue) -> IndexColumnOp<'a> { - scan(arena, OpCmp::Eq, col, val) - } - - fn scan<'a>(arena: &'a Arena, cmp: OpCmp, col: ColId, val: &'a AlgebraicValue) -> IndexColumnOp<'a> { - IndexColumnOp::Scan(arena.alloc(make_field_value((cmp, col, val)))) - } - - #[test] - fn best_index() { - let (head1, fields, vals) = setup_best_index(); - let [col_a, col_b, col_c, col_d, col_e] = fields; - let [val_a, val_b, val_c, val_d, val_e] = vals; - - let arena = Arena::new(); - let select_best_index = |fields: &[_]| { - let fields = fields - .iter() - .copied() - .map(|(col, val): (ColId, _)| make_field_value((OpCmp::Eq, col, val))) - .reduce(ColumnOp::and) - .unwrap(); - select_best_index(&mut <_>::default(), &head1, arena.alloc(fields)) - }; - - let col_list_arena = Arena::new(); - let idx_eq = |cols, val| make_index_arg(OpCmp::Eq, col_list_arena.alloc(cols), val); - - // Check for simple scan - assert_eq!( - select_best_index(&[(col_d, &val_e)]), - [scan_eq(&arena, col_d, &val_e)].into(), - ); - - assert_eq!( - select_best_index(&[(col_a, &val_a)]), - [idx_eq(col_a.into(), val_a.clone())].into(), - ); - - assert_eq!( - select_best_index(&[(col_b, &val_b)]), - [idx_eq(col_b.into(), val_b.clone())].into(), - ); - - // Check for permutation - assert_eq!( - select_best_index(&[(col_b, &val_b), (col_c, &val_c)]), - [idx_eq( - col_list![col_b, col_c], - product![val_b.clone(), val_c.clone()].into() - )] - .into(), - ); - - assert_eq!( - select_best_index(&[(col_c, &val_c), (col_b, &val_b)]), - [idx_eq( - col_list![col_b, col_c], - product![val_b.clone(), val_c.clone()].into() - )] - .into(), - ); - - // Check for permutation - assert_eq!( - select_best_index(&[(col_a, &val_a), (col_b, &val_b), (col_c, &val_c), (col_d, &val_d)]), - [idx_eq( - col_list![col_a, col_b, col_c, col_d], - product![val_a.clone(), val_b.clone(), val_c.clone(), val_d.clone()].into(), - )] - .into(), - ); - - assert_eq!( - select_best_index(&[(col_b, &val_b), (col_a, &val_a), (col_d, &val_d), (col_c, &val_c)]), - [idx_eq( - col_list![col_a, col_b, col_c, col_d], - product![val_a.clone(), val_b.clone(), val_c.clone(), val_d.clone()].into(), - )] - .into() - ); - - // Check mix scan + index - assert_eq!( - select_best_index(&[(col_b, &val_b), (col_a, &val_a), (col_e, &val_e), (col_d, &val_d)]), - [ - idx_eq(col_a.into(), val_a.clone()), - idx_eq(col_b.into(), val_b.clone()), - scan_eq(&arena, col_d, &val_d), - scan_eq(&arena, col_e, &val_e), - ] - .into() - ); - - assert_eq!( - select_best_index(&[(col_b, &val_b), (col_c, &val_c), (col_d, &val_d)]), - [ - idx_eq(col_list![col_b, col_c], product![val_b.clone(), val_c.clone()].into(),), - scan_eq(&arena, col_d, &val_d), - ] - .into() - ); - } - - #[test] - fn best_index_range() { - let arena = Arena::new(); - - let (head1, cols, vals) = setup_best_index(); - let [col_a, col_b, col_c, col_d, _] = cols; - let [val_a, val_b, val_c, val_d, _] = vals; - - let select_best_index = |cols: &[_]| { - let fields = cols.iter().map(|x| make_field_value(*x)).reduce(ColumnOp::and).unwrap(); - select_best_index(&mut <_>::default(), &head1, arena.alloc(fields)) - }; - - let col_list_arena = Arena::new(); - let idx = |cmp, cols: &[ColId], val: &AlgebraicValue| { - let columns = cols.iter().copied().collect::(); - let columns = col_list_arena.alloc(columns); - make_index_arg(cmp, columns, val.clone()) - }; - - // `a > va AND a < vb` => `[index(a), index(a)]` - assert_eq!( - select_best_index(&[(OpCmp::Gt, col_a, &val_a), (OpCmp::Lt, col_a, &val_b)]), - [idx(OpCmp::Lt, &[col_a], &val_b), idx(OpCmp::Gt, &[col_a], &val_a)].into() - ); - - // `d > vd AND d < vb` => `[scan(d), scan(d)]` - assert_eq!( - select_best_index(&[(OpCmp::Gt, col_d, &val_d), (OpCmp::Lt, col_d, &val_b)]), - [ - scan(&arena, OpCmp::Lt, col_d, &val_b), - scan(&arena, OpCmp::Gt, col_d, &val_d) - ] - .into() - ); - - // `b > vb AND c < vc` => `[index(b), scan(c)]`. - assert_eq!( - select_best_index(&[(OpCmp::Gt, col_b, &val_b), (OpCmp::Lt, col_c, &val_c)]), - [idx(OpCmp::Gt, &[col_b], &val_b), scan(&arena, OpCmp::Lt, col_c, &val_c)].into() - ); - - // `b = vb AND a >= va AND c = vc` => `[index(b, c), index(a)]` - let idx_bc = idx( - OpCmp::Eq, - &[col_b, col_c], - &product![val_b.clone(), val_c.clone()].into(), - ); - assert_eq!( - // - select_best_index(&[ - (OpCmp::Eq, col_b, &val_b), - (OpCmp::GtEq, col_a, &val_a), - (OpCmp::Eq, col_c, &val_c), - ]), - [idx_bc.clone(), idx(OpCmp::GtEq, &[col_a], &val_a),].into() - ); - - // `b > vb AND a = va AND c = vc` => `[index(a), index(b), scan(c)]` - assert_eq!( - select_best_index(&[ - (OpCmp::Gt, col_b, &val_b), - (OpCmp::Eq, col_a, &val_a), - (OpCmp::Lt, col_c, &val_c), - ]), - [ - idx(OpCmp::Eq, &[col_a], &val_a), - idx(OpCmp::Gt, &[col_b], &val_b), - scan(&arena, OpCmp::Lt, col_c, &val_c), - ] - .into() - ); - - // `a = va AND b = vb AND c = vc AND d > vd` => `[index(b, c), index(a), scan(d)]` - assert_eq!( - select_best_index(&[ - (OpCmp::Eq, col_a, &val_a), - (OpCmp::Eq, col_b, &val_b), - (OpCmp::Eq, col_c, &val_c), - (OpCmp::Gt, col_d, &val_d), - ]), - [ - idx_bc.clone(), - idx(OpCmp::Eq, &[col_a], &val_a), - scan(&arena, OpCmp::Gt, col_d, &val_d), - ] - .into() - ); - - // `b = vb AND c = vc AND b = vb AND c = vc` => `[index(b, c), index(b, c)]` - assert_eq!( - select_best_index(&[ - (OpCmp::Eq, col_b, &val_b), - (OpCmp::Eq, col_c, &val_c), - (OpCmp::Eq, col_b, &val_b), - (OpCmp::Eq, col_c, &val_c), - ]), - [idx_bc.clone(), idx_bc].into() - ); - } - - #[test] - fn test_auth_table() { - tables().iter().for_each(assert_owner_private) - } - - #[test] - fn test_auth_query_code() { - for code in query_exprs() { - assert_owner_private(&code) - } - } - - #[test] - fn test_auth_query() { - for query in queries() { - assert_owner_private(&query); - } - } - - #[test] - fn test_auth_crud_code_query() { - for query in query_exprs() { - let crud = CrudExpr::Query(query); - assert_owner_private(&crud); - } - } - - #[test] - fn test_auth_crud_code_insert() { - for table in tables().into_iter().filter_map(|s| s.get_db_table().cloned()) { - let crud = CrudExpr::Insert { table, rows: vec![] }; - assert_owner_required(crud); - } - } - - #[test] - fn test_auth_crud_code_update() { - for qc in query_exprs() { - let crud = CrudExpr::Update { - delete: qc, - assignments: Default::default(), - }; - assert_owner_required(crud); - } - } - - #[test] - fn test_auth_crud_code_delete() { - for query in query_exprs() { - let crud = CrudExpr::Delete { query }; - assert_owner_required(crud); - } - } - - fn test_def() -> ModuleDef { - let mut builder = RawModuleDefV9Builder::new(); - builder.build_table_with_new_type( - "lhs", - ProductType::from([("a", AlgebraicType::I32), ("b", AlgebraicType::String)]), - true, - ); - builder.build_table_with_new_type( - "rhs", - ProductType::from([("c", AlgebraicType::I32), ("d", AlgebraicType::I64)]), - true, - ); - builder.finish().try_into().expect("test def should be valid") - } - - #[test] - /// Tests that [`QueryExpr::optimize`] can rewrite inner joins followed by projections into semijoins. - fn optimize_inner_join_to_semijoin() { - let def: ModuleDef = test_def(); - let lhs = TableSchema::from_module_def(&def, def.table("lhs").unwrap(), (), 0.into()); - let rhs = TableSchema::from_module_def(&def, def.table("rhs").unwrap(), (), 1.into()); - - let lhs_source = SourceExpr::from(&lhs); - let rhs_source = SourceExpr::from(&rhs); - - let q = QueryExpr::new(lhs_source.clone()) - .with_join_inner(rhs_source.clone(), 0.into(), 0.into(), false) - .with_project( - [0, 1] - .map(|c| FieldExpr::Name(FieldName::new(lhs.table_id, c.into()))) - .into(), - Some(TableId::SENTINEL), - ) - .unwrap(); - let q = q.optimize(&|_, _| 0); - - assert_eq!(q.source, lhs_source, "Optimized query should read from lhs"); - - assert_eq!( - q.query.len(), - 1, - "Optimized query should have a single member, a semijoin" - ); - match &q.query[0] { - Query::JoinInner(JoinExpr { rhs, inner: semi, .. }) => { - assert_eq!(semi, &None, "Optimized query should be a semijoin"); - assert_eq!(rhs.source, rhs_source, "Optimized query should filter with rhs"); - assert!( - rhs.query.is_empty(), - "Optimized query should not filter rhs before joining" - ); - } - wrong => panic!("Expected an inner join, but found {wrong:?}"), - } - } - - #[test] - /// Tests that [`QueryExpr::optimize`] will not rewrite inner joins which are not followed by projections to the LHS table. - fn optimize_inner_join_no_project() { - let def: ModuleDef = test_def(); - let lhs = TableSchema::from_module_def(&def, def.table("lhs").unwrap(), (), 0.into()); - let rhs = TableSchema::from_module_def(&def, def.table("rhs").unwrap(), (), 1.into()); - - let lhs_source = SourceExpr::from(&lhs); - let rhs_source = SourceExpr::from(&rhs); - - let q = QueryExpr::new(lhs_source.clone()).with_join_inner(rhs_source.clone(), 0.into(), 0.into(), false); - let optimized = q.clone().optimize(&|_, _| 0); - assert_eq!(q, optimized); - } - - #[test] - /// Tests that [`QueryExpr::optimize`] will not rewrite inner joins followed by projections to the RHS rather than LHS table. - fn optimize_inner_join_wrong_project() { - let def: ModuleDef = test_def(); - let lhs = TableSchema::from_module_def(&def, def.table("lhs").unwrap(), (), 0.into()); - let rhs = TableSchema::from_module_def(&def, def.table("rhs").unwrap(), (), 1.into()); - - let lhs_source = SourceExpr::from(&lhs); - let rhs_source = SourceExpr::from(&rhs); - - let q = QueryExpr::new(lhs_source.clone()) - .with_join_inner(rhs_source.clone(), 0.into(), 0.into(), false) - .with_project( - [0, 1] - .map(|c| FieldExpr::Name(FieldName::new(rhs.table_id, c.into()))) - .into(), - Some(TableId(1)), - ) - .unwrap(); - let optimized = q.clone().optimize(&|_, _| 0); - assert_eq!(q, optimized); - } -} diff --git a/crates/vm/src/iterators.rs b/crates/vm/src/iterators.rs deleted file mode 100644 index 9b4ae036fe7..00000000000 --- a/crates/vm/src/iterators.rs +++ /dev/null @@ -1,21 +0,0 @@ -use crate::rel_ops::RelOps; -use crate::relation::RelValue; - -/// Turns an iterator over [`RelValue<'_>`]s into a `RelOps`. -#[derive(Debug)] -pub struct RelIter { - pub iter: I, -} - -impl RelIter { - pub fn new(iter: impl IntoIterator) -> Self { - let iter = iter.into_iter(); - Self { iter } - } -} - -impl<'a, I: Iterator>> RelOps<'a> for RelIter { - fn next(&mut self) -> Option> { - self.iter.next() - } -} diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs deleted file mode 100644 index a443a0ee0fd..00000000000 --- a/crates/vm/src/lib.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! Abstract Virtual Machine for execution of end-user logic -//! -//! It optimizes the code & include a more general "query planner" -//! -//! The execution is split in 3 "phases": -//! -//! 1- AST formation -//! -//! Generate the AST (that could be invalid according to the semantics). -//! -//! This step is outside the [vm] and can be done, for example, by the SQL layer. -//! -//! Use [dsl] to build the [expr:Expr] that build the AST. -//! -//! 2- AST validation -//! -//! Calling [eval::optimize] verify the code has the correct semantics (ie: It checks types, schemas, functions are valid, etc.), -//! and "desugar" the code in a more optimal form for later execution. -//! -//! This build [expr::Expr] that is what could be stored in the database, ie: Is like bytecode. -//! -//! 3- Execution -//! -//! Run the AST build from [expr::Expr]. It assumes is correct. -//! - -pub use spacetimedb_lib::operator; - -pub mod errors; -pub mod eval; -pub mod expr; -pub mod iterators; -pub mod ops; -pub mod program; -pub mod rel_ops; -pub mod relation; diff --git a/crates/vm/src/main.rs b/crates/vm/src/main.rs deleted file mode 100644 index 0317bccf167..00000000000 --- a/crates/vm/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - todo!("Waiting for reimplement vm") -} diff --git a/crates/vm/src/ops/mod.rs b/crates/vm/src/ops/mod.rs deleted file mode 100644 index 5313ea1f5f9..00000000000 --- a/crates/vm/src/ops/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -//! Implements the in-built operators & functions loaded by the `vm` -pub mod parse; diff --git a/crates/vm/src/ops/parse.rs b/crates/vm/src/ops/parse.rs deleted file mode 100644 index b45c5dd339b..00000000000 --- a/crates/vm/src/ops/parse.rs +++ /dev/null @@ -1,92 +0,0 @@ -use crate::errors::{ErrorType, ErrorVm}; -use spacetimedb_lib::{ConnectionId, Identity}; -use spacetimedb_sats::satn::Satn; -use spacetimedb_sats::{i256, u256, AlgebraicType, AlgebraicValue, ProductType, SumType}; -use std::fmt::Display; -use std::str::FromStr; - -fn _parse(value: &str, ty: &AlgebraicType) -> Result -where - F: FromStr + Into, - ::Err: Display, -{ - match value.parse::() { - Ok(x) => Ok(x.into()), - Err(err) => Err(ErrorType::Parse { - value: value.to_string(), - ty: ty.to_satn(), - err: err.to_string(), - } - .into()), - } -} - -/// Try to parse `tag_name` for a simple enum on `sum` into a valid `tag` value of `AlgebraicValue` -pub fn parse_simple_enum(sum: &SumType, tag_name: &str) -> Result { - if let Some((pos, _tag)) = sum.get_variant_simple(tag_name) { - Ok(AlgebraicValue::enum_simple(pos)) - } else { - Err(ErrorVm::Unsupported(format!( - "Not found enum tag '{tag_name}' or not a simple enum: {}", - sum.to_satn_pretty() - ))) - } -} - -/// Try to parse `value` as [`Identity`] or [`ConnectionId`]. -pub fn parse_product(product: &ProductType, value: &str) -> Result { - if product.is_identity() { - return Ok(Identity::from_hex(value.trim_start_matches("0x")) - .map_err(|err| ErrorVm::Other(err.into()))? - .into()); - } - if product.is_connection_id() { - return Ok(ConnectionId::from_hex(value.trim_start_matches("0x")) - .map_err(ErrorVm::Other)? - .into()); - } - Err(ErrorVm::Unsupported(format!( - "Can't parse '{value}' to {}", - product.to_satn_pretty() - ))) -} - -/// Parse a `&str` into [AlgebraicValue] using the supplied [AlgebraicType]. -/// -/// ``` -/// use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; -/// use spacetimedb_vm::errors::ErrorLang; -/// use spacetimedb_vm::ops::parse::parse; -/// -/// assert_eq!(parse("1", &AlgebraicType::I32).map_err(ErrorLang::from), Ok(AlgebraicValue::I32(1))); -/// assert_eq!(parse("true", &AlgebraicType::Bool).map_err(ErrorLang::from), Ok(AlgebraicValue::Bool(true))); -/// assert_eq!(parse("1.0", &AlgebraicType::F64).map_err(ErrorLang::from), Ok(AlgebraicValue::F64(1.0f64.into()))); -/// assert_eq!(parse("Player", &AlgebraicType::simple_enum(["Player"].into_iter())).map_err(ErrorLang::from), Ok(AlgebraicValue::enum_simple(0))); -/// assert!(parse("bananas", &AlgebraicType::I32).is_err()); -/// ``` -pub fn parse(value: &str, ty: &AlgebraicType) -> Result { - match ty { - &AlgebraicType::Bool => _parse::(value, ty), - &AlgebraicType::I8 => _parse::(value, ty), - &AlgebraicType::U8 => _parse::(value, ty), - &AlgebraicType::I16 => _parse::(value, ty), - &AlgebraicType::U16 => _parse::(value, ty), - &AlgebraicType::I32 => _parse::(value, ty), - &AlgebraicType::U32 => _parse::(value, ty), - &AlgebraicType::I64 => _parse::(value, ty), - &AlgebraicType::U64 => _parse::(value, ty), - &AlgebraicType::I128 => _parse::(value, ty), - &AlgebraicType::U128 => _parse::(value, ty), - &AlgebraicType::I256 => _parse::(value, ty), - &AlgebraicType::U256 => _parse::(value, ty), - &AlgebraicType::F32 => _parse::(value, ty), - &AlgebraicType::F64 => _parse::(value, ty), - &AlgebraicType::String => Ok(AlgebraicValue::String(value.into())), - AlgebraicType::Sum(sum) => parse_simple_enum(sum, value), - AlgebraicType::Product(product) => parse_product(product, value), - x => Err(ErrorVm::Unsupported(format!( - "Can't parse '{value}' to {}", - x.to_satn_pretty() - ))), - } -} diff --git a/crates/vm/src/program.rs b/crates/vm/src/program.rs deleted file mode 100644 index d5166eb8edf..00000000000 --- a/crates/vm/src/program.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Definition for a `Program` to run code. -//! -//! It carries an [EnvDb] with the functions, idents, types. - -use crate::errors::ErrorVm; -use crate::expr::{Code, CrudExpr, SourceSet}; -use spacetimedb_sats::ProductValue; - -/// A trait to allow split the execution of `programs` to allow executing -/// `queries` that take in account each `program` state/enviroment. -/// -/// To be specific, it allows you to run queries that run on the `SpacetimeDB` engine. -/// -/// It could also permit run queries backed by different engines, like in `MySql`. -pub trait ProgramVm { - /// Allows to execute the query with the state carried by the implementation of this - /// trait - fn eval_query(&mut self, query: CrudExpr, sources: Sources<'_, N>) -> Result; -} - -pub type Sources<'a, const N: usize> = &'a mut SourceSet, N>; diff --git a/crates/vm/src/rel_ops.rs b/crates/vm/src/rel_ops.rs deleted file mode 100644 index 088049f33bd..00000000000 --- a/crates/vm/src/rel_ops.rs +++ /dev/null @@ -1,237 +0,0 @@ -use core::iter; - -use crate::relation::RelValue; -use spacetimedb_data_structures::map::HashMap; -use spacetimedb_sats::AlgebraicValue; -use spacetimedb_schema::relation::ColExpr; - -/// A trait for dealing with fallible iterators for the database. -pub trait RelOps<'a> { - /// Advances the `iterator` and returns the next [RelValue]. - fn next(&mut self) -> Option>; - - /// Creates an `Iterator` which uses a closure to determine if a [RelValueRef] should be yielded. - /// - /// Given a [RelValueRef] the closure must return true or false. - /// The returned iterator will yield only the elements for which the closure returns true. - /// - /// Note: - /// - /// It is the equivalent of a `WHERE` clause on SQL. - #[inline] - fn select

(self, predicate: P) -> Select - where - P: FnMut(&RelValue<'_>) -> bool, - Self: Sized, - { - Select::new(self, predicate) - } - - /// Creates an `Iterator` which uses a closure that projects to a new [RelValue] extracted from the current. - /// - /// Given a [RelValue] the closure must return a subset of the current one. - /// - /// The [Header] is pre-checked that all the fields exist and return a error if any field is not found. - /// - /// Note: - /// - /// It is the equivalent of a `SELECT` clause on SQL. - #[inline] - fn project<'b, P>(self, cols: &'b [ColExpr], extractor: P) -> Project<'b, Self, P> - where - P: for<'c> FnMut(&[ColExpr], RelValue<'c>) -> RelValue<'c>, - Self: Sized, - { - Project::new(self, cols, extractor) - } - - /// Intersection between the left and the right, both (non-sorted) `iterators`. - /// - /// The hash join strategy requires the right iterator can be collected to a `HashMap`. - /// The left iterator can be arbitrarily long. - /// - /// It is therefore asymmetric (you can't flip the iterators to get a right_outer join). - /// - /// Note: - /// - /// It is the equivalent of a `INNER JOIN` clause on SQL. - #[inline] - fn join_inner( - self, - with: Rhs, - key_lhs: KeyLhs, - key_rhs: KeyRhs, - predicate: Pred, - project: Proj, - ) -> JoinInner<'a, Self, Rhs, KeyLhs, KeyRhs, Pred, Proj> - where - Self: Sized, - Pred: FnMut(&RelValue<'a>, &RelValue<'a>) -> bool, - Proj: FnMut(RelValue<'a>, RelValue<'a>) -> RelValue<'a>, - KeyLhs: FnMut(&RelValue<'a>) -> AlgebraicValue, - KeyRhs: FnMut(&RelValue<'a>) -> AlgebraicValue, - Rhs: RelOps<'a>, - { - JoinInner::new(self, with, key_lhs, key_rhs, predicate, project) - } - - /// Collect all the rows in this relation into a `Vec` given a function `RelValue<'a> -> T`. - #[inline] - fn collect_vec(mut self, mut convert: impl FnMut(RelValue<'a>) -> T) -> Vec - where - Self: Sized, - { - let mut result = Vec::new(); - while let Some(row) = self.next() { - result.push(convert(row)); - } - result - } - - fn iter(&mut self) -> impl Iterator> - where - Self: Sized, - { - iter::from_fn(move || self.next()) - } -} - -impl<'a, I: RelOps<'a> + ?Sized> RelOps<'a> for Box { - fn next(&mut self) -> Option> { - (**self).next() - } -} - -/// `RelOps` iterator which never returns any rows. -/// -/// Used to compile queries with unsatisfiable bounds, like `WHERE x < 5 AND x > 5`. -#[derive(Clone, Debug)] -pub struct EmptyRelOps; - -impl<'a> RelOps<'a> for EmptyRelOps { - fn next(&mut self) -> Option> { - None - } -} - -#[derive(Clone, Debug)] -pub struct Select { - pub(crate) iter: I, - pub(crate) predicate: P, -} - -impl Select { - pub fn new(iter: I, predicate: P) -> Select { - Select { iter, predicate } - } -} - -impl<'a, I, P> RelOps<'a> for Select -where - I: RelOps<'a>, - P: FnMut(&RelValue<'a>) -> bool, -{ - fn next(&mut self) -> Option> { - let filter = &mut self.predicate; - while let Some(v) = self.iter.next() { - if filter(&v) { - return Some(v); - } - } - None - } -} - -#[derive(Clone, Debug)] -pub struct Project<'a, I, P> { - pub(crate) cols: &'a [ColExpr], - pub(crate) iter: I, - pub(crate) extractor: P, -} - -impl<'a, I, P> Project<'a, I, P> { - pub fn new(iter: I, cols: &'a [ColExpr], extractor: P) -> Project<'a, I, P> { - Project { iter, cols, extractor } - } -} - -impl<'a, I, P> RelOps<'a> for Project<'_, I, P> -where - I: RelOps<'a>, - P: FnMut(&[ColExpr], RelValue<'a>) -> RelValue<'a>, -{ - fn next(&mut self) -> Option> { - self.iter.next().map(|v| (self.extractor)(self.cols, v)) - } -} - -#[derive(Clone, Debug)] -pub struct JoinInner<'a, Lhs, Rhs, KeyLhs, KeyRhs, Pred, Proj> { - pub(crate) lhs: Lhs, - pub(crate) rhs: Rhs, - pub(crate) key_lhs: KeyLhs, - pub(crate) key_rhs: KeyRhs, - pub(crate) predicate: Pred, - pub(crate) projection: Proj, - map: HashMap>>, - filled_rhs: bool, - left: Option>, -} - -impl JoinInner<'_, Lhs, Rhs, KeyLhs, KeyRhs, Pred, Proj> { - pub fn new(lhs: Lhs, rhs: Rhs, key_lhs: KeyLhs, key_rhs: KeyRhs, predicate: Pred, projection: Proj) -> Self { - Self { - map: HashMap::default(), - lhs, - rhs, - key_lhs, - key_rhs, - predicate, - projection, - filled_rhs: false, - left: None, - } - } -} - -impl<'a, Lhs, Rhs, KeyLhs, KeyRhs, Pred, Proj> RelOps<'a> for JoinInner<'a, Lhs, Rhs, KeyLhs, KeyRhs, Pred, Proj> -where - Lhs: RelOps<'a>, - Rhs: RelOps<'a>, - KeyLhs: FnMut(&RelValue<'a>) -> AlgebraicValue, - KeyRhs: FnMut(&RelValue<'a>) -> AlgebraicValue, - Pred: FnMut(&RelValue<'a>, &RelValue<'a>) -> bool, - Proj: FnMut(RelValue<'a>, RelValue<'a>) -> RelValue<'a>, -{ - fn next(&mut self) -> Option> { - // Consume `Rhs`, building a map `KeyRhs => Rhs`. - if !self.filled_rhs { - self.map = HashMap::default(); - while let Some(row_rhs) = self.rhs.next() { - let key_rhs = (self.key_rhs)(&row_rhs); - self.map.entry(key_rhs).or_default().push(row_rhs); - } - self.filled_rhs = true; - } - - loop { - // Consume a row in `Lhs` and project to `KeyLhs`. - let lhs = match &self.left { - Some(left) => left, - None => self.left.insert(self.lhs.next()?), - }; - let k = (self.key_lhs)(lhs); - - // If we can relate `KeyLhs` and `KeyRhs`, we have candidate. - // If that candidate still has rhs elements, test against the predicate and yield. - if let Some(rvv) = self.map.get_mut(&k) - && let Some(rhs) = rvv.pop() - && (self.predicate)(lhs, &rhs) - { - return Some((self.projection)(lhs.clone(), rhs)); - } - self.left = None; - continue; - } - } -} diff --git a/crates/vm/src/relation.rs b/crates/vm/src/relation.rs deleted file mode 100644 index a56eb4286be..00000000000 --- a/crates/vm/src/relation.rs +++ /dev/null @@ -1,224 +0,0 @@ -use core::hash::{Hash, Hasher}; -use derive_more::From; -use spacetimedb_execution::Row; -use spacetimedb_lib::db::auth::StAccess; -use spacetimedb_sats::bsatn::{ser::BsatnError, BufReservedFill, ToBsatn}; -use spacetimedb_sats::buffer::BufWriter; -use spacetimedb_sats::product_value::ProductValue; -use spacetimedb_sats::{impl_serialize, AlgebraicValue}; -use spacetimedb_schema::relation::{ColExpr, ColExprRef, Header}; -use spacetimedb_table::read_column::ReadColumn; -use spacetimedb_table::table::RowRef; -use std::borrow::Cow; -use std::sync::Arc; - -/// RelValue represents either a reference to a row in a table, -/// a reference to an inserted row, -/// or an ephemeral row constructed during query execution. -/// -/// A `RelValue` is the type generated/consumed by queries. -#[derive(Debug, Clone, From)] -pub enum RelValue<'a> { - /// A reference to a row in a table. - Row(RowRef<'a>), - /// An ephemeral row made during query execution. - Projection(ProductValue), - /// A row coming directly from a collected update. - /// - /// This is really a row in a table, and not an actual projection. - /// However, for (lifetime) reasons, we cannot (yet) keep it as a `RowRef<'_>` - /// and must convert that into a `ProductValue`. - ProjRef(&'a ProductValue), -} - -impl<'a> From> for RelValue<'a> { - fn from(value: Row<'a>) -> Self { - match value { - Row::Ptr(ptr) => Self::Row(ptr), - Row::Ref(ptr) => Self::ProjRef(ptr), - } - } -} - -impl Eq for RelValue<'_> {} - -impl PartialEq for RelValue<'_> { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Projection(x), Self::Projection(y)) => x == y, - (Self::ProjRef(x), Self::ProjRef(y)) => x == y, - (Self::Row(x), Self::Row(y)) => x == y, - (Self::Projection(x), Self::ProjRef(y)) | (Self::ProjRef(y), Self::Projection(x)) => x == *y, - (Self::Row(x), Self::Projection(y)) | (Self::Projection(y), Self::Row(x)) => x == y, - (Self::Row(x), Self::ProjRef(y)) | (Self::ProjRef(y), Self::Row(x)) => x == *y, - } - } -} - -impl Hash for RelValue<'_> { - fn hash(&self, state: &mut H) { - match self { - // `x.hash(state)` and `x.to_product_value().hash(state)` - // have the same effect on `state`. - Self::Row(x) => x.hash(state), - Self::Projection(x) => x.hash(state), - Self::ProjRef(x) => x.hash(state), - } - } -} - -impl_serialize!(['a] RelValue<'a>, (self, ser) => match self { - Self::Row(row) => row.serialize(ser), - Self::Projection(row) => row.serialize(ser), - Self::ProjRef(row) => row.serialize(ser), -}); - -impl<'a> RelValue<'a> { - /// Converts `self` into a [`ProductValue`] - /// either by reading a value from a table, - /// cloning the reference to a `ProductValue`, - /// or consuming the owned product. - pub fn into_product_value(self) -> ProductValue { - match self { - Self::Row(row) => row.to_product_value(), - Self::Projection(row) => row, - Self::ProjRef(row) => row.clone(), - } - } - - /// Converts `self` into a `Cow<'a, ProductValue>` - /// either by reading a value from a table, - /// passing the reference to a `ProductValue`, - /// or consuming the owned product. - pub fn into_product_value_cow(self) -> Cow<'a, ProductValue> { - match self { - Self::Row(row) => Cow::Owned(row.to_product_value()), - Self::Projection(row) => Cow::Owned(row), - Self::ProjRef(row) => Cow::Borrowed(row), - } - } - - /// Computes the number of columns in this value. - pub fn num_columns(&self) -> usize { - match self { - Self::Row(row_ref) => row_ref.row_layout().product().elements.len(), - Self::Projection(row) => row.elements.len(), - Self::ProjRef(row) => row.elements.len(), - } - } - - /// Extends `self` with the columns in `other`. - /// - /// This will always cause `RowRef<'_>`s to be read out into [`ProductValue`]s. - pub fn extend(self, other: RelValue<'a>) -> RelValue<'a> { - let mut x: Vec<_> = self.into_product_value().elements.into(); - x.extend(other.into_product_value()); - RelValue::Projection(x.into()) - } - - /// Read the column at index `col`. - /// - /// Use `read_or_take_column` instead if you have ownership of `self`. - pub fn read_column(&self, col: usize) -> Option> { - match self { - Self::Row(row_ref) => AlgebraicValue::read_column(*row_ref, col).ok().map(Cow::Owned), - Self::Projection(pv) => pv.elements.get(col).map(Cow::Borrowed), - Self::ProjRef(pv) => pv.elements.get(col).map(Cow::Borrowed), - } - } - - /// Returns a column either at the index specified in `col`, - /// or the column is the value that `col` holds. - /// - /// Panics if, for `ColExprRef::Col(col)`, the `col` is out of bounds of `self`. - pub fn get(&'a self, col: ColExprRef<'a>) -> Cow<'a, AlgebraicValue> { - match col { - ColExprRef::Col(col) => self.read_column(col.idx()).unwrap(), - ColExprRef::Value(x) => Cow::Borrowed(x), - } - } - - /// Reads or takes the column at `col`. - /// Calling this method consumes the column at `col` for a `RelValue::Projection`, - /// so it should not be called again for the same input. - /// - /// Panics if `col` is out of bounds of `self`. - pub fn read_or_take_column(&mut self, col: usize) -> Option { - match self { - Self::Row(row_ref) => AlgebraicValue::read_column(*row_ref, col).ok(), - Self::Projection(pv) => pv.elements.get_mut(col).map(AlgebraicValue::take), - Self::ProjRef(pv) => pv.elements.get(col).cloned(), - } - } - - /// Turns `cols` into a product - /// where a value in `cols` is taken directly from it and indices are taken from `self`. - /// - /// Panics on an index that is out of bounds of `self`. - pub fn project_owned(mut self, cols: &[ColExpr]) -> ProductValue { - cols.iter() - .map(|col| match col { - ColExpr::Col(col) => self.read_or_take_column(col.idx()).unwrap(), - ColExpr::Value(x) => x.clone(), - }) - .collect() - } -} - -impl ToBsatn for RelValue<'_> { - fn to_bsatn_vec(&self) -> Result, BsatnError> { - match self { - RelValue::Row(this) => this.to_bsatn_vec(), - RelValue::Projection(this) => this.to_bsatn_vec(), - RelValue::ProjRef(this) => (*this).to_bsatn_vec(), - } - } - fn to_bsatn_extend(&self, buf: &mut (impl BufWriter + BufReservedFill)) -> Result<(), BsatnError> { - match self { - RelValue::Row(this) => this.to_bsatn_extend(buf), - RelValue::Projection(this) => this.to_bsatn_extend(buf), - RelValue::ProjRef(this) => this.to_bsatn_extend(buf), - } - } - fn static_bsatn_size(&self) -> Option { - match self { - RelValue::Row(this) => this.static_bsatn_size(), - RelValue::Projection(this) => this.static_bsatn_size(), - RelValue::ProjRef(this) => this.static_bsatn_size(), - } - } -} - -/// An in-memory table -// TODO(perf): Remove `Clone` impl. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct MemTable { - pub head: Arc

, - pub data: Vec, - pub table_access: StAccess, -} - -impl MemTable { - pub fn new(head: Arc
, table_access: StAccess, data: Vec) -> Self { - assert_eq!( - head.fields.len(), - data.first() - .map(|pv| pv.elements.len()) - .unwrap_or_else(|| head.fields.len()), - "number of columns in `header.len() != data.len()`" - ); - Self { - head, - data, - table_access, - } - } - - pub fn from_iter(head: Arc
, data: impl IntoIterator) -> Self { - Self { - head, - data: data.into_iter().collect(), - table_access: StAccess::Public, - } - } -} diff --git a/docker-compose.yml b/docker-compose.yml index 718281892dd..89019d9effe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,6 @@ services: - ./crates/bindings:/usr/src/app/crates/bindings - ./crates/bindings-macro:/usr/src/app/crates/bindings-macro - ./crates/bindings-sys:/usr/src/app/crates/bindings-sys - - ./crates/vm:/usr/src/app/crates/vm - ./crates/metrics:/usr/src/app/crates/metrics - ./crates/client-api-messages:/usr/src/app/crates/client-api-messages - ./Cargo.toml:/usr/src/app/Cargo.toml