Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
- **Analytics Performance**:
- **Columnar Storage**: Binary-per-column persistence for efficient analytical scanning.
- **Vectorized Execution**: Batch-at-a-time processing model for high-throughput query execution.
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC).
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC) and connection-aware execution state supporting `BEGIN`, `COMMIT`, and `ROLLBACK`.
- **Advanced Execution Engine**:
- **Full Outer Join Support**: Specialized `HashJoinOperator` implementing `LEFT`, `RIGHT`, and `FULL` outer join semantics with automatic null-padding.
- **B+ Tree Indexing**: Persistent indexing for high-speed point lookups and optimized query planning.
- **Type-Safe Value System**: Robust handling of SQL data types using `std::variant`.
- **Volcano & Vectorized Engine**: Flexible execution models supporting traditional row-based and high-performance columnar processing.
- **PostgreSQL Wire Protocol**: Handshake and simple query protocol implementation for tool compatibility.
Expand Down Expand Up @@ -46,17 +49,18 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
mkdir build
cd build
cmake ..
make -j$(nproc)
make -j$(nproc) # Or ./tests/run_test.sh for automated multi-OS build
```

### Running Tests

```bash
# Run all tests
# Run the integrated test suite (Unit + E2E + Logic)
./tests/run_test.sh

# Or run individual binaries
./build/sqlEngine_tests
# Run distributed-specific tests
./build/distributed_tests
./build/distributed_txn_tests
```

### Starting the Cluster
Expand Down
8 changes: 4 additions & 4 deletions docs/phases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ This directory contains the technical documentation for the lifecycle of the clo

### Phase 9 — Stability & Testing Refinement
**Focus**: Engine Robustness & E2E Validation.
- Slotted-page layout fixes for large table support.
- Buffer Pool Manager lifecycle management (destructor flushing).
- Robust Python E2E client with partial-read handling and numeric validation.
- Standardized test orchestration via `run_test.sh`.
- **Advanced Execution**: Full support for `LEFT`, `RIGHT`, and `FULL` outer joins.
- **Transactional Integrity**: Persistent connection-based execution state and comprehensive `ROLLBACK` support for all DML operations.
- **Logic Validation**: Integration of the SqlLogicTest (SLT) suite with 80+ logic test cases covering Joins, Transactions, Aggregates, and Indexes.
- **Automation**: Standardized cross-platform test orchestration via `run_test.sh` with automatic CPU detection.

---

Expand Down
6 changes: 3 additions & 3 deletions include/executor/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,12 @@ class HashJoinOperator : public Operator {
class LimitOperator : public Operator {
private:
std::unique_ptr<Operator> child_;
uint64_t limit_;
uint64_t offset_;
int64_t limit_;
int64_t offset_;
uint64_t current_count_ = 0;

public:
LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset = 0);
LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset = 0);

bool init() override;
bool open() override;
Expand Down
1 change: 1 addition & 0 deletions include/executor/query_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class QueryExecutor {

QueryResult execute_select(const parser::SelectStatement& stmt, transaction::Transaction* txn);
QueryResult execute_create_table(const parser::CreateTableStatement& stmt);
QueryResult execute_create_index(const parser::CreateIndexStatement& stmt);
QueryResult execute_drop_table(const parser::DropTableStatement& stmt);
QueryResult execute_drop_index(const parser::DropIndexStatement& stmt);
QueryResult execute_insert(const parser::InsertStatement& stmt, transaction::Transaction* txn);
Expand Down
6 changes: 3 additions & 3 deletions include/parser/statement.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ class SelectStatement : public Statement {
std::vector<std::unique_ptr<Expression>> group_by_;
std::unique_ptr<Expression> having_;
std::vector<std::unique_ptr<Expression>> order_by_;
int64_t limit_ = 0;
int64_t offset_ = 0;
int64_t limit_ = -1;
int64_t offset_ = -1;
bool distinct_ = false;

public:
Expand Down Expand Up @@ -112,7 +112,7 @@ class SelectStatement : public Statement {
[[nodiscard]] int64_t limit() const { return limit_; }
[[nodiscard]] int64_t offset() const { return offset_; }
[[nodiscard]] bool distinct() const { return distinct_; }
[[nodiscard]] bool has_limit() const { return limit_ > 0; }
[[nodiscard]] bool has_limit() const { return limit_ >= 0; }
[[nodiscard]] bool has_offset() const { return offset_ > 0; }

[[nodiscard]] std::string to_string() const override;
Expand Down
1 change: 1 addition & 0 deletions include/parser/token.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ enum class TokenType : uint8_t {
Join,
Left,
Right,
Full,
Inner,
Outer,
Order,
Expand Down
6 changes: 6 additions & 0 deletions include/storage/heap_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ class HeapTable {
*/
bool physical_remove(const TupleId& tuple_id);

/**
* @brief Resets xmax to 0 (used for rollback of a DELETE)
* @return true on success
*/
bool undo_remove(const TupleId& tuple_id);

/**
* @brief Replaces an existing record with new data
* @param tuple_id The record to update
Expand Down
7 changes: 5 additions & 2 deletions include/transaction/transaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <atomic>
#include <mutex>
#include <optional>
#include <unordered_set>
#include <vector>

Expand Down Expand Up @@ -55,6 +56,7 @@ struct UndoLog {
Type type = Type::INSERT;
std::string table_name;
storage::HeapTable::TupleId rid;
std::optional<storage::HeapTable::TupleId> old_rid;
};

/**
Expand Down Expand Up @@ -119,8 +121,9 @@ class Transaction {
}

void add_undo_log(UndoLog::Type type, const std::string& table_name,
const storage::HeapTable::TupleId& rid) {
undo_logs_.push_back({type, table_name, rid});
const storage::HeapTable::TupleId& rid,
std::optional<storage::HeapTable::TupleId> old_rid = std::nullopt) {
undo_logs_.push_back({type, table_name, rid, old_rid});
Comment on lines 123 to +126
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Make UPDATE undo logs impossible to record without old_rid.

src/transaction/transaction_manager.cpp only restores the old tuple version for UndoLog::Type::UPDATE when old_rid is present. With this signature, a caller can forget the fourth argument and rollback will silently remove the new tuple while leaving the old one hidden. This API should enforce that invariant instead of relying on every call site to remember it.

One way to enforce the invariant
 void add_undo_log(UndoLog::Type type, const std::string& table_name,
                   const storage::HeapTable::TupleId& rid,
                   std::optional<storage::HeapTable::TupleId> old_rid = std::nullopt) {
+    if (type == UndoLog::Type::UPDATE && !old_rid.has_value()) {
+        throw std::invalid_argument("UPDATE undo log requires old_rid");
+    }
     undo_logs_.push_back({type, table_name, rid, old_rid});
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@include/transaction/transaction.hpp` around lines 123–126, the add_undo_log
API currently allows creating an UPDATE undo log without old_rid; change the API
to enforce that UPDATE must provide old_rid by splitting into two overloads:
add_undo_log(UndoLog::Type type, const std::string& table_name, const
storage::HeapTable::TupleId& rid) for non-UPDATE types and
add_undo_log(UndoLog::Type type, const std::string& table_name, const
storage::HeapTable::TupleId& rid, const storage::HeapTable::TupleId& old_rid)
for UPDATE, and update both overloads to validate the invariant (e.g., assert or
throw if the caller uses the wrong overload: first must ensure type !=
UndoLog::Type::UPDATE, second should ensure type == UndoLog::Type::UPDATE)
before pushing into undo_logs_, so callers cannot omit old_rid for UPDATE and
rollback logic that checks old_rid remains correct.

}

[[nodiscard]] const std::vector<UndoLog>& get_undo_logs() const { return undo_logs_; }
Expand Down
17 changes: 12 additions & 5 deletions src/executor/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,12 @@ bool IndexScanOperator::next(Tuple& out_tuple) {
while (current_match_index_ < matching_ids_.size()) {
const auto& tid = matching_ids_[current_match_index_++];

storage::HeapTable::TupleId rid;
rid.page_num = tid.page_num;
rid.slot_num = tid.slot_num;

storage::HeapTable::TupleMeta meta;
if (table_->get_meta(tid, meta)) {
if (table_->get_meta(rid, meta)) {
/* MVCC Visibility Check */
bool visible = true;
const Transaction* const txn = get_txn();
Expand Down Expand Up @@ -734,7 +738,7 @@ void HashJoinOperator::add_child(std::unique_ptr<Operator> child) {

/* --- LimitOperator --- */

LimitOperator::LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset)
LimitOperator::LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset)
: Operator(OperatorType::Limit, child->get_txn(), child->get_lock_manager()),
child_(std::move(child)),
limit_(limit),
Expand All @@ -750,17 +754,20 @@ bool LimitOperator::open() {
}

/* Skip offset rows */
current_count_ = 0;
Tuple tuple;
while (current_count_ < offset_ && child_->next(tuple)) {
current_count_++;
if (offset_ > 0) {
while (current_count_ < static_cast<uint64_t>(offset_) && child_->next(tuple)) {
current_count_++;
}
}
current_count_ = 0;
set_state(ExecState::Open);
return true;
}

bool LimitOperator::next(Tuple& out_tuple) {
if (current_count_ >= limit_) {
if (limit_ >= 0 && current_count_ >= static_cast<uint64_t>(limit_)) {
set_state(ExecState::Done);
return false;
}
Expand Down
Loading