diff --git a/Cargo.lock b/Cargo.lock index 26975c460..21ca13af7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1317,7 +1317,7 @@ dependencies = [ "ff 0.13.1", "group 0.13.0", "log", - "memmap2", + "memmap2 0.5.10", "pairing 0.23.0", "rand 0.8.5", "rand_core 0.6.4", @@ -3385,6 +3385,18 @@ dependencies = [ "serde", ] +[[package]] +name = "erasure-encoding" +version = "0.1.0" +dependencies = [ + "blake3", + "memmap2 0.9.10", + "rand 0.8.5", + "reed-solomon-simd", + "tempfile", + "thiserror 1.0.69", +] + [[package]] name = "errno" version = "0.3.14" @@ -5243,7 +5255,7 @@ dependencies = [ "iowrap", "lazy_static", "log", - "memmap2", + "memmap2 0.5.10", "merkletree", "once_cell", "rand 0.8.5", @@ -5310,6 +5322,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.1.8" @@ -7315,9 +7333,11 @@ dependencies = [ "anyhow", "async-trait", "blake2b_simd", + "blake3", "bls-signatures 0.13.1", "bytes", "clap 4.5.54", + "erasure-encoding", "ethers", "fendermint_actor_blobs_shared", "fendermint_actor_bucket", @@ -7340,8 +7360,6 @@ dependencies = [ "prometheus", "prometheus_exporter", "rand 0.8.5", - "recall_entangler", - "recall_entangler_storage", "reqwest 0.11.27", "serde", "serde_json", @@ -9218,6 +9236,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + [[package]] name = "merkle-tree-rs" version = "0.1.0" @@ -9239,7 +9266,7 @@ dependencies = [ "anyhow", "arrayref", "log", - "memmap2", + "memmap2 0.5.10", "positioned-io", 
"rayon", "serde", @@ -10418,7 +10445,7 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", "indexmap 2.13.0", ] @@ -11590,44 +11617,10 @@ dependencies = [ ] [[package]] -name = "recall_entangler" -version = "0.1.0" -source = "git+https://github.com/recallnet/entanglement.git?rev=aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc#aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc" -dependencies = [ - "anyhow", - "async-trait", - "bytes", - "cid 0.10.1", - "futures", - "iroh", - "iroh-blobs", - "recall_entangler_storage", - "serde", - "serde_json", - "thiserror 2.0.17", - "tokio", - "tokio-stream", -] - -[[package]] -name = "recall_entangler_storage" -version = "0.1.0" -source = "git+https://github.com/recallnet/entanglement.git?rev=aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc#aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc" -dependencies = [ - "anyhow", - "async-trait", - "bytes", - "cid 0.10.1", - "futures", - "futures-lite 2.6.1", - "iroh", - "iroh-blobs", - "serde", - "serde_json", - "thiserror 2.0.17", - "tokio", - "uuid 1.19.0", -] +name = "readme-rustdocifier" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ad765b21a08b1a8e5cdce052719188a23772bcbefb3c439f0baaf62c56ceac" [[package]] name = "redb" @@ -11667,6 +11660,18 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "reed-solomon-simd" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffef0520d30fbd4151fb20e262947ae47fb0ab276a744a19b6398438105a072" +dependencies = [ + "cpufeatures", + "fixedbitset 0.5.7", + "once_cell", + "readme-rustdocifier", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -13393,7 +13398,7 @@ dependencies = [ "itertools 0.13.0", "lazy_static", "log", - "memmap2", + "memmap2 0.5.10", "merkletree", "num_cpus", "rand 0.8.5", @@ -13431,7 
+13436,7 @@ dependencies = [ "lazy_static", "libc", "log", - "memmap2", + "memmap2 0.5.10", "merkletree", "neptune", "num-bigint", @@ -13483,7 +13488,7 @@ dependencies = [ "generic-array 0.14.7", "lazy_static", "log", - "memmap2", + "memmap2 0.5.10", "merkletree", "neptune", "rayon", diff --git a/Cargo.toml b/Cargo.toml index d53262f3e..15f420db7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ members = [ "fendermint/actors/ipc_storage_config/shared", # storage components (netwatch patched for socket2 0.5 compatibility!) + "ipc-storage/erasure-encoding", "ipc-storage/iroh_manager", "ipc-storage/ipld", "ipc-storage/actor_sdk", @@ -165,6 +166,7 @@ libp2p-bitswap = { path = "ext/libp2p-bitswap" } libsecp256k1 = "0.7" literally = "0.1.3" log = "0.4" +memmap2 = "0.9" lru_time_cache = "0.11" multiaddr = "0.18" multihash = { version = "0.18.1", default-features = false, features = [ @@ -184,14 +186,12 @@ quickcheck = "1" quickcheck_async = "0.1" quickcheck_macros = "1" rand = "0.8" +reed-solomon-simd = "3" rand_chacha = "0.3" regex = "1" replace_with = "0.1.7" statrs = "0.18.0" reqwest = { version = "0.11.13", features = ["json"] } -# entanglement library -entangler = { package = "recall_entangler", git = "https://github.com/recallnet/entanglement.git", rev = "aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc" } -entangler_storage = { package = "recall_entangler_storage", git = "https://github.com/recallnet/entanglement.git", rev = "aee1c675ff05e5cde4771a2e2eb3ac4dab8476bc" } # Objects HTTP API dependencies warp = "0.3" uuid = { version = "1.0", features = ["v4"] } diff --git a/fendermint/actors/blobs/shared/src/blobs/blob.rs b/fendermint/actors/blobs/shared/src/blobs/blob.rs index b8f8f0014..46a996889 100644 --- a/fendermint/actors/blobs/shared/src/blobs/blob.rs +++ b/fendermint/actors/blobs/shared/src/blobs/blob.rs @@ -21,4 +21,8 @@ pub struct Blob { pub subscribers: HashMap, /// Blob status. 
pub status: BlobStatus, + /// Number of data shards per chunk for erasure encoding (k). + pub data_shards: u16, + /// Number of parity shards per chunk for erasure encoding (m). + pub parity_shards: u16, } diff --git a/fendermint/actors/blobs/shared/src/blobs/params.rs b/fendermint/actors/blobs/shared/src/blobs/params.rs index 0b6123802..48456c3fa 100644 --- a/fendermint/actors/blobs/shared/src/blobs/params.rs +++ b/fendermint/actors/blobs/shared/src/blobs/params.rs @@ -30,6 +30,10 @@ pub struct AddBlobParams { /// Blob time-to-live epochs. /// If not specified, the current default TTL from the config actor is used. pub ttl: Option, + /// Number of data shards per chunk for erasure encoding (k). + pub data_shards: u16, + /// Number of parity shards per chunk for erasure encoding (m). + pub parity_shards: u16, } /// Params for getting a blob. diff --git a/fendermint/actors/blobs/src/actor/system.rs b/fendermint/actors/blobs/src/actor/system.rs index 6909d9dcc..2d5c601c4 100644 --- a/fendermint/actors/blobs/src/actor/system.rs +++ b/fendermint/actors/blobs/src/actor/system.rs @@ -212,7 +212,12 @@ impl BlobsActor { ) } - /// Verify aggregated BLS signatures for blob finalization + /// Verify aggregated BLS signatures for blob finalization. + /// + /// Only operators assigned to store shards of this blob are required to sign. + /// The set of assigned operators is computed deterministically from the blob hash, + /// blob size, encoding parameters (data_shards, parity_shards), and the active + /// operator list. The quorum threshold is 2/3+ of the assigned operators. fn verify_blob_signatures( rt: &impl Runtime, params: &FinalizeBlobParams, @@ -237,7 +242,52 @@ impl BlobsActor { )); } - // Extract signer indices from bitmap and collect their public keys + // Look up blob to get encoding parameters + let blob = state + .get_blob(rt.store(), params.hash)? 
+ .ok_or_else(|| ActorError::not_found(format!("Blob {} not found", params.hash)))?; + + let data_shards = blob.data_shards as usize; + let parity_shards = blob.parity_shards as usize; + let shards_per_chunk = data_shards + parity_shards; + + // Compute number of chunks from blob size + const MAX_CHUNK_SIZE: u64 = 16 * 1024 * 1024; // 16 MiB, matches erasure-encoding + let num_chunks = if params.size == 0 { + 1 + } else { + ((params.size + MAX_CHUNK_SIZE - 1) / MAX_CHUNK_SIZE) as usize + }; + + // Compute the set of unique assigned operator indices using the deterministic + // assignment formula from erasure-encoding: + // rotation_offset = blob_hash (big-endian) % num_nodes + // node_index = (chunk_index * shards_per_chunk + shard_index + rotation_offset) % num_nodes + let rotation_offset = { + let mut remainder: u64 = 0; + for &byte in ¶ms.hash.0 { + remainder = (remainder * 256 + byte as u64) % total_operators as u64; + } + remainder as usize + }; + + let mut assigned_indices = std::collections::BTreeSet::new(); + for chunk_idx in 0..num_chunks { + for shard_idx in 0..shards_per_chunk { + let shard_global = chunk_idx * shards_per_chunk + shard_idx; + let node_index = (shard_global + rotation_offset) % total_operators; + assigned_indices.insert(node_index); + } + } + + let assigned_count = assigned_indices.len(); + if assigned_count == 0 { + return Err(ActorError::illegal_state( + "No operators assigned to blob".into(), + )); + } + + // Extract signer indices from bitmap, only counting assigned operators let mut signer_pubkeys = Vec::new(); let mut signer_count = 0; @@ -248,6 +298,11 @@ impl BlobsActor { // Check if this operator signed (bit is set in bitmap) if (params.signer_bitmap & (1u128 << index)) != 0 { + // Only count signers that are in the assigned set + if !assigned_indices.contains(&index) { + continue; + } + signer_count += 1; // Get operator info to retrieve BLS public key @@ -274,12 +329,12 @@ impl BlobsActor { } } - // Check threshold: need at 
least 2/3+ of operators - let threshold = (total_operators * 2 + 2) / 3; // Ceiling of 2/3 + // Check threshold: need at least 2/3+ of assigned operators + let threshold = (assigned_count * 2 + 2) / 3; // Ceiling of 2/3 if signer_count < threshold { return Err(ActorError::illegal_argument(format!( - "Insufficient signatures: got {}, need {} out of {}", - signer_count, threshold, total_operators + "Insufficient signatures: got {}, need {} out of {} assigned operators", + signer_count, threshold, assigned_count ))); } @@ -294,7 +349,6 @@ impl BlobsActor { let messages: Vec<&[u8]> = vec![hash_bytes; signer_count]; // Verify the aggregated signature using verify_messages - // This verifies that the aggregated signature corresponds to the individual signatures let verification_result = verify_messages(&aggregated_sig, &messages, &signer_pubkeys); if !verification_result { @@ -304,8 +358,9 @@ impl BlobsActor { } log::info!( - "BLS signature verified: {} operators signed (threshold: {}/{})", + "BLS signature verified: {}/{} assigned operators signed (threshold: {}, total operators: {})", signer_count, + assigned_count, threshold, total_operators ); diff --git a/fendermint/actors/blobs/src/sol_facade/blobs.rs b/fendermint/actors/blobs/src/sol_facade/blobs.rs index 581972d55..4f2426f33 100644 --- a/fendermint/actors/blobs/src/sol_facade/blobs.rs +++ b/fendermint/actors/blobs/src/sol_facade/blobs.rs @@ -147,6 +147,8 @@ impl AbiCallRuntime for sol::addBlobCall { size, ttl, from, + data_shards: self.dataShards, + parity_shards: self.parityShards, }) } fn returns(&self, returns: Self::Returns) -> Self::Output { @@ -264,6 +266,8 @@ impl AbiCallRuntime for sol::overwriteBlobCall { size, ttl, from, + data_shards: self.dataShards, + parity_shards: self.parityShards, }, }) } diff --git a/fendermint/actors/blobs/src/state/blobs/blob.rs b/fendermint/actors/blobs/src/state/blobs/blob.rs index efa8221ce..0b0e826ad 100644 --- a/fendermint/actors/blobs/src/state/blobs/blob.rs +++ 
b/fendermint/actors/blobs/src/state/blobs/blob.rs @@ -47,6 +47,10 @@ pub struct Blob { pub subscribers: Subscribers, /// Blob status. pub status: BlobStatus, + /// Number of data shards per chunk for erasure encoding (k). + pub data_shards: u16, + /// Number of parity shards per chunk for erasure encoding (m). + pub parity_shards: u16, } impl Blob { @@ -55,12 +59,16 @@ impl Blob { store: &BS, size: u64, metadata_hash: B256, + data_shards: u16, + parity_shards: u16, ) -> Result { Ok(Self { size, metadata_hash, subscribers: Subscribers::new(store)?, status: BlobStatus::Added, + data_shards, + parity_shards, }) } @@ -81,6 +89,8 @@ impl Blob { metadata_hash: self.metadata_hash, subscribers, status: self.status.clone(), + data_shards: self.data_shards, + parity_shards: self.parity_shards, }) } } @@ -249,7 +259,7 @@ impl Blobs { let (mut blob, blob_added) = if let Some(blob) = blobs.get(¶ms.hash)? { (blob, false) } else { - (Blob::new(store, params.size, params.metadata_hash)?, true) + (Blob::new(store, params.size, params.metadata_hash, params.data_shards, params.parity_shards)?, true) }; // Add/update subscriber and the subscription diff --git a/fendermint/actors/blobs/src/state/blobs/methods.rs b/fendermint/actors/blobs/src/state/blobs/methods.rs index 64b5c3082..51013a661 100644 --- a/fendermint/actors/blobs/src/state/blobs/methods.rs +++ b/fendermint/actors/blobs/src/state/blobs/methods.rs @@ -650,6 +650,8 @@ impl State { source: subscription.source, epoch: current_epoch, token_amount: TokenAmount::zero(), + data_shards: blob.data_shards, + parity_shards: blob.parity_shards, }, )?; } diff --git a/fendermint/actors/blobs/src/state/blobs/params.rs b/fendermint/actors/blobs/src/state/blobs/params.rs index 5d55fcf87..f788d36f2 100644 --- a/fendermint/actors/blobs/src/state/blobs/params.rs +++ b/fendermint/actors/blobs/src/state/blobs/params.rs @@ -28,6 +28,10 @@ pub struct AddBlobStateParams { pub epoch: ChainEpoch, /// Token amount sent with the transaction. 
pub token_amount: TokenAmount, + /// Number of data shards per chunk for erasure encoding (k). + pub data_shards: u16, + /// Number of parity shards per chunk for erasure encoding (m). + pub parity_shards: u16, } impl AddBlobStateParams { @@ -45,6 +49,8 @@ impl AddBlobStateParams { ttl: params.ttl, epoch, token_amount, + data_shards: params.data_shards, + parity_shards: params.parity_shards, } } } diff --git a/fendermint/actors/bucket/src/actor.rs b/fendermint/actors/bucket/src/actor.rs index 8cacd059c..b3f3417fa 100644 --- a/fendermint/actors/bucket/src/actor.rs +++ b/fendermint/actors/bucket/src/actor.rs @@ -77,6 +77,8 @@ impl Actor { id: sub_id, size: params.size, ttl: params.ttl, + data_shards: params.data_shards, + parity_shards: params.parity_shards, }, }, )? @@ -99,6 +101,8 @@ impl Actor { id: sub_id, size: params.size, ttl: params.ttl, + data_shards: params.data_shards, + parity_shards: params.parity_shards, }, )? }; diff --git a/fendermint/actors/bucket/src/shared.rs b/fendermint/actors/bucket/src/shared.rs index ad7f597b0..a7f1f20ff 100644 --- a/fendermint/actors/bucket/src/shared.rs +++ b/fendermint/actors/bucket/src/shared.rs @@ -57,6 +57,10 @@ pub struct AddParams { pub metadata: HashMap, /// Whether to overwrite a key if it already exists. pub overwrite: bool, + /// Number of data shards per chunk for erasure encoding (k). + pub data_shards: u16, + /// Number of parity shards per chunk for erasure encoding (m). + pub parity_shards: u16, } /// Key of the object to delete from a bucket. 
diff --git a/fendermint/actors/bucket/src/sol_facade.rs b/fendermint/actors/bucket/src/sol_facade.rs index 5c1fa0018..4fdc9f047 100644 --- a/fendermint/actors/bucket/src/sol_facade.rs +++ b/fendermint/actors/bucket/src/sol_facade.rs @@ -129,6 +129,8 @@ impl AbiCall for sol::addObject_0Call { ttl: None, metadata: HashMap::default(), overwrite: false, + data_shards: self.dataShards, + parity_shards: self.parityShards, } } @@ -166,6 +168,8 @@ impl AbiCall for sol::addObject_1Call { ttl, metadata, overwrite, + data_shards: self.dataShards, + parity_shards: self.parityShards, } } fn returns(&self, returns: Self::Returns) -> Self::Output { diff --git a/fendermint/vm/interpreter/src/fvm/interpreter.rs b/fendermint/vm/interpreter/src/fvm/interpreter.rs index 5a3cb5bc5..56498145e 100644 --- a/fendermint/vm/interpreter/src/fvm/interpreter.rs +++ b/fendermint/vm/interpreter/src/fvm/interpreter.rs @@ -267,8 +267,8 @@ where }) .collect::>(); - let signed_msgs = - select_messages_above_base_fee(signed_msgs, state.block_gas_tracker().base_fee()); + // let signed_msgs = + // select_messages_above_base_fee(signed_msgs, state.block_gas_tracker().base_fee()); let total_gas_limit = state.block_gas_tracker().available(); let signed_msgs_iter = select_messages_by_gas_limit(signed_msgs, total_gas_limit) diff --git a/ipc-storage/DESIGN.md b/ipc-storage/DESIGN.md new file mode 100644 index 000000000..9eaff6052 --- /dev/null +++ b/ipc-storage/DESIGN.md @@ -0,0 +1,1167 @@ +# IPC Storage: Replication and Storage Proof Design + +This document describes the design of data replication and storage proof mechanisms in IPC Storage. The system uses **client-side encryption** for data privacy, **Reed-Solomon erasure encoding** for fault-tolerant replication, and **Provable Data Possession (PDP)** based on Merkle proofs for storage verification. 
+ +## Table of Contents + +- [Overview](#overview) +- [System Architecture](#system-architecture) +- [Client-Side Encryption](#client-side-encryption) +- [Data Hierarchy](#data-hierarchy) +- [Replication: Reed-Solomon Erasure Encoding](#replication-reed-solomon-erasure-encoding) +- [Merkle Tree Construction](#merkle-tree-construction) +- [On-Chain Commitment](#on-chain-commitment) +- [Data Distribution](#data-distribution) +- [Storage Proof (PDP Challenge)](#storage-proof-pdp-challenge) +- [Verification Process](#verification-process) +- [Security Considerations](#security-considerations) +- [Economic Model](#economic-model) +- [References](#references) + +--- + +## Overview + +IPC Storage provides decentralized, verifiable storage with the following guarantees: + +1. **Data Privacy**: Client-side encryption ensures storage nodes cannot read user data. +2. **Data Availability**: Reed-Solomon erasure encoding ensures data can be recovered even if some storage nodes fail or become unavailable. +3. **Data Integrity**: Merkle tree commitments allow efficient verification that stored data matches the original. +4. **Proof of Storage**: Random challenge-response protocol proves that storage nodes actually hold the data they claim to store. 
+ +--- + +## System Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CLIENT │ +│ │ +│ ┌──────────┐ ┌───────────┐ ┌──────────┐ ┌─────────┐ ┌───────────┐ │ +│ │ Data │──▶│ Encrypt │──▶│ Chunk │──▶│ Erasure │──▶│ Merkle │ │ +│ │ │ │ (AES) │ │ Split │ │ Encode │ │ Tree │ │ +│ └──────────┘ └───────────┘ └──────────┘ └─────────┘ └───────────┘ │ +│ │ │ +└──────────────────────────────────────────────────────────────────┼───────────┘ + │ + ┌──────────────────────────────┼──────────┐ + │ ON-CHAIN │ │ + │ ▼ │ + │ ┌──────────────────────────────────┐ │ + │ │ File Merkle Root Commitment │ │ + │ │ + Storage Metadata │ │ + │ └──────────────────────────────────┘ │ + │ │ + │ ┌──────────────────────────────────┐ │ + │ │ Challenge Contract │ │ + │ │ (VRF-based Random Selection) │ │ + │ └──────────────────────────────────┘ │ + └─────────────────────────────────────────┘ + + ┌────────────────┐ ┌────────────────┐ ┌────────────────┐ + │ Storage Node │ │ Storage Node │ │ Storage Node │ + │ (1) │ │ (2) │ │ (n) │ + │ │ │ │ │ │ + │ Encrypted │ │ Encrypted │ │ Encrypted │ + │ Chunk + Proofs │ │ Chunk + Proofs │ │ Chunk + Proofs │ + └────────────────┘ └────────────────┘ └────────────────┘ +``` + +--- + +## Client-Side Encryption + +All data is encrypted on the client before chunking and distribution. Storage nodes only ever see ciphertext and cannot read the underlying data. + +### Encryption Scheme + +| Component | Algorithm | Description | +|-----------|-----------|-------------| +| **Symmetric encryption** | AES-256-GCM | Encrypts the actual data | +| **Key derivation** | HKDF-SHA256 | Derives encryption key from master secret | +| **Key encryption** | ECIES / RSA-OAEP | Encrypts DEK for storage/sharing | + +### Encryption Flow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CLIENT ENCRYPTION │ +│ │ +│ 1. Generate random Data Encryption Key (DEK) │ +│ DEK = random(256 bits) │ +│ │ +│ 2. 
Encrypt data with DEK │ +│ ciphertext = AES-256-GCM(plaintext, DEK, nonce) │ +│ │ +│ 3. Encrypt DEK with client's public key (for later retrieval) │ +│ encrypted_dek = ECIES_Encrypt(DEK, client_pubkey) │ +│ │ +│ 4. Store encrypted_dek securely (client-side or key management service) │ +│ │ +│ 5. Proceed with chunking on ciphertext (not plaintext) │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Key Management + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ KEY HIERARCHY │ +│ │ +│ Master Key (client-controlled) │ +│ │ │ +│ ├──▶ File Key 1 (derived via HKDF + file_id) │ +│ │ └──▶ DEK for File 1 │ +│ │ │ +│ ├──▶ File Key 2 (derived via HKDF + file_id) │ +│ │ └──▶ DEK for File 2 │ +│ │ │ +│ └──▶ ... │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Sharing Encrypted Data + +To share data with another user: + +1. Retrieve the encrypted DEK +2. Decrypt DEK with owner's private key +3. Re-encrypt DEK with recipient's public key +4. Share re-encrypted DEK with recipient + +The actual stored data never changes; only key access is granted. + +--- + +## Data Hierarchy + +Data is organized in a three-level hierarchy. All operations occur on **encrypted** data. + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Original Data (Plaintext) │ +└─────────────────────────────────────────────────────────────────┘ + │ + Encryption + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Encrypted Data (Ciphertext) │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────┬───────────┬───────────┬───────────┬─────────────────┐ +│ Chunk 0 │ Chunk 1 │ Chunk 2 │ ... │ Chunk k-1 │ +└───────────┴───────────┴───────────┴───────────┴─────────────────┘ + │ + Erasure Encoding + │ + ▼ +┌───────────┬───────────┬───────────┬───────────┬─────────────────┐ +│ Encoded │ Encoded │ Encoded │ ... 
│ Encoded │ +│ Chunk 0 │ Chunk 1 │ Chunk 2 │ ... │ Chunk n-1 │ +│ (data) │ (data) │ (parity) │ │ (parity) │ +└───────────┴───────────┴───────────┴───────────┴─────────────────┘ + │ + ▼ +┌────────────────────────────────────────────────────┐ +│ Encoded Chunk │ +├────────────┬────────────┬────────────┬─────────────┤ +│ Piece 0 │ Piece 1 │ ... │ Piece P-1 │ +├────────────┼────────────┼────────────┼─────────────┤ +│ Leaf 0..L │ Leaf 0..L │ ... │ Leaf 0..L │ +└────────────┴────────────┴────────────┴─────────────┘ +``` + +### Terminology + +| Term | Description | Typical Size | +|------|-------------|--------------| +| **Chunk** | A segment of encrypted data before/after erasure encoding | 1-64 MB | +| **Piece** | A subdivision of an encoded chunk | 256 KB - 1 MB | +| **Leaf** | The smallest unit for Merkle tree construction | 256 bytes - 1 KB | + +--- + +## Replication: Reed-Solomon Erasure Encoding + +IPC Storage uses Reed-Solomon erasure coding, similar to [Storj](https://storj.io/)'s approach, to achieve fault-tolerant data storage. The implementation lives in the `erasure-encoding` crate. 
+ +### How Reed-Solomon Works + +Reed-Solomon encoding transforms `k` data shards into `k + m` total shards (where `m` = parity), such that: +- Any `k` of the `k + m` shards are sufficient to reconstruct the original data +- Up to `m` shards can be lost without data loss + +### Encoding Parameters + +| Parameter | Symbol | Description | Example | +|-----------|--------|-------------|---------| +| Data shards | k | Number of original data shards per chunk | 15 | +| Parity shards | m | Number of redundancy shards per chunk | 8 | +| Total shards | n | k + m = total shards per chunk | 23 | +| Max chunk size | — | Maximum bytes per chunk before RS encoding | 16 MiB | +| Expansion factor | n/k | Storage overhead ratio | 1.53x | + +### Architecture + +The crate is built around three core traits: + +| Trait | Purpose | +|-------|---------| +| `Encoder` | Splits raw bytes into `k` padded shards and computes `m` parity shards | +| `Decoder` | Recovers original data from any `k` of `k + m` shards | +| `NodeAssigner` | Maps each shard to a storage node | + +All trait methods are stateless (no `&self`) with an associated `Shard` type. Repair is simply decode followed by encode — no separate `Repairer` trait is needed. + +The concrete implementation uses `reed-solomon-simd` for SIMD-accelerated Galois Field arithmetic (`ReedSolomonEncoder`). + +### Encoding Process + +Large files are split into chunks of up to 16 MiB (`DEFAULT_MAX_CHUNK_SIZE`). Each chunk is independently Reed-Solomon encoded, producing `k + m` shards. This means only one chunk's shards need to be in memory at a time. + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Encrypted Data (Ciphertext, any size) │ +└─────────────────────────────────────────────────────────────────┘ + │ + Split into 16 MiB chunks + │ + ▼ +┌───────────┬───────────┬───────────┬───────────┬─────────────────┐ +│ Chunk 0 │ Chunk 1 │ Chunk 2 │ ... 
│ Chunk C-1 │ +│ (16 MiB) │ (16 MiB) │ (16 MiB) │ │ (≤ 16 MiB) │ +└───────────┴───────────┴───────────┴───────────┴─────────────────┘ + │ + Per-chunk Reed-Solomon Encoding + │ + ▼ + For each chunk, produce k + m shards: +┌───────────┬───────────┬───────────┬───────────┬─────────────────┐ +│ Shard 0 │ Shard 1 │ ... │ Shard k-1 │ Shard k..k+m-1 │ +│ (data) │ (data) │ │ (data) │ (parity) │ +└───────────┴───────────┴───────────┴───────────┴─────────────────┘ + │ + NodeAssigner distributes + │ + ▼ + Each shard assigned to a storage node +``` + +**Shard padding**: Each chunk is split into `k` equal-sized shards with zero-padding (rounded up to even size, as required by `reed-solomon-simd`). The `original_data_len` is preserved per chunk so padding can be stripped during decoding. + +### Deterministic Shard Assignment + +Shards are mapped to storage nodes deterministically using the `blob_id` and the active node list at the encoding epoch. No mapping table is stored — any party can recompute the assignment: + +``` +rotation_offset = blob_id % num_nodes + +For each chunk's k + m shards: + shard_global = chunk_index * (k + m) + shard_index + node = nodes[(shard_global + rotation_offset) % num_nodes] +``` + +The node list is retrieved from on-chain state at the `encoding_epoch`. This ensures pseudo-random distribution across nodes without storing per-shard assignments. + +### Storage in Iroh + +Each shard is stored in Iroh under a deterministic key: + +``` +key = blob_id / chunk_index / shard_index +value = shard data (encrypted bytes) +``` + +Nodes only store the shards assigned to them. During retrieval, the decoder computes which nodes hold which shards and fetches directly. + +### Decoding Process + +All chunk structure is derivable from `original_len` and the fixed `MAX_CHUNK_SIZE` (16 MiB): + +``` +num_chunks = original_len.div_ceil(MAX_CHUNK_SIZE) +chunk_data_len(i) = min(MAX_CHUNK_SIZE, original_len - i * MAX_CHUNK_SIZE) +``` + +Decoding steps: + +1. 
Read `blob_id`, `original_len`, `k`, `m`, `encoding_epoch` from chain +2. Derive chunk structure and shard→node mapping from epoch's node list +3. For each chunk, fetch at least `k` shards from their assigned nodes +4. RS-decode each chunk, truncate to `chunk_data_len` (strips RS padding) +5. Concatenate all chunks and truncate to `original_len` + +Per-chunk truncation is essential for multi-chunk files — without it, padding bytes from chunk N would appear before chunk N+1's data, corrupting the output. + +### Advantages (Storj Reference) + +Following Storj's proven model: + +1. **Storage Efficiency**: Configurable expansion (e.g., 1.53x with k=15, m=8) provides better durability than 3x replication +2. **Repair Bandwidth**: Only download `k` shards to repair, not the entire file +3. **Distributed Trust**: No single node holds enough data to reconstruct the file +4. **Flexible Recovery**: Any `k` of `k + m` shards suffice; no specific shards required + +--- + +## Merkle Tree Construction + +The Merkle tree is constructed in **three levels** to enable efficient proofs: + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ MERKLE TREE STRUCTURE │ +│ │ +│ Level 0: File Merkle Root (FMR) ─────────────── submitted on-chain │ +│ │ │ +│ Level 1: Chunk Merkle Roots (CMR₀, CMR₁, ..., CMRₙ₋₁) │ +│ │ │ +│ Level 2: Piece Merkle Roots (PMR₀, PMR₁, ...) 
── per chunk │ +│ │ │ +│ Level 3: Leaf hashes ─────────────────────────── per piece │ +│ │ │ +│ Level 4: Raw leaf data (encrypted bytes) │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Level 1: Piece Merkle Trees (Leaves → PMR) + +For each piece within an encoded chunk, compute the Piece Merkle Root from its leaves: + +``` + Piece Merkle Root (PMR) + │ + ┌────────────┴────────────┐ + │ │ + Hash(H₀,H₁) Hash(H₂,H₃) + │ │ + ┌─────┴─────┐ ┌─────┴─────┐ + │ │ │ │ + H₀= H₁= H₂= H₃= + Hash(L₀) Hash(L₁) Hash(L₂) Hash(L₃) + │ │ │ │ + Leaf₀ Leaf₁ Leaf₂ Leaf₃ + (encrypted) (encrypted) (encrypted) (encrypted) +``` + +### Level 2: Chunk Merkle Trees (PMRs → CMR) + +All Piece Merkle Roots within a chunk form the leaves of the Chunk Merkle Tree: + +``` + Chunk Merkle Root (CMR) + │ + ┌────────────────┴────────────────┐ + │ │ + Hash(PMR₀,PMR₁) Hash(PMR₂,PMR₃) + │ │ + ┌─────┴─────┐ ┌─────┴─────┐ + │ │ │ │ + PMR₀ PMR₁ PMR₂ PMR₃ + (Piece 0) (Piece 1) (Piece 2) (Piece 3) +``` + +### Level 3: File Merkle Tree (CMRs → FMR) + +All Chunk Merkle Roots form the leaves of the File Merkle Tree: + +``` + File Merkle Root (FMR) + ━━━━━━━━━━━━━━━━━━━━━━━ + (Submitted On-Chain) + │ + ┌────────────────┴────────────────┐ + │ │ + Hash(CMR₀,CMR₁) Hash(CMR₂,CMR₃) + │ │ + ┌─────┴─────┐ ┌─────┴─────┐ + │ │ │ │ + CMR₀ CMR₁ CMR₂ CMR₃ + (Chunk 0) (Chunk 1) (Chunk 2) (Chunk 3) +``` + +### Summary + +| Level | Input | Output | Count | +|-------|-------|--------|-------| +| 1 | Leaf data (encrypted bytes) | Piece Merkle Root (PMR) | leaves_per_piece leaves → 1 PMR | +| 2 | PMRs for one chunk | Chunk Merkle Root (CMR) | pieces_per_chunk PMRs → 1 CMR | +| 3 | CMRs for all chunks | File Merkle Root (FMR) | n CMRs → 1 FMR | + +--- + +## On-Chain Commitment + +When a client uploads data, the following is submitted on-chain: + +### Storage Commitment Structure + +```solidity +struct StorageCommitment { + bytes32 blobId; // Content-addressed blob identifier + 
bytes32 fileMerkleRoot; // File Merkle Root (FMR) + uint64 originalLen; // Original encrypted data size in bytes + uint16 dataShards; // Number of data shards per chunk (k) + uint16 parityShards; // Number of parity shards per chunk (m) + uint64 encodingEpoch; // Epoch for node list lookup + uint64 expiryBlock; // Storage expiration block + address owner; // Data owner +} +``` + +Everything else is derivable from these fields: +- `num_chunks = originalLen.div_ceil(MAX_CHUNK_SIZE)` +- `chunk_data_len(i) = min(MAX_CHUNK_SIZE, originalLen - i * MAX_CHUNK_SIZE)` +- Shard→node mapping: deterministic rotation over the epoch's node list + +No per-shard or per-chunk metadata is stored on-chain. + +### Shard Assignment Overrides + +During normal operation, shard→node mapping is computed deterministically. When a repair replaces a failed node, the override is recorded: + +```solidity +struct ShardOverride { + bytes32 blobId; // Which blob + uint32 chunkIndex; // Which chunk + uint16 shardIndex; // Which shard within the chunk + bytes32 newNodeId; // Replacement node +} +``` + +Overrides are only created by the repair process. The decoder checks overrides before falling back to the deterministic mapping. + +--- + +## Data Distribution + +After encoding, the client distributes shards to storage nodes via Iroh P2P: + +### Distribution Flow + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ CLIENT │ +│ │ +│ 1. Encode file → chunks → shards (see Erasure Encoding above) │ +│ 2. Compute Merkle trees (FMR, CMRs, PMRs) │ +│ 3. Submit StorageCommitment on-chain │ +│ 4. For each shard: │ +│ a. Compute target node from deterministic assignment │ +│ b. Store in Iroh: key = blob_id/chunk_index/shard_index │ +│ c. Push shard to assigned node via Iroh P2P │ +│ d. 
Include Merkle proof (shard → CMR → FMR) for verification │ +│ │ +└────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────────────────────────────────┐ +│ STORAGE NODE │ +│ │ +│ On receiving shard + proof: │ +│ 1. Verify Merkle proof against on-chain FMR │ +│ 2. Verify shard index matches deterministic assignment for this node│ +│ 3. Store shard data in Iroh under blob_id/chunk_index/shard_index │ +│ 4. Sign acknowledgment (BLS signature over shard hash) │ +│ │ +└────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Storage Proof (PDP Challenge) + +The Provable Data Possession (PDP) protocol, inspired by [Filecoin](https://spec.filecoin.io/), uses random challenges to verify storage. + +### Challenge Protocol + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ CHALLENGE CONTRACT │ +│ │ +│ 1. Generate random challenge using VRF: │ +│ - seed = VRF(validator_sk, block_hash || commitment_id) │ +│ - chunk_idx = seed % num_chunks │ +│ - piece_idx = (seed >> 8) % pieces_per_chunk │ +│ - leaf_idx = (seed >> 16) % leaves_per_piece │ +│ │ +│ 2. Emit Challenge event: │ +│ Challenge(commitmentId, chunk_idx, piece_idx, leaf_idx, deadline) │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ STORAGE NODE │ +│ │ +│ On receiving challenge: │ +│ 1. Retrieve leaf data at (piece_idx, leaf_idx) │ +│ 2. Construct Merkle proof (3 levels): │ +│ - Level 1: Leaf → Piece Merkle Root (PMR) │ +│ - Level 2: PMR → Chunk Merkle Root (CMR) │ +│ - Level 3: CMR → File Merkle Root (FMR) │ +│ 3. 
Submit proof to contract before deadline │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ CHALLENGE CONTRACT │ +│ │ +│ Verify proof: │ +│ 1. Hash the raw leaf data (encrypted bytes) │ +│ 2. Verify path: leaf_hash → PMR (using level-1 proof) │ +│ 3. Verify path: PMR → CMR (using level-2 proof) │ +│ 4. Verify path: CMR → FMR (using level-3 proof) │ +│ 5. Compare FMR with on-chain commitment │ +│ 6. If valid, mark challenge as passed; else slash/penalize │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +### Challenge Data Structure + +```solidity +struct Challenge { + bytes32 commitmentId; // Which storage commitment + uint32 chunkIndex; // Challenged chunk (determines node) + uint32 pieceIndex; // Challenged piece within chunk + uint32 leafIndex; // Challenged leaf within piece + uint64 deadline; // Block number deadline for response + bytes32 vrfProof; // VRF proof for randomness verification +} + +struct ChallengeProof { + bytes leafData; // Raw leaf bytes (encrypted) + bytes32[] leafToPmrPath; // Merkle path: leaf → PMR + bytes32[] pmrToCmrPath; // Merkle path: PMR → CMR + bytes32[] cmrToFmrPath; // Merkle path: CMR → FMR +} +``` + +### Challenge Timing + +| Parameter | Description | Typical Value | +|-----------|-------------|---------------| +| Challenge interval | Time between challenges per commitment | 1 hour | +| Response deadline | Time allowed for proof submission | 10 minutes | +| Consecutive failures | Failures before slashing | 3 | + +**Important**: Response deadline must be shorter than the time required to reconstruct a chunk from other nodes (to prevent lazy node attacks). 
+
+---
+
+## Verification Process
+
+### On-Chain Verification (Solidity)
+
+```solidity
+function verifyChallenge(
+    bytes32 commitmentId,
+    Challenge calldata challenge,
+    ChallengeProof calldata proof
+) external returns (bool) {
+    StorageCommitment storage commitment = commitments[commitmentId];
+
+    // 1. Verify VRF proof for challenge randomness
+    require(verifyVRF(challenge.vrfProof, commitmentId), "Invalid VRF");
+
+    // 2. Verify leaf data size. LEAF_SIZE is a protocol-wide constant:
+    //    StorageCommitment deliberately stores no per-blob leaf size
+    //    (see "Everything else is derivable from these fields" above).
+    require(proof.leafData.length == LEAF_SIZE, "Invalid leaf size");
+
+    // 3. Compute leaf hash
+    bytes32 leafHash = keccak256(proof.leafData);
+
+    // 4. Verify leaf → PMR path
+    bytes32 computedPMR = computeMerkleRoot(
+        leafHash,
+        challenge.leafIndex,
+        proof.leafToPmrPath
+    );
+
+    // 5. Verify PMR → CMR path
+    bytes32 computedCMR = computeMerkleRoot(
+        computedPMR,
+        challenge.pieceIndex,
+        proof.pmrToCmrPath
+    );
+
+    // 6. Verify CMR → FMR path
+    bytes32 computedFMR = computeMerkleRoot(
+        computedCMR,
+        challenge.chunkIndex,
+        proof.cmrToFmrPath
+    );
+
+    // 7.
Compare with on-chain commitment + require(computedFMR == commitment.fileMerkleRoot, "Invalid proof"); + + return true; +} + +function computeMerkleRoot( + bytes32 leaf, + uint256 index, + bytes32[] calldata proof +) internal pure returns (bytes32) { + bytes32 current = leaf; + for (uint256 i = 0; i < proof.length; i++) { + if (index % 2 == 0) { + current = keccak256(abi.encodePacked(current, proof[i])); + } else { + current = keccak256(abi.encodePacked(proof[i], current)); + } + index = index / 2; + } + return current; +} +``` + +### Proof Size Analysis + +| Level | Proof Elements | Size per Element | Typical Total | +|-------|---------------|------------------|---------------| +| Leaf → PMR | log₂(leaves_per_piece) | 32 bytes | ~320 bytes (10 levels) | +| PMR → CMR | log₂(pieces_per_chunk) | 32 bytes | ~256 bytes (8 levels) | +| CMR → FMR | log₂(num_chunks) | 32 bytes | ~224 bytes (7 levels) | +| Leaf data | 1 | leaf_size | ~256 bytes | +| **Total** | | | **~1 KB** | + +--- + +## Security Considerations + +### Attack Vectors and Mitigations + +| Attack | Description | Mitigation | +|--------|-------------|------------| +| **Data withholding** | Node claims to store data but doesn't | Random challenges require actual data | +| **Lazy node** | Node reconstructs data on-demand from peers instead of storing | Response deadline < reconstruction time | +| **Proof precomputation** | Precompute all possible proofs | Large leaf count makes this infeasible | +| **Collusion** | Nodes share data only for challenges | Unpredictable VRF-based challenge timing | +| **Sybil attack** | Single entity runs multiple nodes | Stake requirements, reputation system | +| **Grinding** | Manipulate random challenge selection | Verifiable Random Functions (VRF) | +| **Data exposure** | Storage nodes read user data | Client-side encryption (AES-256-GCM) | + +### Lazy Node Attack - Detailed Mitigation + +A malicious node could attempt to: +1. Not store its assigned chunk +2. 
When challenged, download `k` chunks from other nodes +3. Reconstruct its chunk using Reed-Solomon decoding +4. Respond to challenge with reconstructed data + +**Mitigation**: Set response deadline such that: +``` +deadline < time_to_download_k_chunks + time_to_decode +``` + +For a 16 MB chunk with k=64: +- Download 64 × 16 MB = 1 GB from network +- At 100 Mbps: ~80 seconds download time +- Decoding: ~5-10 seconds +- **Response deadline should be < 60 seconds** + +### On-Chain Randomness + +Challenge randomness must be unpredictable and unbiasable: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ VRF-BASED CHALLENGE SELECTION │ +│ │ +│ Input: │ +│ - validator_secret_key (sk) │ +│ - block_hash (public, from recent finalized block) │ +│ - commitment_id (identifies the storage deal) │ +│ │ +│ Process: │ +│ 1. vrf_output, vrf_proof = VRF_prove(sk, block_hash || id) │ +│ 2. challenge_seed = hash(vrf_output) │ +│ 3. chunk_idx = challenge_seed % num_chunks │ +│ │ +│ Verification: │ +│ - Anyone can verify VRF_verify(pk, block_hash || id, proof) │ +│ - Validator cannot predict/manipulate output │ +│ - Block producer cannot bias (uses past finalized block) │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Cryptographic Assumptions + +| Component | Algorithm | Security Level | +|-----------|-----------|----------------| +| Data encryption | AES-256-GCM | 256-bit | +| Hash function | Keccak-256 | 128-bit collision resistance | +| Randomness | VRF (e.g., ECVRF) | Unpredictable, verifiable | +| Erasure coding | Reed-Solomon GF(2^8) | Information-theoretic | + +### Slashing Conditions + +1. **Failed challenge**: Node fails to provide valid proof within deadline +2. **Invalid proof**: Merkle proof verification fails +3. 
**Repeated failures**: 3 consecutive failed challenges trigger slashing + +### Grace Period for Transient Failures + +To distinguish between "data lost" and "node temporarily unavailable": + +``` +Challenge States: + - PENDING: Challenge issued, awaiting response + - PASSED: Valid proof submitted + - FAILED: Invalid proof or deadline missed + - GRACE: First failure, node gets grace period + +Slashing Logic: + - 1st failure: Enter GRACE state, no slash + - 2nd consecutive failure: Warning, reduced rewards + - 3rd consecutive failure: Slash stake +``` + +--- + +## Economic Model + +IPC Storage uses a dual payment model: **write payments** for storing data and **read payments** for retrieving data. Together these incentivize node operators to both persist data reliably and serve it on demand. + +### Write Payment + +Users pay upfront to store data on the network. The cost is determined by the size of the data, the duration of storage, and a per-MB price set by the network. + +#### Pricing Formula + +``` +write_cost = price_per_mb × file_size_in_mb × duration +``` + +| Parameter | Description | +|-----------|-------------| +| `price_per_mb` | Network-determined price per megabyte per unit time | +| `file_size_in_mb` | Total size of the stored file in megabytes | +| `duration` | Storage duration (e.g., in epochs or blocks) | + +The user locks this payment into the storage contract when submitting their on-chain storage commitment. + +#### Node Reward Claims + +Node operators earn rewards by proving they continue to store their assigned data. A node can claim accumulated rewards at any time by submitting a claim transaction, provided they have been passing storage challenges. 
+ +``` +node_reward = data_size_stored_in_mb × price_per_mb × duration_since_last_claim +``` + +| Parameter | Description | +|-----------|-------------| +| `data_size_stored_in_mb` | Size of data this node is responsible for (its encoded chunk) | +| `price_per_mb` | Same per-MB rate from the storage commitment | +| `duration_since_last_claim` | Time elapsed since the node's last successful claim | + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ WRITE PAYMENT FLOW │ +│ │ +│ 1. User submits StorageCommitment on-chain │ +│ - Locks payment: price_per_mb × file_size_in_mb × duration │ +│ │ +│ 2. Storage nodes store their assigned chunks and respond to challenges │ +│ │ +│ 3. Node claims reward: │ +│ - Contract verifies node has passed challenges since last claim │ +│ - Pays out: data_size_stored_in_mb × price_per_mb × elapsed_time │ +│ - Resets the node's last_claim timestamp │ +│ │ +│ 4. Repeat until storage duration expires │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +Nodes that fail challenges forfeit rewards for the period in which they failed. Repeated failures lead to slashing (see [Security Considerations](#security-considerations)). + +### Read Payment + +Reading data uses an off-chain **payment ticket** model. Anyone who wants to retrieve a file issues a signed payment ticket to the node operator serving the requested chunk. The node operator can later redeem these tickets on-chain. + +#### Pricing Formula + +``` +read_cost = read_price_per_mb × file_size_in_mb +``` + +| Parameter | Description | +|-----------|-------------| +| `read_price_per_mb` | Network or market-determined price per megabyte for reads | +| `file_size_in_mb` | Size of the data being read in megabytes | + +#### Payment Ticket Flow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ READ PAYMENT FLOW │ +│ │ +│ 1. 
Reader requests chunk from a storage node │ +│ │ +│ 2. Reader issues a signed payment ticket: │ +│ - Ticket contains: reader address, node address, chunk id, │ +│ amount (read_price_per_mb × chunk_size_in_mb), │ +│ nonce, signature │ +│ │ +│ 3. Node validates the ticket signature and serves the chunk │ +│ │ +│ 4. Node accumulates tickets and redeems them on-chain in batches │ +│ - Contract verifies signatures and transfers payment │ +│ - Tickets are marked as spent to prevent double-redemption │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +#### Payment Ticket Structure + +```solidity +struct ReadPaymentTicket { + address reader; // Who is paying for the read + address nodeOperator; // Who is being paid + bytes32 chunkId; // Identifier of the chunk being read + uint256 amount; // Payment amount (read_price_per_mb × size) + uint256 nonce; // Unique nonce to prevent replay + bytes signature; // Reader's signature over the ticket +} +``` + +Batch redemption lets node operators amortize on-chain transaction costs by submitting multiple tickets in a single transaction. + +### Summary + +| Payment Type | Payer | Recipient | Pricing | Settlement | +|-------------|-------|-----------|---------|------------| +| **Write** | Data owner | Storage nodes | `price_per_mb × size × duration` | On-chain claims (challenge-gated) | +| **Read** | Data reader | Serving node | `read_price_per_mb × size` | Off-chain tickets, redeemed on-chain | + +--- + +## Repair Process + +When a storage node fails, the system must reconstruct and redistribute the lost chunk to maintain data availability. IPC Storage uses a **guardian-based repair model** where trusted parties monitor health and perform repairs on behalf of data owners. 
+ +### Repair Model Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ REPAIR RESPONSIBILITY │ +│ │ +│ Data Owner delegates monitoring to a trusted Guardian │ +│ │ +│ ┌──────────┐ delegates ┌──────────────┐ │ +│ │ Client │ ─────────────────────────▶ │ Guardian │ │ +│ │ (Owner) │ │ (Trusted) │ │ +│ └──────────┘ └──────────────┘ │ +│ │ │ +│ │ monitors challenges │ +│ │ initiates repair │ +│ │ selects new node │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ Storage │ │ +│ │ Nodes │ │ +│ └──────────────┘ │ +│ │ +│ No Guardian Delegated = Data at risk if client is offline │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Data Structures + +```solidity +// Trusted guardian registration (protocol-level) +struct Guardian { + address guardianAddress; + bytes32 publicKey; + uint256 stakedCollateral; // Skin in the game + bool isActive; +} + +// Per-commitment delegation (client delegates to guardian) +struct RepairDelegation { + bytes32 commitmentId; + address guardian; // Must be in trusted guardian set + uint64 expiresAt; +} + +// Repair event record +struct RepairRecord { + bytes32 commitmentId; + uint32 failedChunkIndex; + address failedNode; + address newNode; + address guardian; + uint256 slashedAmount; + uint256 guardianReward; // Portion of slash to guardian + uint256 clientReimbursement; // Portion of slash to client + uint64 timestamp; +} +``` + +### Repair Trigger Conditions + +Repair is triggered when ALL conditions are met: + +1. Node fails **N consecutive challenges** (e.g., N = 3) +2. A valid **RepairDelegation** exists for the commitment +3. 
Guardian is **active** in the trusted guardian set + +``` +Challenge History: + Challenge 1: PASSED ─┐ + Challenge 2: PASSED │ Counter resets on pass + Challenge 3: FAILED ─┘ consecutive_failures = 1 + Challenge 4: FAILED consecutive_failures = 2 + Challenge 5: FAILED consecutive_failures = 3 → TRIGGER REPAIR +``` + +### Repair Execution Flow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ GUARDIAN REPAIR EXECUTION │ +│ │ +│ 1. DETECT │ +│ Guardian monitors ChallengeContract events on-chain │ +│ Detects: Node X failed 3rd consecutive challenge for Commitment C │ +│ │ +│ 2. INITIATE ON-CHAIN │ +│ Guardian calls: RepairContract.initiateRepair(commitmentId, chunkIndex) │ +│ │ +│ Contract verifies: │ +│ - Guardian is delegated for this commitment │ +│ - Node has failed required consecutive challenges │ +│ │ +│ Contract actions: │ +│ - Marks chunk as REPAIRING (prevents duplicate repairs) │ +│ - Slashes failed node's stake │ +│ - Emits RepairInitiated event │ +│ │ +│ 3. RECONSTRUCT OFF-CHAIN │ +│ Guardian performs reconstruction for each lost shard: │ +│ a. Derive which shards the failed node held (deterministic mapping) │ +│ b. For each affected chunk, fetch k shards from other nodes │ +│ c. RS-decode to reconstruct the chunk │ +│ d. RS-encode to regenerate the missing shard │ +│ e. Verify reconstructed shard matches on-chain CMR │ +│ │ +│ 4. SELECT NEW NODE │ +│ Guardian selects replacement node (full discretion): │ +│ - From available node pool with sufficient stake │ +│ - Excluding nodes already storing shards for this commitment │ +│ │ +│ 5. DISTRIBUTE TO NEW NODE │ +│ Guardian sends to new node: │ +│ - Reconstructed shard data (encrypted) │ +│ - Merkle proof (shard → CMR → FMR) │ +│ New node validates and stores under blob_id/chunk_index/shard_index │ +│ │ +│ 6. 
COMPLETE ON-CHAIN │ +│ Guardian calls: RepairContract.completeRepair( │ +│ commitmentId, chunkIndex, shardIndex, newNodeId, nodeSignature │ +│ ) │ +│ │ +│ Contract verifies: │ +│ - Repair was initiated by this guardian │ +│ - New node signed acknowledgment of storage │ +│ - Within repair deadline │ +│ │ +│ Contract actions: │ +│ - Records ShardOverride: (blobId, chunkIndex, shardIndex) → newNode │ +│ - Distributes slashed stake to guardian + client │ +│ - Marks shard as HEALTHY │ +│ - Emits RepairCompleted event │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Economic Flow + +The repair economic model is simple: **slashed stake compensates guardian and client**. + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ REPAIR ECONOMICS │ +│ │ +│ INPUT: │ +│ Slashed stake from failed node: S │ +│ │ +│ DISTRIBUTION: │ +│ Guardian reward: S × guardian_pct (e.g., 50%) │ +│ Client reimbursement: S × (1 - guardian_pct) │ +│ │ +│ NEW NODE PAYMENT: │ +│ New node takes over the storage deal from failed node │ +│ Receives ongoing storage payments from original commitment │ +│ │ +│ EXAMPLE (S = 100 tokens, guardian_pct = 50%): │ +│ Guardian receives: 50 tokens (covers their bandwidth + profit) │ +│ Client receives: 50 tokens (partial compensation for risk) │ +│ New node: Inherits storage deal, earns future payments │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Batch Repairs + +When multiple chunks fail (e.g., correlated node failures), repairs can be batched: + +```solidity +function initiateBatchRepair( + bytes32 commitmentId, + uint32[] calldata chunkIndices +) external onlyDelegatedGuardian(commitmentId) { + for (uint i = 0; i < chunkIndices.length; i++) { + _initiateRepair(commitmentId, chunkIndices[i]); + } +} + +function completeBatchRepair( + bytes32 commitmentId, + uint32[] calldata chunkIndices, + bytes32[] calldata newNodeIds, + 
bytes[] calldata nodeSignatures +) external { + require(chunkIndices.length == newNodeIds.length, "Length mismatch"); + for (uint i = 0; i < chunkIndices.length; i++) { + _completeRepair(commitmentId, chunkIndices[i], newNodeIds[i], nodeSignatures[i]); + } +} +``` + +### Repair Deadline + +Guardian must complete repair within a deadline to prevent indefinite REPAIRING states: + +``` +repair_deadline = initiation_block + MAX_REPAIR_BLOCKS (e.g., 1 hour) + +If deadline exceeded: + - Any guardian can call: RepairContract.expireRepair(commitmentId, chunkIndex) + - Chunk status: REPAIRING → FAILED + - Original guardian forfeits the repair opportunity + - Another delegated guardian (or client) can initiate fresh repair +``` + +### Chunk States + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CHUNK STATE MACHINE │ +│ │ +│ ┌───────────────────────────────────┐ │ +│ │ │ │ +│ ▼ │ │ +│ ┌─────────┐ challenge ┌─────────┐ N failures ┌┴────────┐ │ +│ │ HEALTHY │ ────────────▶ │ AT_RISK │ ─────────────▶ │ SLASHED │ │ +│ └─────────┘ failed └─────────┘ └─────────┘ │ +│ ▲ │ │ │ +│ │ │ challenge │ │ +│ │ │ passed ▼ │ +│ │ │ ┌───────────┐ │ +│ │ └──────────────────▶ │ REPAIRING │ │ +│ │ └───────────┘ │ +│ │ │ │ +│ │ repair completed │ │ +│ └────────────────────────────────────────────────────┘ │ +│ │ +│ repair deadline exceeded │ +│ ┌───────────┐ │ +│ │ FAILED │ (requires fresh repair initiation) │ +│ └───────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Guardian Trust Model + +Guardians are a **trusted, permissioned set** registered at the protocol level: + +| Requirement | Description | +|-------------|-------------| +| **Staked Collateral** | Guardians must stake tokens to participate | +| **Registration** | Protocol governance approves guardian additions | +| **Slashing** | Guardians can be slashed for malicious behavior | +| **Reputation** | Track record of successful repairs 
visible on-chain | + +This trusted model avoids DDoS and sybil attack vectors on the repair mechanism. + +### No Guardian Fallback + +If a client has **no guardian delegated**: + +- Client must monitor challenges themselves +- Client must perform repairs themselves (same flow, but client is the actor) +- If client is offline and node fails → **data becomes at-risk** +- Data can still be recovered as long as k chunks remain available +- Once fewer than k chunks available → **data is lost** + +This is the client's responsibility. The protocol does not provide automatic fallback. + +--- + +## References + +1. **Storj Whitepaper**: [https://storj.io/storj.pdf](https://storj.io/storj.pdf) + - Reed-Solomon erasure coding parameters + - Distributed storage architecture + +2. **Filecoin Spec - Proof of Data Possession**: [https://spec.filecoin.io/](https://spec.filecoin.io/) + - PDP challenge-response protocol + - Merkle tree construction for storage proofs + +3. **Reed-Solomon Error Correction**: [https://en.wikipedia.org/wiki/Reed–Solomon_error_correction](https://en.wikipedia.org/wiki/Reed–Solomon_error_correction) + - Mathematical foundations of erasure coding + +4. **Merkle Trees**: [https://en.wikipedia.org/wiki/Merkle_tree](https://en.wikipedia.org/wiki/Merkle_tree) + - Hash tree structure and verification + +5. 
**ECVRF (Verifiable Random Functions)**: [https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-vrf](https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-vrf) + - VRF specification for unpredictable randomness + +--- + +## Appendix: Example Calculation + +### Parameters for 1 GB File + +``` +Original data size: 1 GB = 1,073,741,824 bytes +After encryption: ~1 GB (AES-GCM adds minimal overhead) + +Max chunk size: 16 MiB = 16,777,216 bytes +Num chunks: ceil(1 GB / 16 MiB) = 64 + +Data shards (k): 15 +Parity shards (m): 8 +Total shards per chunk: 23 + +Total shards: 64 chunks × 23 shards = 1,472 shards +Storage nodes: 30 +Shards per node: ~49 (1,472 / 30) + +On-chain commitment: + - blob_id: 32 bytes + - original_len: 8 bytes + - data_shards: 2 bytes + - parity_shards: 2 bytes + - encoding_epoch: 8 bytes + Total: 52 bytes (+ Merkle root, expiry, owner) + +Storage expansion: 1.53× (23/15) +Recovery threshold: Any 15 of 23 shards per chunk +Shard→node mapping: Deterministic (no storage cost) + +Challenge response deadline: 30 seconds + (Reconstruction time at 100 Mbps: ~90 seconds) +``` diff --git a/ipc-storage/erasure-encoding/Cargo.toml b/ipc-storage/erasure-encoding/Cargo.toml new file mode 100644 index 000000000..dc0dffa70 --- /dev/null +++ b/ipc-storage/erasure-encoding/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "erasure-encoding" +version = "0.1.0" +authors.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +reed-solomon-simd = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] +blake3 = { workspace = true } +memmap2 = { workspace = true } +rand = { workspace = true } +tempfile = { workspace = true } diff --git a/ipc-storage/erasure-encoding/src/assign.rs b/ipc-storage/erasure-encoding/src/assign.rs new file mode 100644 index 000000000..c2b9fc095 --- /dev/null +++ b/ipc-storage/erasure-encoding/src/assign.rs @@ -0,0 +1,266 @@ +use crate::traits::NodeAssigner; +use crate::types::{AssignedShard, 
NodeId, Shard}; + +/// A 32-byte blob identifier used to derive deterministic rotation offset. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct BlobId(pub [u8; 32]); + +impl BlobId { + /// Compute `self % divisor` over all 32 bytes (big-endian big-number modulo). + fn modulo(&self, divisor: usize) -> usize { + let mut remainder: u64 = 0; + for &byte in &self.0 { + remainder = (remainder * 256 + byte as u64) % divisor as u64; + } + remainder as usize + } +} + +/// Deterministic node assigner using blob_id as starting rotation offset. +/// +/// The blob_id determines where in the node list assignment begins. +/// Each subsequent shard advances by one position. This implements the +/// DESIGN.md formula: +/// ```text +/// rotation_offset = blob_id % num_nodes +/// shard_global = chunk_index * (k + m) + shard_index +/// node = nodes[(shard_global + rotation_offset) % num_nodes] +/// ``` +pub struct DeterministicAssigner { + rotation_offset: usize, + position: usize, +} + +impl DeterministicAssigner { + pub fn new(blob_id: BlobId, num_nodes: usize) -> Self { + Self { + rotation_offset: blob_id.modulo(num_nodes), + position: 0, + } + } +} + +impl NodeAssigner for DeterministicAssigner { + type Shard = Shard; + + fn assign(&mut self, shard: Shard, nodes: &[NodeId]) -> AssignedShard { + let node_index = (self.position + self.rotation_offset) % nodes.len(); + self.position += 1; + AssignedShard { + shard, + node: nodes[node_index], + } + } +} + +/// Compute which node holds a specific shard without needing encoded data. +/// +/// This is the canonical mapping function that both distributor and retriever +/// use to determine shard placement. Any party can recompute this from on-chain +/// parameters alone. 
+pub fn shard_node( + blob_id: &BlobId, + chunk_index: usize, + shard_index: usize, + shards_per_chunk: usize, + nodes: &[NodeId], +) -> NodeId { + let num_nodes = nodes.len(); + let rotation_offset = blob_id.modulo(num_nodes); + let shard_global = chunk_index * shards_per_chunk + shard_index; + let node_index = (shard_global + rotation_offset) % num_nodes; + nodes[node_index] +} + +/// Compute the full shard-to-node mapping for an entire blob. +/// +/// Returns `(chunk_index, shard_index, NodeId)` for every shard. +pub fn full_shard_mapping( + blob_id: &BlobId, + num_chunks: usize, + data_shards: usize, + parity_shards: usize, + nodes: &[NodeId], +) -> Vec<(usize, usize, NodeId)> { + let shards_per_chunk = data_shards + parity_shards; + let mut mapping = Vec::with_capacity(num_chunks * shards_per_chunk); + for chunk_idx in 0..num_chunks { + for shard_idx in 0..shards_per_chunk { + let node = shard_node(blob_id, chunk_idx, shard_idx, shards_per_chunk, nodes); + mapping.push((chunk_idx, shard_idx, node)); + } + } + mapping +} + +/// Given a node, return all `(chunk_index, shard_index)` pairs assigned to it. +/// +/// Useful for a storage node to know which shards it should expect/hold. 
+pub fn shards_for_node( + blob_id: &BlobId, + num_chunks: usize, + data_shards: usize, + parity_shards: usize, + nodes: &[NodeId], + target_node: &NodeId, +) -> Vec<(usize, usize)> { + let shards_per_chunk = data_shards + parity_shards; + let mut result = Vec::new(); + for chunk_idx in 0..num_chunks { + for shard_idx in 0..shards_per_chunk { + let node = shard_node(blob_id, chunk_idx, shard_idx, shards_per_chunk, nodes); + if node == *target_node { + result.push((chunk_idx, shard_idx)); + } + } + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::encode::{encode_and_assign, DEFAULT_MAX_CHUNK_SIZE}; + use crate::error::Result; + use crate::reed_solomon::ReedSolomonEncoder; + use std::collections::HashSet; + + fn make_nodes(n: usize) -> Vec { + (0..n) + .map(|i| { + let mut id = [0u8; 32]; + id[0] = i as u8; + NodeId(id) + }) + .collect() + } + + fn make_blob_id(seed: u8) -> BlobId { + let mut id = [0u8; 32]; + id[0] = seed; + BlobId(id) + } + + #[test] + fn deterministic_same_inputs_same_outputs() { + let blob_id = make_blob_id(42); + let nodes = make_nodes(10); + + let mapping1 = full_shard_mapping(&blob_id, 3, 4, 2, &nodes); + let mapping2 = full_shard_mapping(&blob_id, 3, 4, 2, &nodes); + + assert_eq!(mapping1, mapping2); + } + + #[test] + fn assigner_matches_shard_node() { + let blob_id = make_blob_id(7); + let nodes = make_nodes(5); + let k = 3; + let m = 2; + let data = vec![99u8; 1000]; + + let (_meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + DeterministicAssigner::new(blob_id, nodes.len()), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + + for chunk in &chunks { + for assigned in &chunk.shards { + let expected = + shard_node(&blob_id, chunk.chunk_index, assigned.shard.index, k + m, &nodes); + assert_eq!( + assigned.node, expected, + "Mismatch at chunk={} shard={}", + chunk.chunk_index, assigned.shard.index + ); + } + } + } + + #[test] + fn different_blob_ids_different_offsets() { + let nodes = 
make_nodes(10); + + let mapping_a = full_shard_mapping(&make_blob_id(1), 1, 4, 2, &nodes); + let mapping_b = full_shard_mapping(&make_blob_id(2), 1, 4, 2, &nodes); + + let nodes_a: Vec<_> = mapping_a.iter().map(|(_, _, n)| *n).collect(); + let nodes_b: Vec<_> = mapping_b.iter().map(|(_, _, n)| *n).collect(); + + assert_ne!(nodes_a, nodes_b); + } + + #[test] + fn shards_for_node_covers_all_shards() { + let blob_id = make_blob_id(99); + let nodes = make_nodes(5); + let k = 3; + let m = 2; + let num_chunks = 4; + let total_shards = num_chunks * (k + m); + + let mut all_shards: HashSet<(usize, usize)> = HashSet::new(); + for node in &nodes { + let shards = shards_for_node(&blob_id, num_chunks, k, m, &nodes, node); + for s in shards { + assert!(all_shards.insert(s), "Duplicate shard assignment: {:?}", s); + } + } + + assert_eq!(all_shards.len(), total_shards); + } + + #[test] + fn hand_calculated_example() { + // blob_id = [7, 0, 0, ...], big-endian modulo 5: + // byte 0: (0 * 256 + 7) % 5 = 2, rest are 0 → offset = 2 + let blob_id = make_blob_id(7); + let nodes = make_nodes(5); + let shards_per_chunk = 3; + + // chunk 0, shard 0: (0 + 2) % 5 = 2 + assert_eq!(shard_node(&blob_id, 0, 0, shards_per_chunk, &nodes), nodes[2]); + // chunk 0, shard 1: (1 + 2) % 5 = 3 + assert_eq!(shard_node(&blob_id, 0, 1, shards_per_chunk, &nodes), nodes[3]); + // chunk 0, shard 2: (2 + 2) % 5 = 4 + assert_eq!(shard_node(&blob_id, 0, 2, shards_per_chunk, &nodes), nodes[4]); + // chunk 1, shard 0: (3 + 2) % 5 = 0 + assert_eq!(shard_node(&blob_id, 1, 0, shards_per_chunk, &nodes), nodes[0]); + // chunk 1, shard 1: (4 + 2) % 5 = 1 + assert_eq!(shard_node(&blob_id, 1, 1, shards_per_chunk, &nodes), nodes[1]); + } + + #[test] + fn multi_chunk_assigner_consistency() { + let blob_id = make_blob_id(13); + let nodes = make_nodes(8); + let k = 2; + let m = 1; + let data = vec![1u8; DEFAULT_MAX_CHUNK_SIZE + 100]; + + let (_meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + 
DeterministicAssigner::new(blob_id, nodes.len()), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + + assert_eq!(chunks.len(), 2); + for chunk in &chunks { + for assigned in &chunk.shards { + let expected = + shard_node(&blob_id, chunk.chunk_index, assigned.shard.index, k + m, &nodes); + assert_eq!(assigned.node, expected); + } + } + } +} diff --git a/ipc-storage/erasure-encoding/src/decode.rs b/ipc-storage/erasure-encoding/src/decode.rs new file mode 100644 index 000000000..ffa69306b --- /dev/null +++ b/ipc-storage/erasure-encoding/src/decode.rs @@ -0,0 +1,189 @@ +use crate::error::{ErasureError, Result}; +use crate::traits::Decoder; +use crate::types::Shard; + +/// Per-chunk recovery input for [`decode_chunks`]. +#[derive(Debug, Clone)] +pub struct ChunkRecoveryInput { + pub chunk_index: usize, + /// Length of the original (unpadded) data in this chunk. + pub original_data_len: usize, + /// Available shards (indices 0..k = data, k..k+m = parity). + pub shards: Vec, + /// Number of data shards (k) used during encoding. + pub num_data_shards: usize, + /// Number of parity shards (m) used during encoding. + pub num_parity_shards: usize, +} + +/// Reconstruct the original data from a set of chunk recovery inputs. +/// +/// Each entry in `chunks` describes one chunk's available shards. Chunks are +/// sorted by `chunk_index`, decoded individually, and concatenated. The result +/// is truncated to `original_total_len` to strip padding. 
+pub fn decode_chunks>( + chunks: &mut [ChunkRecoveryInput], + original_total_len: usize, +) -> Result> { + if chunks.is_empty() { + return Err(ErasureError::EmptyData); + } + + chunks.sort_by_key(|c| c.chunk_index); + + let mut output = Vec::with_capacity(original_total_len); + + for chunk in chunks.iter() { + let mut decoded = D::decode( + &chunk.shards, + chunk.num_data_shards, + chunk.num_parity_shards, + )?; + decoded.truncate(chunk.original_data_len); + output.extend_from_slice(&decoded); + } + + output.truncate(original_total_len); + Ok(output) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::encode::{encode_and_assign, RotatingAssigner, DEFAULT_MAX_CHUNK_SIZE}; + use crate::error::Result; + use crate::reed_solomon::ReedSolomonEncoder; + use crate::types::NodeId; + + fn make_nodes(n: usize) -> Vec { + (0..n) + .map(|i| { + let mut id = [0u8; 32]; + id[0] = i as u8; + NodeId(id) + }) + .collect() + } + + fn to_recovery_inputs( + encoded: &[crate::types::EncodedChunk], + k: usize, + m: usize, + drop_indices: &[usize], + ) -> Vec { + encoded + .iter() + .map(|ec| { + let shards: Vec<_> = ec + .shards + .iter() + .filter(|s| !drop_indices.contains(&s.shard.index)) + .map(|s| s.shard.clone()) + .collect(); + ChunkRecoveryInput { + chunk_index: ec.chunk_index, + original_data_len: ec.original_data_len, + shards, + num_data_shards: k, + num_parity_shards: m, + } + }) + .collect() + } + + #[test] + fn round_trip_single_chunk() { + let k = 4; + let m = 2; + let nodes = make_nodes(6); + let data: Vec = (0..1000).map(|i| (i % 251) as u8).collect(); + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let encoded: Vec<_> = iter.collect::>>().unwrap(); + + let mut inputs = to_recovery_inputs(&encoded, k, m, &[]); + let recovered = + decode_chunks::(&mut inputs, meta.original_len).unwrap(); + assert_eq!(recovered, data); + } + + #[test] + fn round_trip_with_losses() { + let k = 4; + let m = 2; + 
let nodes = make_nodes(6); + let data: Vec = (0..500).map(|i| (i % 199) as u8).collect(); + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let encoded: Vec<_> = iter.collect::>>().unwrap(); + + // Drop originals 0 and 1. + let mut inputs = to_recovery_inputs(&encoded, k, m, &[0, 1]); + let recovered = + decode_chunks::(&mut inputs, meta.original_len).unwrap(); + assert_eq!(recovered, data); + } + + #[test] + fn round_trip_multi_chunk() { + let k = 2; + let m = 1; + let nodes = make_nodes(3); + let data: Vec = (0..(DEFAULT_MAX_CHUNK_SIZE + 500)) + .map(|i| (i % 241) as u8) + .collect(); + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let encoded: Vec<_> = iter.collect::>>().unwrap(); + assert_eq!(encoded.len(), 2); + + let mut inputs = to_recovery_inputs(&encoded, k, m, &[]); + let recovered = + decode_chunks::(&mut inputs, meta.original_len).unwrap(); + assert_eq!(recovered, data); + } + + #[test] + fn padding_correctness_odd_size() { + let k = 3; + let m = 2; + let nodes = make_nodes(5); + let data: Vec = (0..77).map(|i| (i * 3 % 256) as u8).collect(); + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let encoded: Vec<_> = iter.collect::>>().unwrap(); + + let mut inputs = to_recovery_inputs(&encoded, k, m, &[]); + let recovered = + decode_chunks::(&mut inputs, meta.original_len).unwrap(); + assert_eq!(recovered, data); + } +} diff --git a/ipc-storage/erasure-encoding/src/encode.rs b/ipc-storage/erasure-encoding/src/encode.rs new file mode 100644 index 000000000..191a12e5c --- /dev/null +++ b/ipc-storage/erasure-encoding/src/encode.rs @@ -0,0 +1,220 @@ +use crate::error::{ErasureError, Result}; +use crate::traits::{Encoder, NodeAssigner}; +use crate::types::{AssignedShard, EncodedChunk, EncodingMetadata, NodeId, Shard}; + +/// Default maximum chunk size (16 
MiB) per DESIGN.md. +pub const DEFAULT_MAX_CHUNK_SIZE: usize = 16 * 1024 * 1024; + +/// Node assigner that rotates through the node list sequentially. +/// +/// Each shard advances the position by one, so consecutive chunks naturally +/// map to different node subsets. +pub struct RotatingAssigner { + position: usize, +} + +impl RotatingAssigner { + pub fn new() -> Self { + Self { position: 0 } + } +} + +impl Default for RotatingAssigner { + fn default() -> Self { + Self::new() + } +} + +impl NodeAssigner for RotatingAssigner { + type Shard = Shard; + + fn assign(&mut self, shard: Shard, nodes: &[NodeId]) -> AssignedShard { + let node = nodes[self.position % nodes.len()]; + self.position += 1; + AssignedShard { shard, node } + } +} + +/// Encode `data` using erasure coding and assign shards to `nodes`. +/// +/// Returns `(metadata, iterator)`. The iterator yields one [`EncodedChunk`] per +/// chunk so that only one chunk's shards are in memory at a time. +/// +/// Each chunk is passed to `E::encode` which handles splitting, padding, and +/// encoding internally, yielding all k+m shards. Shards are assigned to nodes +/// via the provided [`NodeAssigner`]. 
+pub fn encode_and_assign<'a, E: Encoder, N: NodeAssigner + 'a>( + data: &'a [u8], + data_chunks: usize, + parity_chunks: usize, + nodes: &'a [NodeId], + mut node_assigner: N, +) -> Result<( + EncodingMetadata, + impl Iterator> + 'a, +)> { + if nodes.is_empty() { + return Err(ErasureError::NotEnoughNodes { + needed: 1, + available: 0, + }); + } + if data.is_empty() { + return Err(ErasureError::EmptyData); + } + if data_chunks == 0 { + return Err(ErasureError::InvalidDataShards(data_chunks)); + } + if parity_chunks == 0 { + return Err(ErasureError::InvalidParityShards(parity_chunks)); + } + + let num_chunks = data.len().div_ceil(DEFAULT_MAX_CHUNK_SIZE); + + let metadata = EncodingMetadata { + original_len: data.len(), + num_chunks, + data_shards: data_chunks, + parity_shards: parity_chunks, + }; + + // the total number of shards for each chunk after erasure encoding + + let total_shards = data_chunks + parity_chunks; + let iter = (0..num_chunks).map(move |chunk_index| { + let chunk_start = chunk_index * DEFAULT_MAX_CHUNK_SIZE; + let chunk_end = (chunk_start + DEFAULT_MAX_CHUNK_SIZE).min(data.len()); + let chunk_data = &data[chunk_start..chunk_end]; + let original_data_len = chunk_data.len(); + + let all_shards = E::encode(chunk_data, data_chunks, parity_chunks)?; + + let mut assigned: Vec = Vec::with_capacity(total_shards); + for shard in all_shards { + assigned.push(node_assigner.assign(shard, nodes)); + } + + Ok(EncodedChunk { + chunk_index, + original_data_len, + shards: assigned, + }) + }); + + Ok((metadata, iter)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::reed_solomon::ReedSolomonEncoder; + + fn make_nodes(n: usize) -> Vec { + (0..n) + .map(|i| { + let mut id = [0u8; 32]; + id[0] = i as u8; + NodeId(id) + }) + .collect() + } + + #[test] + fn encode_and_assign_basic() { + let k = 4; + let m = 2; + let nodes = make_nodes(6); + let data = vec![42u8; 1000]; + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + 
RotatingAssigner::new(), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + + assert_eq!(meta.original_len, 1000); + assert_eq!(meta.num_chunks, 1); + assert_eq!(meta.data_shards, k); + assert_eq!(meta.parity_shards, m); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].shards.len(), k + m); + } + + #[test] + fn encode_and_assign_multi_chunk() { + let k = 2; + let m = 1; + let nodes = make_nodes(3); + let data = vec![7u8; DEFAULT_MAX_CHUNK_SIZE + 100]; + + let (meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + + assert_eq!(meta.num_chunks, 2); + assert_eq!(chunks.len(), 2); + assert_eq!(chunks[0].original_data_len, DEFAULT_MAX_CHUNK_SIZE); + assert_eq!(chunks[1].original_data_len, 100); + } + + #[test] + fn error_empty() { + let nodes = make_nodes(3); + let result = + encode_and_assign::(&[], 2, 1, &nodes, RotatingAssigner::new()); + assert!(matches!(result, Err(ErasureError::EmptyData))); + } + + #[test] + fn fewer_nodes_than_shards_ok() { + let k = 4; + let m = 2; + let nodes = make_nodes(3); + let data = vec![42u8; 1000]; + + let (_meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + assert_eq!(chunks[0].shards.len(), k + m); + } + + #[test] + fn rotating_assigner_distributes_across_chunks() { + let k = 2; + let m = 1; + let nodes = make_nodes(6); + let data = vec![1u8; DEFAULT_MAX_CHUNK_SIZE * 2]; + + let (_meta, iter) = encode_and_assign::( + &data, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .unwrap(); + let chunks: Vec<_> = iter.collect::>>().unwrap(); + + // Chunk 0 gets nodes [0,1,2], chunk 1 gets nodes [3,4,5]. 
+ let c0_nodes: Vec<_> = chunks[0].shards.iter().map(|s| s.node).collect(); + let c1_nodes: Vec<_> = chunks[1].shards.iter().map(|s| s.node).collect(); + assert_eq!(c0_nodes, &nodes[0..3]); + assert_eq!(c1_nodes, &nodes[3..6]); + } +} diff --git a/ipc-storage/erasure-encoding/src/error.rs b/ipc-storage/erasure-encoding/src/error.rs new file mode 100644 index 000000000..66d26c253 --- /dev/null +++ b/ipc-storage/erasure-encoding/src/error.rs @@ -0,0 +1,33 @@ +use thiserror::Error; + +pub type Result = std::result::Result; + +#[derive(Debug, Error)] +pub enum ErasureError { + #[error("input data is empty")] + EmptyData, + + #[error("invalid data shard count: {0} (must be > 0)")] + InvalidDataShards(usize), + + #[error("invalid parity shard count: {0} (must be > 0)")] + InvalidParityShards(usize), + + #[error("not enough nodes: need {needed}, have {available}")] + NotEnoughNodes { needed: usize, available: usize }, + + #[error("shard count mismatch: expected {expected}, got {actual}")] + ShardSizeMismatch { expected: usize, actual: usize }, + + #[error("not enough shards for decoding: need {needed}, have {available}")] + NotEnoughShards { needed: usize, available: usize }, + + #[error("shard index {index} out of range (max {max})")] + ShardIndexOutOfRange { index: usize, max: usize }, + + #[error("duplicate shard index: {0}")] + DuplicateShardIndex(usize), + + #[error("reed-solomon error: {0}")] + ReedSolomon(#[from] reed_solomon_simd::Error), +} diff --git a/ipc-storage/erasure-encoding/src/lib.rs b/ipc-storage/erasure-encoding/src/lib.rs new file mode 100644 index 000000000..d2c7e6344 --- /dev/null +++ b/ipc-storage/erasure-encoding/src/lib.rs @@ -0,0 +1,15 @@ +pub mod assign; +pub mod decode; +pub mod encode; +pub mod error; +pub mod reed_solomon; +pub mod traits; +pub mod types; + +pub use assign::{full_shard_mapping, shard_node, shards_for_node, BlobId, DeterministicAssigner}; +pub use decode::{decode_chunks, ChunkRecoveryInput}; +pub use 
encode::{encode_and_assign, RotatingAssigner, DEFAULT_MAX_CHUNK_SIZE}; +pub use error::ErasureError; +pub use reed_solomon::ReedSolomonEncoder; +pub use traits::{Decoder, Encoder, NodeAssigner}; +pub use types::{AssignedShard, EncodedChunk, EncodingMetadata, NodeId, Shard}; diff --git a/ipc-storage/erasure-encoding/src/reed_solomon.rs b/ipc-storage/erasure-encoding/src/reed_solomon.rs new file mode 100644 index 000000000..a90dbb6a5 --- /dev/null +++ b/ipc-storage/erasure-encoding/src/reed_solomon.rs @@ -0,0 +1,205 @@ +use std::collections::HashSet; + +use crate::error::{ErasureError, Result}; +use crate::traits::{Decoder, Encoder}; +use crate::types::Shard; + +/// Reed-Solomon encoder/decoder backed by `reed-solomon-simd`. +pub struct ReedSolomonEncoder; + +/// Split raw data into `k` equal shards with zero-padding. +/// Shard size is rounded up to even (reed-solomon-simd requirement). +fn split_into_shards(data: &[u8], k: usize) -> Vec> { + let mut shard_size = data.len().div_ceil(k); + if shard_size % 2 != 0 { + shard_size += 1; + } + if shard_size == 0 { + shard_size = 2; + } + + let mut shards = Vec::with_capacity(k); + for i in 0..k { + let start = i * shard_size; + let mut shard = vec![0u8; shard_size]; + if start < data.len() { + let end = (start + shard_size).min(data.len()); + shard[..end - start].copy_from_slice(&data[start..end]); + } + shards.push(shard); + } + shards +} + +impl Encoder for ReedSolomonEncoder { + type Shard = Shard; + + fn encode( + data: &[u8], + num_data_chunks: usize, + num_parity_chunks: usize, + ) -> Result> { + if num_data_chunks == 0 { + return Err(ErasureError::InvalidDataShards(num_data_chunks)); + } + if num_parity_chunks == 0 { + return Err(ErasureError::InvalidParityShards(num_parity_chunks)); + } + + let original_shards = split_into_shards(data, num_data_chunks); + let parity = + reed_solomon_simd::encode(num_data_chunks, num_parity_chunks, &original_shards)?; + + // Yield k original shards (index 0..k) then m parity shards 
(index k..k+m). + let iter = original_shards + .into_iter() + .enumerate() + .map(|(i, data)| Shard { index: i, data }) + .chain(parity.into_iter().enumerate().map(move |(i, data)| Shard { + index: num_data_chunks + i, + data, + })); + + Ok(iter) + } +} + +impl Decoder for ReedSolomonEncoder { + type Shard = Shard; + + fn decode( + shards: &[Shard], + num_data_chunks: usize, + num_parity_chunks: usize, + ) -> Result> { + let mut seen = HashSet::with_capacity(shards.len()); + for s in shards { + if !seen.insert(s.index) { + return Err(ErasureError::DuplicateShardIndex(s.index)); + } + } + + let total_available = seen.len(); + if total_available < num_data_chunks { + return Err(ErasureError::NotEnoughShards { + needed: num_data_chunks, + available: total_available, + }); + } + + // Split by index: 0..k = original, k..k+m = recovery. + let original: Vec<_> = shards + .iter() + .filter(|s| s.index < num_data_chunks) + .map(|s| (s.index, s.data.as_slice())) + .collect(); + let recovery: Vec<_> = shards + .iter() + .filter(|s| s.index >= num_data_chunks) + .map(|s| (s.index - num_data_chunks, s.data.as_slice())) + .collect(); + + let restored = reed_solomon_simd::decode( + num_data_chunks, + num_parity_chunks, + original.iter().copied(), + recovery.iter().copied(), + )?; + + // Merge available + restored originals in index order, concatenate. 
+ let mut all_originals: Vec> = vec![None; num_data_chunks]; + for &(idx, data) in &original { + all_originals[idx] = Some(data); + } + for (idx, data) in &restored { + all_originals[*idx] = Some(data.as_slice()); + } + + let mut result = Vec::new(); + for (i, opt) in all_originals.iter().enumerate() { + let shard = opt.ok_or(ErasureError::NotEnoughShards { + needed: num_data_chunks, + available: i, + })?; + result.extend_from_slice(shard); + } + + Ok(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_yields_all_shards() { + let data = vec![42u8; 256]; + let shards: Vec<_> = ReedSolomonEncoder::encode(&data, 4, 2).unwrap().collect(); + assert_eq!(shards.len(), 6); + // Indices 0..4 original, 4..6 parity. + for (i, s) in shards.iter().enumerate() { + assert_eq!(s.index, i); + } + // All shards same size. + let size = shards[0].data.len(); + assert!(shards.iter().all(|s| s.data.len() == size)); + } + + #[test] + fn round_trip_full_shards() { + let data = vec![42u8; 256]; + let shards: Vec<_> = ReedSolomonEncoder::encode(&data, 4, 2).unwrap().collect(); + + let recovered = ReedSolomonEncoder::decode(&shards, 4, 2).unwrap(); + assert_eq!(&recovered[..data.len()], &data); + } + + #[test] + fn decode_with_losses() { + let data = vec![42u8; 256]; + let all_shards: Vec<_> = ReedSolomonEncoder::encode(&data, 4, 2).unwrap().collect(); + + // Drop originals 0 and 1, keep 2, 3 + both parity. + let available: Vec<_> = all_shards.into_iter().filter(|s| s.index >= 2).collect(); + assert_eq!(available.len(), 4); // shards 2, 3, 4, 5 + + let recovered = ReedSolomonEncoder::decode(&available, 4, 2).unwrap(); + assert_eq!(&recovered[..data.len()], &data); + } + + #[test] + fn decode_max_loss() { + let data = vec![42u8; 512]; + let all_shards: Vec<_> = ReedSolomonEncoder::encode(&data, 4, 3).unwrap().collect(); + + // Lose originals 0,1,2, keep index 3 + all 3 parity. 
+ let available: Vec<_> = all_shards.into_iter().filter(|s| s.index >= 3).collect(); + assert_eq!(available.len(), 4); + + let recovered = ReedSolomonEncoder::decode(&available, 4, 3).unwrap(); + assert_eq!(&recovered[..data.len()], &data); + } + + #[test] + fn error_not_enough_shards() { + let shard = Shard { + index: 0, + data: vec![0u8; 64], + }; + let result = ReedSolomonEncoder::decode(&[shard], 4, 2); + assert!(matches!(result, Err(ErasureError::NotEnoughShards { .. }))); + } + + #[test] + fn error_invalid_params() { + assert!(matches!( + ReedSolomonEncoder::encode(&[1, 2], 0, 2).map(|i| i.count()), + Err(ErasureError::InvalidDataShards(0)) + )); + assert!(matches!( + ReedSolomonEncoder::encode(&[1, 2], 2, 0).map(|i| i.count()), + Err(ErasureError::InvalidParityShards(0)) + )); + } +} diff --git a/ipc-storage/erasure-encoding/src/traits.rs b/ipc-storage/erasure-encoding/src/traits.rs new file mode 100644 index 000000000..fc77bf19f --- /dev/null +++ b/ipc-storage/erasure-encoding/src/traits.rs @@ -0,0 +1,33 @@ +use std::fmt::Debug; + +use crate::error::Result; +use crate::types::{AssignedShard, NodeId}; + +/// Assigns a shard to a storage node. +pub trait NodeAssigner { + type Shard: Debug; + + fn assign(&mut self, shard: Self::Shard, nodes: &[NodeId]) -> AssignedShard; +} + +/// Splits raw data into `num_data_chunks` padded shards, encodes, and returns all shards. +pub trait Encoder { + type Shard: Debug; + + fn encode( + data: &[u8], + num_data_chunks: usize, + num_parity_chunks: usize, + ) -> Result>; +} + +/// Recovers missing original shards and returns reconstructed raw data. 
+pub trait Decoder { + type Shard: Debug; + + fn decode( + shards: &[Self::Shard], + num_data_chunks: usize, + num_parity_chunks: usize, + ) -> Result>; +} diff --git a/ipc-storage/erasure-encoding/src/types.rs b/ipc-storage/erasure-encoding/src/types.rs new file mode 100644 index 000000000..8a38b52f8 --- /dev/null +++ b/ipc-storage/erasure-encoding/src/types.rs @@ -0,0 +1,34 @@ +/// Opaque 32-byte node identifier, decoupled from iroh. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct NodeId(pub [u8; 32]); + +/// A single shard with its positional index. +#[derive(Debug, Clone)] +pub struct Shard { + pub index: usize, + pub data: Vec, +} + +/// A shard assigned to a specific storage node. +#[derive(Debug, Clone)] +pub struct AssignedShard { + pub shard: Shard, + pub node: NodeId, +} + +/// One encoded chunk containing all its assigned shards. +#[derive(Debug, Clone)] +pub struct EncodedChunk { + pub chunk_index: usize, + pub original_data_len: usize, + pub shards: Vec, +} + +/// Metadata describing the encoding parameters, needed for decoding. +#[derive(Debug, Clone)] +pub struct EncodingMetadata { + pub original_len: usize, + pub num_chunks: usize, + pub data_shards: usize, + pub parity_shards: usize, +} diff --git a/ipc-storage/erasure-encoding/tests/integration.rs b/ipc-storage/erasure-encoding/tests/integration.rs new file mode 100644 index 000000000..8eb642de0 --- /dev/null +++ b/ipc-storage/erasure-encoding/tests/integration.rs @@ -0,0 +1,117 @@ +use std::io::Write; + +use erasure_encoding::{ + decode_chunks, encode_and_assign, ChunkRecoveryInput, ErasureError, NodeId, + ReedSolomonEncoder, RotatingAssigner, +}; +use rand::Rng; + +fn make_nodes(n: usize) -> Vec { + (0..n) + .map(|i| { + let mut id = [0u8; 32]; + id[0] = i as u8; + NodeId(id) + }) + .collect() +} + +/// Full end-to-end with mmap: write 100 MB random file, encode via mmap (zero-copy), +/// simulate shard losses, decode, verify via blake3 hash. 
+#[test] +fn end_to_end_mmap_large_file() { + let k = 15; // data shards + let m = 8; // parity shards + let nodes = make_nodes(30); + let file_size = 100 * 1024 * 1024; // 100 MB + + // 1. Write random data to a temp file. + let mut tmpfile = tempfile::NamedTempFile::new().expect("failed to create temp file"); + let mut rng = rand::thread_rng(); + let mut buf = vec![0u8; 1024 * 1024]; // write 1 MB at a time + let mut hasher = blake3::Hasher::new(); + let mut written = 0; + while written < file_size { + let chunk = (file_size - written).min(buf.len()); + rng.fill(&mut buf[..chunk]); + tmpfile.write_all(&buf[..chunk]).unwrap(); + hasher.update(&buf[..chunk]); + written += chunk; + } + tmpfile.flush().unwrap(); + let original_hash = hasher.finalize(); + + // 2. Memory-map the file (zero-copy, OS pages in/out on demand). + let mmap = unsafe { memmap2::Mmap::map(tmpfile.as_file()).expect("mmap failed") }; + assert_eq!(mmap.len(), file_size); + + // 3. Encode and assign shards to nodes. + let (metadata, chunk_iter) = encode_and_assign::( + &mmap, + k, + m, + &nodes, + RotatingAssigner::new(), + ) + .expect("encoding should succeed"); + + assert_eq!(metadata.original_len, file_size); + assert_eq!(metadata.data_shards, k); + assert_eq!(metadata.parity_shards, m); + + let encoded_chunks: Vec<_> = chunk_iter + .collect::, _>>() + .expect("all chunks should encode"); + + // Each chunk should have k + m shards. + for chunk in &encoded_chunks { + assert_eq!(chunk.shards.len(), k + m); + } + + // 4. Simulate losing shards: keep 8 original data shards + 7 parity shards = 15 total. + // Drop original data shards 0..7 (7 shards) and parity shard at index k (1 shard). + // That leaves 8 originals (indices 7..14) + 7 parities (indices 16..22) = 15 >= k. 
+ let drop_indices: Vec = (0..7).chain(std::iter::once(k)).collect(); + let mut recovery_inputs: Vec = encoded_chunks + .iter() + .map(|ec| { + let surviving_shards = ec + .shards + .iter() + .filter(|a| !drop_indices.contains(&a.shard.index)) + .map(|a| a.shard.clone()) + .collect(); + ChunkRecoveryInput { + chunk_index: ec.chunk_index, + original_data_len: ec.original_data_len, + shards: surviving_shards, + num_data_shards: k, + num_parity_shards: m, + } + }) + .collect(); + + // Verify we kept the right number: 23 total - 8 dropped = 15 surviving. + for input in &recovery_inputs { + assert_eq!(input.shards.len(), k + m - drop_indices.len()); + } + + // 5. Decode and recover. + let recovered = + decode_chunks::(&mut recovery_inputs, metadata.original_len) + .expect("decoding should succeed"); + + // 6. Verify via blake3 hash. + assert_eq!(recovered.len(), file_size); + let recovered_hash = blake3::hash(&recovered); + assert_eq!(recovered_hash, original_hash, "hash mismatch after decode"); +} + +/// Empty nodes should be rejected early. +#[test] +fn empty_nodes_rejected() { + let data = vec![1u8; 100]; + let result = + encode_and_assign::(&data, 2, 1, &[], RotatingAssigner::new()); + assert!(matches!(result, Err(ErasureError::NotEnoughNodes { .. 
}))); +} diff --git a/ipc-storage/ipc-decentralized-storage/Cargo.toml b/ipc-storage/ipc-decentralized-storage/Cargo.toml index ce96888bc..07e37a501 100644 --- a/ipc-storage/ipc-decentralized-storage/Cargo.toml +++ b/ipc-storage/ipc-decentralized-storage/Cargo.toml @@ -26,10 +26,7 @@ prometheus_exporter.workspace = true uuid.workspace = true mime_guess.workspace = true urlencoding.workspace = true - -# Entanglement dependencies -entangler.workspace = true -entangler_storage.workspace = true +blake3.workspace = true # HTTP client dependencies reqwest = { version = "0.11", features = ["json"] } @@ -38,6 +35,9 @@ reqwest = { version = "0.11", features = ["json"] } clap = { workspace = true, features = ["derive"] } tracing-subscriber = { workspace = true, features = ["env-filter"] } +# Erasure encoding +erasure-encoding = { path = "../erasure-encoding" } + # Iroh dependencies for decentralized storage iroh.workspace = true iroh-base.workspace = true diff --git a/ipc-storage/ipc-decentralized-storage/src/distribution.rs b/ipc-storage/ipc-decentralized-storage/src/distribution.rs new file mode 100644 index 000000000..cad6eb6f1 --- /dev/null +++ b/ipc-storage/ipc-decentralized-storage/src/distribution.rs @@ -0,0 +1,260 @@ +// Copyright 2025 Recall Contributors +// SPDX-License-Identifier: Apache-2.0, MIT + +//! Data distribution module for erasure-encoded shard distribution. +//! +//! Orchestrates encoding data into shards via Reed-Solomon erasure coding, +//! deterministically assigning shards to storage nodes, and notifying nodes +//! to pull their assigned shards via Iroh P2P. 
+ +use std::collections::HashMap; + +use anyhow::{Context, Result}; +use erasure_encoding::{ + encode_and_assign, BlobId, DeterministicAssigner, EncodedChunk, EncodingMetadata, NodeId, + ReedSolomonEncoder, +}; +use iroh::NodeAddr; +use iroh_blobs::Hash; +use iroh_manager::BlobsClient; +use serde::{Deserialize, Serialize}; +use tracing::{debug, info, warn}; + +/// Maps an erasure-encoding NodeId to an Iroh NodeAddr for P2P connectivity. +/// Populated from on-chain operator info at the encoding epoch. +pub type NodeDirectory = HashMap; + +/// Maps an erasure-encoding NodeId to the node's RPC URL. +pub type NodeRpcDirectory = HashMap; + +/// Parameters for distributing a blob's shards. +pub struct DistributeParams { + pub blob_id: BlobId, + /// The encrypted data to encode and distribute. + pub data: Vec, + /// Number of data shards per chunk (k). + pub data_shards: usize, + /// Number of parity shards per chunk (m). + pub parity_shards: usize, + /// Ordered node list from on-chain state at encoding epoch. + pub nodes: Vec, + /// Mapping from NodeId to Iroh NodeAddr for P2P connectivity. + pub node_directory: NodeDirectory, + /// Mapping from NodeId to RPC URL for pull notifications. + pub node_rpc_directory: NodeRpcDirectory, +} + +/// Result of distributing a single shard. +#[derive(Debug)] +pub struct ShardDistributionResult { + pub chunk_index: usize, + pub shard_index: usize, + pub node: NodeId, + pub iroh_hash: Option, + pub success: bool, + pub error: Option, +} + +/// Result of distributing an entire blob. 
+#[derive(Debug)] +pub struct DistributionResult { + pub metadata: EncodingMetadata, + pub shard_results: Vec, +} + +impl DistributionResult { + pub fn all_succeeded(&self) -> bool { + self.shard_results.iter().all(|r| r.success) + } + + pub fn failure_count(&self) -> usize { + self.shard_results.iter().filter(|r| !r.success).count() + } +} + +/// Deterministic shard storage key following DESIGN.md: +/// key = blob_id / chunk_index / shard_index +pub fn shard_key(blob_id: &BlobId, chunk_index: usize, shard_index: usize) -> String { + let blob_hex = hex::encode(blob_id.0); + format!("{}/{}/{}", blob_hex, chunk_index, shard_index) +} + +/// Request body sent to a storage node's pull endpoint. +#[derive(Debug, Serialize, Deserialize)] +pub struct ShardPullRequest { + pub blob_id: String, + pub chunk_index: usize, + pub shard_index: usize, + pub shards_per_chunk: usize, + pub hash: String, + pub source: NodeAddr, +} + +/// Encode data and distribute shards to their assigned nodes. +/// +/// 1. Erasure-encode the data into chunks and shards +/// 2. Assign shards to nodes deterministically via DeterministicAssigner +/// 3. Store each shard locally in Iroh under a deterministic tag +/// 4. 
Notify each target node via RPC to pull the shard from us +pub async fn distribute( + params: DistributeParams, + local_blobs: &BlobsClient, + local_node_addr: &NodeAddr, +) -> Result { + let assigner = DeterministicAssigner::new(params.blob_id, params.nodes.len()); + + let (metadata, chunk_iter) = encode_and_assign::( + ¶ms.data, + params.data_shards, + params.parity_shards, + ¶ms.nodes, + assigner, + )?; + + info!( + "Encoded blob: {} chunks, k={}, m={}, original_len={}, input_data_len={}", + metadata.num_chunks, metadata.data_shards, metadata.parity_shards, metadata.original_len, + params.data.len() + ); + + let chunks: Vec = chunk_iter + .collect::, _>>() + .context("erasure encoding failed")?; + + let shards_per_chunk = params.data_shards + params.parity_shards; + let mut shard_results = Vec::new(); + + for chunk in &chunks { + info!( + "Chunk {}: original_data_len={}, num_shards={}", + chunk.chunk_index, chunk.original_data_len, chunk.shards.len() + ); + for assigned_shard in &chunk.shards { + let tag = shard_key(¶ms.blob_id, chunk.chunk_index, assigned_shard.shard.index); + + info!( + "Shard {}/{}: data_len={}, assigned_to={:?}", + chunk.chunk_index, assigned_shard.shard.index, + assigned_shard.shard.data.len(), + hex::encode(assigned_shard.node.0) + ); + + // Step 1: Store shard locally + // add_bytes_named(data, tag_name) + let store_result = local_blobs + .add_bytes_named(assigned_shard.shard.data.clone(), tag.clone()) + .await + .context("failed to store shard locally"); + + let hash = match store_result { + Ok(outcome) => outcome.hash, + Err(e) => { + warn!( + "Failed to store shard {}/{} locally: {}", + chunk.chunk_index, assigned_shard.shard.index, e + ); + shard_results.push(ShardDistributionResult { + chunk_index: chunk.chunk_index, + shard_index: assigned_shard.shard.index, + node: assigned_shard.node, + iroh_hash: None, + success: false, + error: Some(e.to_string()), + }); + continue; + } + }; + + // Step 2: Notify the target node to pull the 
shard from us + let rpc_url = params.node_rpc_directory.get(&assigned_shard.node); + let notify_result = match rpc_url { + Some(url) => { + notify_node_to_pull( + url, + ¶ms.blob_id, + chunk.chunk_index, + assigned_shard.shard.index, + shards_per_chunk, + hash, + local_node_addr, + ) + .await + } + None => Err(anyhow::anyhow!( + "No RPC URL for node {:?}", + assigned_shard.node + )), + }; + + match ¬ify_result { + Ok(()) => { + debug!( + "Notified node {:?} to pull shard {}/{} (hash={})", + assigned_shard.node, chunk.chunk_index, assigned_shard.shard.index, hash + ); + } + Err(e) => { + warn!( + "Failed to notify node {:?} for shard {}/{}: {}", + assigned_shard.node, chunk.chunk_index, assigned_shard.shard.index, e + ); + } + } + + shard_results.push(ShardDistributionResult { + chunk_index: chunk.chunk_index, + shard_index: assigned_shard.shard.index, + node: assigned_shard.node, + iroh_hash: Some(hash), + success: notify_result.is_ok(), + error: notify_result.err().map(|e| e.to_string()), + }); + } + } + + Ok(DistributionResult { + metadata, + shard_results, + }) +} + +/// Notify a storage node via its RPC endpoint to pull a shard from us. 
+async fn notify_node_to_pull( + rpc_url: &str, + blob_id: &BlobId, + chunk_index: usize, + shard_index: usize, + shards_per_chunk: usize, + hash: Hash, + source: &NodeAddr, +) -> Result<()> { + let request = ShardPullRequest { + blob_id: hex::encode(blob_id.0), + chunk_index, + shard_index, + shards_per_chunk, + hash: hash.to_string(), + source: source.clone(), + }; + + let url = format!("{}/v1/shards/pull", rpc_url.trim_end_matches('/')); + + let client = reqwest::Client::new(); + let response = client + .post(&url) + .json(&request) + .send() + .await + .context("failed to send pull notification")?; + + if !response.status().is_success() { + let status = response.status(); + let body = response + .text() + .await + .unwrap_or_else(|_| "no body".to_string()); + anyhow::bail!("node returned {}: {}", status, body); + } + + Ok(()) +} diff --git a/ipc-storage/ipc-decentralized-storage/src/gateway/mod.rs b/ipc-storage/ipc-decentralized-storage/src/gateway/mod.rs index 6295a6dc0..54087cb6c 100644 --- a/ipc-storage/ipc-decentralized-storage/src/gateway/mod.rs +++ b/ipc-storage/ipc-decentralized-storage/src/gateway/mod.rs @@ -104,6 +104,49 @@ impl BlobSignatureCollection { } } +/// Default encoding parameters (must match actor defaults). +const DEFAULT_DATA_SHARDS: usize = 4; +const DEFAULT_PARITY_SHARDS: usize = 2; +/// Must match erasure-encoding DEFAULT_MAX_CHUNK_SIZE. +const MAX_CHUNK_SIZE: u64 = 16 * 1024 * 1024; + +/// Compute the set of unique assigned operator indices for a blob using the +/// deterministic assignment formula from erasure-encoding. 
+fn assigned_operator_indices( + blob_hash: &B256, + blob_size: u64, + data_shards: usize, + parity_shards: usize, + num_operators: usize, +) -> HashSet { + if num_operators == 0 { + return HashSet::new(); + } + let shards_per_chunk = data_shards + parity_shards; + let num_chunks = if blob_size == 0 { + 1 + } else { + ((blob_size + MAX_CHUNK_SIZE - 1) / MAX_CHUNK_SIZE) as usize + }; + // rotation_offset = blob_hash (big-endian) % num_operators + let rotation_offset = { + let mut remainder: u64 = 0; + for &byte in &blob_hash.0 { + remainder = (remainder * 256 + byte as u64) % num_operators as u64; + } + remainder as usize + }; + let mut indices = HashSet::new(); + for chunk_idx in 0..num_chunks { + for shard_idx in 0..shards_per_chunk { + let shard_global = chunk_idx * shards_per_chunk + shard_idx; + let node_index = (shard_global + rotation_offset) % num_operators; + indices.insert(node_index); + } + } + indices +} + /// Default gas parameters for transactions fn default_gas_params() -> GasParams { GasParams { @@ -337,14 +380,28 @@ where continue; } - let threshold = (total_operators * 2 + 2) / 3; // Ceiling of 2/3 + // Compute the set of assigned operator indices for this blob + let assigned = assigned_operator_indices( + &hash, + collection.blob_metadata.size, + DEFAULT_DATA_SHARDS, + DEFAULT_PARITY_SHARDS, + total_operators, + ); + let assigned_count = assigned.len(); + let threshold = (assigned_count * 2 + 2) / 3; // Ceiling of 2/3 // Collect signatures that aren't already attempted let attempted_operators = collection.attempted_operators.clone(); - // Build list of (index, operator_addr, rpc_url) for operators we need to query + // Build list of (index, operator_addr, rpc_url) for assigned operators we need to query let mut fetch_tasks = Vec::new(); for (index, operator_addr) in operators.iter().enumerate() { + // Only query operators assigned to this blob + if !assigned.contains(&index) { + continue; + } + // Skip if already collected if 
attempted_operators.contains(&index) { continue; @@ -417,8 +474,8 @@ where } info!( - "Collected {}/{} signatures for blob {} (threshold: {})", - num_collected, total_operators, hash, threshold + "Collected {}/{} signatures for blob {} (threshold: {}, total operators: {})", + num_collected, assigned_count, hash, threshold, total_operators ); // Get metadata before calling finalize_blob @@ -469,7 +526,7 @@ where } else { debug!( "Blob {} progress: {}/{} signatures (threshold: {})", - hash, num_collected, total_operators, threshold + hash, num_collected, assigned_count, threshold ); } } diff --git a/ipc-storage/ipc-decentralized-storage/src/lib.rs b/ipc-storage/ipc-decentralized-storage/src/lib.rs index a73f28b63..61444b30b 100644 --- a/ipc-storage/ipc-decentralized-storage/src/lib.rs +++ b/ipc-storage/ipc-decentralized-storage/src/lib.rs @@ -6,6 +6,8 @@ //! This crate provides decentralized storage abstractions and implementations //! for the IPC (Inter-Planetary Consensus) system. +pub mod distribution; pub mod gateway; pub mod node; pub mod objects; +pub mod retrieval; diff --git a/ipc-storage/ipc-decentralized-storage/src/node/mod.rs b/ipc-storage/ipc-decentralized-storage/src/node/mod.rs index f5ec75053..a91b70c2c 100644 --- a/ipc-storage/ipc-decentralized-storage/src/node/mod.rs +++ b/ipc-storage/ipc-decentralized-storage/src/node/mod.rs @@ -10,10 +10,12 @@ mod resolver; mod rpc; +pub mod shard_verifier; pub mod store; use anyhow::{Context, Result}; use bls_signatures::{PrivateKey as BlsPrivateKey, Serialize as BlsSerialize}; +use erasure_encoding::NodeId; use ethers::types::Address; use fendermint_actor_blobs_shared::bytes::B256; use fendermint_rpc::FendermintClient; @@ -23,16 +25,29 @@ use std::collections::HashMap; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::str::FromStr; use std::sync::{Arc, RwLock}; -use std::time::Duration; +use std::time::{Duration, Instant}; use tendermint_rpc::Url; use tokio::sync::Mutex; use tokio::time::sleep; 
use tracing::{debug, error, info, warn}; +use crate::distribution::NodeRpcDirectory; use crate::gateway::BlobGateway; +use crate::objects::build_node_directories; use resolver::EventPollerConfig; use store::InMemoryStore; +/// Default encoding parameters (must match actor defaults). +const DEFAULT_DATA_SHARDS: usize = 4; +const DEFAULT_PARITY_SHARDS: usize = 2; + +/// Cached operator directory info for the resolution loop. +struct OperatorDirectoryCache { + nodes: Vec, + node_rpc_directory: NodeRpcDirectory, + last_refresh: Instant, +} + /// Configuration for the storage node #[derive(Clone)] pub struct NodeConfig { @@ -154,17 +169,21 @@ pub async fn launch(config: NodeConfig) -> Result<()> { .context("failed to create RPC server Fendermint client")?; let rpc_client = Arc::new(Mutex::new(rpc_client)); - // Start RPC server for signature queries and blob downloads + // Start RPC server for signature queries, blob downloads, and shard pulls let signatures_for_rpc = signatures.clone(); let rpc_bind_addr = config.rpc_bind_addr; let rpc_client_for_server = rpc_client.clone(); let iroh_for_rpc = iroh_node.clone(); + let bls_key_for_rpc = config.bls_private_key; + let rpc_url_for_server = config.rpc_url.clone(); tokio::spawn(async move { if let Err(e) = rpc::start_rpc_server( rpc_bind_addr, signatures_for_rpc, rpc_client_for_server, iroh_for_rpc, + bls_key_for_rpc, + rpc_url_for_server, ) .await { @@ -194,6 +213,14 @@ pub async fn launch(config: NodeConfig) -> Result<()> { } }); + // Determine this node's NodeId from its Iroh identity + let our_node_id = NodeId(node_addr.node_id.as_bytes().clone()); + info!("Our NodeId: {:?}", hex::encode(our_node_id.0)); + + // Operator directory cache (refreshed periodically) + let mut op_cache: Option = None; + let cache_refresh_interval = Duration::from_secs(300); + info!("Starting blob resolution loop"); info!( "BLS public key: {:?}", @@ -203,32 +230,25 @@ pub async fn launch(config: NodeConfig) -> Result<()> { loop { // Check 
completed downloads and move them to the downloaded set - // Collect finished tasks to process let mut finished = Vec::new(); in_progress.retain(|hash, handle| { if handle.is_finished() { finished.push(*hash); - false // Remove from in_progress + false } else { - true // Keep in in_progress + true } }); - // Process finished downloads for hash in finished { - // Note: The task has finished, but we mark it as downloaded - // The actual result checking would require more complex handling - // For now, we assume successful completion if the task finished - info!("Blob {} download completed, waiting for finalization", hash); - downloaded.insert(hash, std::time::Instant::now()); + info!("Blob {} resolution completed, waiting for finalization", hash); + downloaded.insert(hash, Instant::now()); } - // TODO: Query on-chain blob status to check if downloaded blobs are finalized - // For now, just log the downloaded blobs waiting for finalization + // Clean up old downloaded entries if !downloaded.is_empty() { debug!("Blobs waiting for finalization: {}", downloaded.len()); - // Clean up old entries (older than 5 minutes) to prevent memory leaks - let cutoff = std::time::Instant::now() - Duration::from_secs(300); + let cutoff = Instant::now() - Duration::from_secs(300); downloaded.retain(|hash, timestamp| { if *timestamp < cutoff { warn!("Blob {} has been waiting for finalization for >5 minutes, removing from tracking", hash); @@ -239,6 +259,27 @@ pub async fn launch(config: NodeConfig) -> Result<()> { }); } + // Refresh operator directory cache if stale or missing + let cache_stale = op_cache + .as_ref() + .map_or(true, |c| c.last_refresh.elapsed() > cache_refresh_interval); + + if cache_stale { + match build_node_directories(&gateway).await { + Ok((nodes, _node_directory, node_rpc_directory)) => { + info!("Refreshed operator directory: {} nodes", nodes.len()); + op_cache = Some(OperatorDirectoryCache { + nodes, + node_rpc_directory, + last_refresh: Instant::now(), + }); + } 
+ Err(e) => { + warn!("Failed to refresh operator directory: {}", e); + } + } + } + // Query for added blobs match gateway.query_added_blobs().await { Ok(blobs) => { @@ -246,15 +287,13 @@ pub async fn launch(config: NodeConfig) -> Result<()> { info!("Found {} added blobs to resolve", blobs.len()); for blob_item in blobs { - let (hash, size, sources) = blob_item; + let (hash, size, _sources) = blob_item; - // Skip if already downloading + // Skip if already in progress or downloaded if in_progress.contains_key(&hash) { debug!("Blob {} already in progress, skipping", hash); continue; } - - // Check if we're at the concurrency limit if in_progress.len() >= config.max_concurrent_downloads { warn!( "Max concurrent downloads ({}) reached, deferring blob {}", @@ -262,37 +301,45 @@ pub async fn launch(config: NodeConfig) -> Result<()> { ); continue; } - - // Skip if already downloaded and waiting for finalization if downloaded.contains_key(&hash) { debug!("Blob {} already downloaded, waiting for finalization", hash); continue; } - // Spawn a task to download this blob + // Need operator directory to resolve + let Some(cache) = &op_cache else { + warn!("No operator directory available, deferring blob {}", hash); + continue; + }; + + if cache.nodes.is_empty() { + warn!("No nodes in operator directory, deferring blob {}", hash); + continue; + } + + // Spawn shard-based resolution let iroh_clone = iroh_node.clone(); + let nodes_clone = cache.nodes.clone(); + let rpc_dir_clone = cache.node_rpc_directory.clone(); + let our_id = our_node_id; let bls_key = config.bls_private_key; let sigs = signatures.clone(); - // Convert B256 hash to iroh_blobs::Hash - let iroh_hash = Hash::from_bytes(hash.0); - - // Convert sources from B256 to iroh::NodeId - let iroh_sources: std::collections::HashSet<_> = sources - .into_iter() - .map(|(addr, sub_id, source_b256)| { - let node_id = iroh::NodeId::from_bytes(&source_b256.0) - .expect("B256 should be valid NodeId bytes"); - (addr, sub_id, 
node_id) - }) - .collect(); + info!( + "Spawning shard resolution for blob {} (size: {})", + hash, size + ); let handle = tokio::spawn(async move { - resolver::resolve_blob( + resolver::resolve_blob_shards( iroh_clone, - iroh_hash, + hash, size, - iroh_sources, + DEFAULT_DATA_SHARDS, + DEFAULT_PARITY_SHARDS, + nodes_clone, + rpc_dir_clone, + our_id, bls_key, sigs, ) diff --git a/ipc-storage/ipc-decentralized-storage/src/node/resolver.rs b/ipc-storage/ipc-decentralized-storage/src/node/resolver.rs index 68a588cf1..cd6c191b0 100644 --- a/ipc-storage/ipc-decentralized-storage/src/node/resolver.rs +++ b/ipc-storage/ipc-decentralized-storage/src/node/resolver.rs @@ -12,12 +12,17 @@ use std::time::Duration; use anyhow::{Context, Result}; use bls_signatures::{PrivateKey as BlsPrivateKey, Serialize as BlsSerialize}; +use erasure_encoding::{shards_for_node, shard_node, BlobId, NodeId, DEFAULT_MAX_CHUNK_SIZE}; use ethers::prelude::*; use ethers::providers::{Http, Provider}; +use futures::StreamExt; +use iroh::NodeAddr; use iroh_blobs::Hash; use iroh_manager::IrohNode; +use std::str::FromStr; use tracing::{debug, error, info, warn}; +use crate::distribution::{shard_key, NodeRpcDirectory}; use super::store::Store; use super::SignatureStorage; @@ -235,226 +240,216 @@ async fn handle_blob_event(event: BlobEvent, signatures: &SignatureStorage, iroh } } -/// Delete a blob and its associated content from Iroh storage +/// Delete a blob's shard data from Iroh storage. +/// +/// Iterates all Iroh tags and deletes any whose name starts with the blob's +/// hex prefix, covering all `{blob_hex}/{chunk}/{shard}` tags. 
async fn delete_blob_from_iroh(iroh: &IrohNode, hash: Hash) -> Result { - use iroh_blobs::hashseq::HashSeq; - - // First, try to read the hash sequence to get all associated hashes - let hash_seq_bytes = match iroh.blobs_client().read_to_bytes(hash).await { - Ok(bytes) => bytes, - Err(_) => { - // Blob not found, nothing to delete - return Ok(false); - } - }; - - // Parse the hash sequence - let content_hashes: Vec = match HashSeq::try_from(hash_seq_bytes) { - Ok(seq) => seq.iter().collect(), - Err(e) => { - warn!("Failed to parse hash sequence for {}: {}", hash, e); - // Still try to delete the main hash - vec![] + let blob_hex = hex::encode(hash.as_bytes()); + let prefix = format!("{}/", blob_hex); + + let mut tags = iroh.blobs_client().tags().list().await?; + let mut deleted_any = false; + + while let Some(Ok(tag_info)) = tags.next().await { + let tag_name = std::str::from_utf8(tag_info.name.0.as_ref()).unwrap_or(""); + if tag_name.starts_with(&prefix) { + debug!("Deleting shard tag: {}", tag_name); + let _ = iroh.blobs_client().tags().delete(tag_info.name).await; + deleted_any = true; } - }; - - // Delete the hash sequence blob tag - let seq_tag = iroh_blobs::Tag(format!("blob-seq-{}", hash).into()); - let _ = iroh.blobs_client().tags().delete(seq_tag).await; - - // Delete content blob tags - for content_hash in &content_hashes { - let content_tag = iroh_blobs::Tag(format!("blob-{}-{}", hash, content_hash).into()); - let _ = iroh.blobs_client().tags().delete(content_tag).await; } - Ok(true) + Ok(deleted_any) +} + +/// Response from the shard hash lookup endpoint. +#[derive(serde::Deserialize)] +struct ShardHashLookupResponse { + hash: String, + node_addr: NodeAddr, } -/// Resolve a blob by downloading it from one of its sources +/// Resolve a blob by downloading assigned shards from other operators. /// -/// Downloads the hash sequence and all blobs referenced within it (including original content). 
-/// Returns Ok(()) if the blob was successfully downloaded, Err otherwise. -pub async fn resolve_blob( +/// 1. Computes which shards are assigned to this node +/// 2. Checks which are already stored locally +/// 3. For missing shards, queries other operators for the shard hash and downloads via Iroh P2P +/// 4. Signs the blob hash with BLS key once all assigned shards are present +pub async fn resolve_blob_shards( iroh: IrohNode, - hash: Hash, + blob_hash: fendermint_actor_blobs_shared::bytes::B256, size: u64, - sources: std::collections::HashSet<( - fvm_shared::address::Address, - fendermint_actor_blobs_shared::blobs::SubscriptionId, - iroh::NodeId, - )>, + data_shards: usize, + parity_shards: usize, + nodes: Vec, + node_rpc_directory: NodeRpcDirectory, + our_node_id: NodeId, bls_private_key: BlsPrivateKey, signatures: SignatureStorage, ) -> Result<()> { - use iroh_blobs::hashseq::HashSeq; - - info!("Resolving blob: {} (size: {})", hash, size); - debug!("Sources: {} available", sources.len()); - - // Try each source until one succeeds - for (_subscriber, _id, source_node_id) in sources { - debug!("Attempting download from source: {}", source_node_id); - - // Create a NodeAddr from the source - let source_addr = iroh::NodeAddr::new(source_node_id); - - // Step 1: Download the hash sequence blob - match iroh - .blobs_client() - .download_with_opts( - hash, - iroh_blobs::rpc::client::blobs::DownloadOptions { - format: iroh_blobs::BlobFormat::Raw, - nodes: vec![source_addr.clone()], - tag: iroh_blobs::util::SetTagOption::Named(iroh_blobs::Tag( - format!("blob-seq-{}", hash).into(), - )), - mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, - }, - ) - .await - { - Ok(progress) => { - match progress.finish().await { - Ok(outcome) => { - let downloaded_size = outcome.local_size + outcome.downloaded_size; - info!( - "Downloaded hash sequence {} (downloaded: {} bytes, local: {} bytes)", - hash, outcome.downloaded_size, outcome.local_size - ); - - // Step 2: Read 
and parse the hash sequence to get all referenced blobs - let hash_seq_bytes = match iroh.blobs_client().read_to_bytes(hash).await { - Ok(bytes) => bytes, - Err(e) => { - warn!("Failed to read hash sequence {}: {}", hash, e); - continue; - } - }; - - let hash_seq = match HashSeq::try_from(hash_seq_bytes) { - Ok(seq) => seq, - Err(e) => { - warn!("Failed to parse hash sequence {}: {}", hash, e); - continue; - } - }; - - let content_hashes: Vec = hash_seq.iter().collect(); - info!( - "Hash sequence {} contains {} blobs to download", - hash, - content_hashes.len() - ); - - // Step 3: Download all blobs in the hash sequence - let mut all_downloaded = true; - for (idx, content_hash) in content_hashes.iter().enumerate() { - let blob_type = if idx == 0 { - "original content" - } else if idx == 1 { - "metadata" - } else { - "parity" - }; - - debug!( - "Downloading {} blob {} ({}/{}): {}", - blob_type, - content_hash, - idx + 1, - content_hashes.len(), - content_hash - ); - - match iroh - .blobs_client() - .download_with_opts( - *content_hash, - iroh_blobs::rpc::client::blobs::DownloadOptions { - format: iroh_blobs::BlobFormat::Raw, - nodes: vec![source_addr.clone()], - tag: iroh_blobs::util::SetTagOption::Named( - iroh_blobs::Tag( - format!("blob-{}-{}", hash, content_hash).into(), - ), - ), - mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, - }, - ) - .await - { - Ok(content_progress) => match content_progress.finish().await { - Ok(content_outcome) => { - debug!( - "Downloaded {} blob {} (downloaded: {} bytes, local: {} bytes)", - blob_type, - content_hash, - content_outcome.downloaded_size, - content_outcome.local_size - ); - } - Err(e) => { - warn!( - "Failed to complete {} blob {} download: {}", - blob_type, content_hash, e - ); - all_downloaded = false; - } - }, - Err(e) => { - warn!( - "Failed to start {} blob {} download: {}", - blob_type, content_hash, e - ); - all_downloaded = false; - } - } - } - - if !all_downloaded { - warn!( - "Not all content blobs 
downloaded for {}, trying next source", - hash - ); - continue; - } - - info!( - "Successfully resolved blob {} with all {} content blobs (expected original size: {} bytes)", - hash, content_hashes.len(), size - ); - - // Generate BLS signature for the blob hash - let hash_bytes = hash.as_bytes(); - let signature = bls_private_key.sign(hash_bytes); - let signature_bytes = signature.as_bytes(); - - // Store signature in memory - { - let mut sigs = signatures.write().unwrap(); - sigs.insert(hash, signature_bytes.clone()); - } - - info!("Generated BLS signature for blob {}", hash); - debug!("Signature: {}", hex::encode(&signature_bytes)); - debug!("Hash sequence blob size: {} bytes", downloaded_size); - - // Blob downloaded successfully - // It will now wait for validator signatures before finalization - return Ok(()); - } - Err(e) => { - warn!("Failed to complete download from {}: {}", source_node_id, e); - } + let blob_id = BlobId(blob_hash.0); + let blob_iroh_hash = Hash::from_bytes(blob_hash.0); + let num_chunks = (size as usize).div_ceil(DEFAULT_MAX_CHUNK_SIZE); + let shards_per_chunk = data_shards + parity_shards; + + info!( + "Resolving blob {} shards: {} chunks, k={}, m={}, size={}", + blob_hash, num_chunks, data_shards, parity_shards, size + ); + + // Compute which shards are assigned to this node + let assigned = shards_for_node( + &blob_id, + num_chunks, + data_shards, + parity_shards, + &nodes, + &our_node_id, + ); + + info!( + "Node has {} assigned shards for blob {}", + assigned.len(), + blob_hash + ); + + let mut missing_shards = Vec::new(); + + // Check which assigned shards are already stored locally + for &(chunk_idx, shard_idx) in &assigned { + let tag = shard_key(&blob_id, chunk_idx, shard_idx); + let iroh_tag = iroh_blobs::Tag(tag.clone().into()); + + let found = { + let mut tags = iroh.blobs_client().tags().list().await?; + let mut found = false; + while let Some(Ok(tag_info)) = tags.next().await { + if tag_info.name == iroh_tag { + found = true; 
+ break; } } - Err(e) => { - warn!("Failed to start download from {}: {}", source_node_id, e); + found + }; + + if found { + debug!("Shard {}/{} already stored locally", chunk_idx, shard_idx); + } else { + missing_shards.push((chunk_idx, shard_idx)); + } + } + + if missing_shards.is_empty() { + info!("All assigned shards already present for blob {}", blob_hash); + } else { + info!( + "Need to fetch {} missing shards for blob {}", + missing_shards.len(), + blob_hash + ); + + let http_client = reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .build() + .context("failed to create HTTP client")?; + + for (chunk_idx, shard_idx) in missing_shards { + // Find which operator holds this shard + let holder = shard_node(&blob_id, chunk_idx, shard_idx, shards_per_chunk, &nodes); + + let rpc_url = node_rpc_directory.get(&holder).ok_or_else(|| { + anyhow::anyhow!( + "No RPC URL for node {:?} holding shard {}/{}", + holder, + chunk_idx, + shard_idx + ) + })?; + + let blob_id_hex = hex::encode(blob_id.0); + let url = format!( + "{}/v1/shards/{}/{}/{}/hash", + rpc_url.trim_end_matches('/'), + blob_id_hex, + chunk_idx, + shard_idx + ); + + debug!("Querying shard hash from {}", url); + + let resp = http_client + .get(&url) + .send() + .await + .with_context(|| format!("failed to query shard hash from {}", url))?; + + if !resp.status().is_success() { + anyhow::bail!( + "Shard hash lookup failed for {}/{}: HTTP {}", + chunk_idx, + shard_idx, + resp.status() + ); } + + let lookup: ShardHashLookupResponse = resp + .json() + .await + .context("failed to parse shard hash response")?; + + let shard_hash = Hash::from_str(&lookup.hash) + .map_err(|_| anyhow::anyhow!("invalid hash in shard lookup response"))?; + + let tag = shard_key(&blob_id, chunk_idx, shard_idx); + + // Download shard via Iroh P2P + info!( + "Downloading shard {}/{} (hash={}) from node", + chunk_idx, shard_idx, shard_hash + ); + + let progress = iroh + .blobs_client() + .download_with_opts( + shard_hash, + 
iroh_blobs::rpc::client::blobs::DownloadOptions { + format: iroh_blobs::BlobFormat::Raw, + nodes: vec![lookup.node_addr], + tag: iroh_blobs::util::SetTagOption::Named(iroh_blobs::Tag(tag.into())), + mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, + }, + ) + .await + .with_context(|| { + format!("failed to start shard {}/{} download", chunk_idx, shard_idx) + })?; + + let outcome = progress.finish().await.with_context(|| { + format!("shard {}/{} download did not complete", chunk_idx, shard_idx) + })?; + + info!( + "Downloaded shard {}/{} (downloaded: {} bytes, local: {} bytes)", + chunk_idx, shard_idx, outcome.downloaded_size, outcome.local_size + ); } } - anyhow::bail!("Failed to resolve blob {} from any source", hash) + // All assigned shards are now present — sign the blob hash + let signature = bls_private_key.sign(blob_iroh_hash.as_bytes()); + let signature_bytes = signature.as_bytes(); + + { + let mut sigs = signatures.write().unwrap(); + sigs.insert(blob_iroh_hash, signature_bytes.clone()); + } + + info!( + "Generated BLS signature for blob {} (all {} assigned shards present)", + blob_hash, + assigned.len() + ); + + Ok(()) } diff --git a/ipc-storage/ipc-decentralized-storage/src/node/rpc.rs b/ipc-storage/ipc-decentralized-storage/src/node/rpc.rs index 7a242ddeb..f7574b6cd 100644 --- a/ipc-storage/ipc-decentralized-storage/src/node/rpc.rs +++ b/ipc-storage/ipc-decentralized-storage/src/node/rpc.rs @@ -10,18 +10,22 @@ use std::convert::Infallible; use std::net::SocketAddr; +use std::str::FromStr; use anyhow::Result; +use bls_signatures::{PrivateKey as BlsPrivateKey, Serialize as BlsSerialize}; +use erasure_encoding::BlobId; use fendermint_actor_blobs_shared::bytes::B256; use fendermint_rpc::message::GasParams; -use fendermint_rpc::QueryClient; +use fendermint_rpc::{FendermintClient, QueryClient}; use fendermint_vm_message::query::FvmQueryHeight; use fvm_shared::econ::TokenAmount; use iroh_blobs::Hash; use iroh_manager::IrohNode; -use tracing::info; 
+use tracing::{info, warn}; use warp::Filter; +use crate::distribution::ShardPullRequest; use super::{SharedFendermintClient, SignatureStorage}; /// Start the RPC server for signature queries and blob queries @@ -30,11 +34,13 @@ pub async fn start_rpc_server( signatures: SignatureStorage, client: SharedFendermintClient, iroh: IrohNode, + bls_private_key: BlsPrivateKey, + rpc_url: tendermint_rpc::Url, ) -> Result<()> { // GET /signature/{hash} let get_signature = warp::path!("signature" / String) .and(warp::get()) - .and(with_signatures(signatures)) + .and(with_signatures(signatures.clone())) .and_then(handle_get_signature); // GET /health @@ -50,14 +56,40 @@ pub async fn start_rpc_server( .and(with_client(client_for_meta)) .and_then(handle_get_blob); - // GET /v1/blobs/{hash}/content - returns blob content as binary stream + // GET /v1/blobs/{hash}/content - returns blob content via shard retrieval + let iroh_for_content = iroh.clone(); + let rpc_url_for_content = rpc_url; let get_blob_content = warp::path!("v1" / "blobs" / String / "content") .and(warp::get()) - .and(warp::query::()) .and(with_client(client)) - .and(with_iroh(iroh)) + .and(with_iroh(iroh_for_content)) + .and(warp::any().map(move || rpc_url_for_content.clone())) .and_then(handle_get_blob_content); + // GET /v1/node - returns this node's Iroh NodeAddr for P2P connectivity + let iroh_for_node = iroh.clone(); + let get_node_addr = warp::path!("v1" / "node") + .and(warp::get()) + .and(with_iroh(iroh_for_node)) + .and_then(handle_get_node_addr); + + // GET /v1/shards/{blob_id}/{chunk_index}/{shard_index}/hash - lookup shard Iroh hash + let iroh_for_shard_hash = iroh.clone(); + let get_shard_hash = warp::path!("v1" / "shards" / String / usize / usize / "hash") + .and(warp::get()) + .and(with_iroh(iroh_for_shard_hash)) + .and_then(handle_get_shard_hash); + + // POST /v1/shards/pull - accept a shard pull request from a distributor + let signatures_for_pull = signatures.clone(); + let pull_shard = 
warp::path!("v1" / "shards" / "pull") + .and(warp::post()) + .and(warp::body::json()) + .and(with_iroh(iroh)) + .and(with_signatures(signatures_for_pull)) + .and(with_bls_key(bls_private_key)) + .and_then(handle_shard_pull); + // CORS configuration - allow all origins for development let cors = warp::cors() .allow_any_origin() @@ -66,8 +98,11 @@ pub async fn start_rpc_server( let routes = get_signature .or(health) + .or(get_node_addr) + .or(get_shard_hash) .or(get_blob_content) .or(get_blob) + .or(pull_shard) .with(cors); info!("RPC server starting on {}", bind_addr); @@ -82,6 +117,23 @@ fn with_signatures( warp::any().map(move || signatures.clone()) } +/// Warp filter to inject BLS private key +fn with_bls_key( + key: BlsPrivateKey, +) -> impl Filter + Clone { + warp::any().map(move || key) +} + +/// Handle GET /v1/node - returns this node's Iroh NodeAddr +async fn handle_get_node_addr(iroh: IrohNode) -> Result { + let node_addr = iroh.endpoint().node_addr().await.map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to get node address: {}", e), + }) + })?; + Ok(warp::reply::json(&node_addr)) +} + /// Response for signature query #[derive(serde::Serialize)] struct SignatureResponse { @@ -251,15 +303,18 @@ fn with_iroh(iroh: IrohNode) -> impl Filter Result { - use futures::TryStreamExt; - use iroh_blobs::hashseq::HashSeq; + use crate::gateway::BlobGateway; + use crate::objects::build_node_directories; + use crate::retrieval::{retrieve, BlobRetrievalParams}; use warp::hyper::Body; // Parse blob hash - strip 0x prefix if present @@ -296,12 +351,6 @@ async fn handle_get_blob_content( hash_array.copy_from_slice(&blob_hash_bytes); let blob_hash = B256(hash_array); - // Set query height - let height = height_query - .height - .map(FvmQueryHeight::from) - .unwrap_or(FvmQueryHeight::Committed); - // Gas params for the query call let gas_params = GasParams { gas_limit: Default::default(), @@ -309,87 +358,61 @@ async fn handle_get_blob_content( 
gas_premium: Default::default(), }; - // First query the blobs actor to verify the blob exists + // Query the blobs actor to get blob info (size, k, m) let maybe_blob = { let mut client_guard = client.lock().await; client_guard - .blob_get_call(blob_hash, TokenAmount::default(), gas_params, height) + .blob_get_call( + blob_hash, + TokenAmount::default(), + gas_params, + FvmQueryHeight::Committed, + ) .await }; match maybe_blob { Ok(Some(blob)) => { - // The blob hash is actually a hash sequence hash - let hash_seq_hash = Hash::from_bytes(blob_hash.0); let size = blob.size; + let data_shards = blob.data_shards as usize; + let parity_shards = blob.parity_shards as usize; - // Read the hash sequence from Iroh to get the original content hash - let hash_seq_bytes = match iroh.blobs_client().read_to_bytes(hash_seq_hash).await { - Ok(bytes) => bytes, - Err(e) => { - return Ok(warp::reply::with_status( - warp::reply::Response::new(Body::from( - serde_json::to_string(&ErrorResponse { - error: format!("failed to read hash sequence: {}", e), - }) - .unwrap(), - )), - warp::http::StatusCode::INTERNAL_SERVER_ERROR, - )); - } - }; - - let hash_seq = match HashSeq::try_from(hash_seq_bytes) { - Ok(seq) => seq, - Err(e) => { - return Ok(warp::reply::with_status( - warp::reply::Response::new(Body::from( - serde_json::to_string(&ErrorResponse { - error: format!("failed to parse hash sequence: {}", e), - }) - .unwrap(), - )), - warp::http::StatusCode::INTERNAL_SERVER_ERROR, - )); - } - }; - - // First hash in the sequence is the original content - let orig_hash = match hash_seq.iter().next() { - Some(hash) => hash, - None => { - return Ok(warp::reply::with_status( - warp::reply::Response::new(Body::from( - serde_json::to_string(&ErrorResponse { - error: "hash sequence is empty".to_string(), - }) - .unwrap(), - )), - warp::http::StatusCode::INTERNAL_SERVER_ERROR, - )); - } - }; - - // Read the actual content from Iroh - let reader = match iroh.blobs_client().read(orig_hash).await { - 
Ok(reader) => reader, - Err(e) => { - return Ok(warp::reply::with_status( - warp::reply::Response::new(Body::from( - serde_json::to_string(&ErrorResponse { - error: format!("failed to read blob content: {}", e), - }) - .unwrap(), - )), - warp::http::StatusCode::INTERNAL_SERVER_ERROR, - )); - } - }; + // Build node directories from on-chain operator state + let retrieval_client = FendermintClient::new_http(rpc_url, None).map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to create client: {}", e), + }) + })?; + let gateway = + BlobGateway::new(retrieval_client, 10, std::time::Duration::from_secs(5)); + let (nodes, node_directory, node_rpc_directory) = + build_node_directories(&gateway).await.map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to build node directories: {}", e), + }) + })?; + + let blob_id = BlobId(blob_hash.0); + let retrieved_data = retrieve( + &BlobRetrievalParams { + blob_id, + original_len: size as usize, + data_shards, + parity_shards, + nodes, + node_directory, + node_rpc_directory, + }, + iroh.blobs_client(), + ) + .await + .map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to retrieve blob: {}", e), + }) + })?; - // Stream the content as the response body - let bytes_stream = - reader.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)); - let body = Body::wrap_stream(bytes_stream); + let body = Body::from(retrieved_data); let mut response = warp::reply::Response::new(body); response.headers_mut().insert( @@ -425,3 +448,225 @@ async fn handle_get_blob_content( )), } } + +/// Response for shard hash lookup +#[derive(serde::Serialize)] +struct ShardHashResponse { + hash: String, + node_addr: iroh::NodeAddr, +} + +/// Handle GET /v1/shards/{blob_id}/{chunk_index}/{shard_index}/hash +/// +/// Returns the Iroh content hash for a locally-stored shard, allowing other +/// nodes to download it via Iroh P2P. 
+async fn handle_get_shard_hash( + blob_id_hex: String, + chunk_index: usize, + shard_index: usize, + iroh: IrohNode, +) -> Result { + let blob_id_bytes = hex::decode(&blob_id_hex).map_err(|_| { + warp::reject::custom(RpcBadRequest { + message: "invalid blob_id hex".to_string(), + }) + })?; + if blob_id_bytes.len() != 32 { + return Err(warp::reject::custom(RpcBadRequest { + message: format!("blob_id must be 32 bytes, got {}", blob_id_bytes.len()), + })); + } + let mut blob_id_array = [0u8; 32]; + blob_id_array.copy_from_slice(&blob_id_bytes); + let blob_id = BlobId(blob_id_array); + + let tag = crate::distribution::shard_key(&blob_id, chunk_index, shard_index); + + // Look up the Iroh hash for this shard tag + let iroh_tag = iroh_blobs::Tag(tag.into()); + let hash = { + use futures::StreamExt; + let mut tags = iroh.blobs_client().tags().list().await.map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to list tags: {}", e), + }) + })?; + let mut found = None; + while let Some(Ok(tag_info)) = tags.next().await { + if tag_info.name == iroh_tag { + found = Some(tag_info.hash); + break; + } + } + found + }; + + match hash { + Some(hash) => { + let node_addr = iroh.endpoint().node_addr().await.map_err(|e| { + warp::reject::custom(RpcBadRequest { + message: format!("failed to get node address: {}", e), + }) + })?; + Ok(warp::reply::with_status( + warp::reply::json(&ShardHashResponse { + hash: hash.to_string(), + node_addr, + }), + warp::http::StatusCode::OK, + )) + } + None => Ok(warp::reply::with_status( + warp::reply::json(&ErrorResponse { + error: "shard not found".to_string(), + }), + warp::http::StatusCode::NOT_FOUND, + )), + } +} + +/// Response for shard pull request +#[derive(serde::Serialize)] +struct ShardPullResponse { + status: String, + shard_key: String, +} + +/// Handle POST /v1/shards/pull +/// +/// A distributor calls this to tell us to download a shard from them. 
+/// We verify the shard is assigned to us, download from the gateway via Iroh, +/// and generate a BLS signature once the download completes. +async fn handle_shard_pull( + request: ShardPullRequest, + iroh: IrohNode, + signatures: SignatureStorage, + bls_private_key: BlsPrivateKey, +) -> Result { + // Parse blob_id + let blob_id_bytes = hex::decode(&request.blob_id).map_err(|_| { + warp::reject::custom(RpcBadRequest { + message: "invalid blob_id hex".to_string(), + }) + })?; + if blob_id_bytes.len() != 32 { + return Err(warp::reject::custom(RpcBadRequest { + message: format!("blob_id must be 32 bytes, got {}", blob_id_bytes.len()), + })); + } + let mut blob_id_array = [0u8; 32]; + blob_id_array.copy_from_slice(&blob_id_bytes); + let blob_id = BlobId(blob_id_array); + + // Parse the shard's Iroh content hash + let hash = Hash::from_str(&request.hash).map_err(|_| { + warp::reject::custom(RpcBadRequest { + message: "invalid hash".to_string(), + }) + })?; + + // Verify this shard is assigned to us + // TODO: get the node list from on-chain state at encoding epoch. + // For now, we skip full assignment verification and accept the pull request. 
+ // Once on-chain integration is in place, we'd call: + // shard_verifier::verify_shard_assignment( + // &blob_id, request.chunk_index, request.shard_index, + // request.shards_per_chunk, &nodes, &our_node_id + // )?; + + let shard_key = crate::distribution::shard_key( + &blob_id, + request.chunk_index, + request.shard_index, + ); + + info!( + "Received shard pull request: {} (hash={})", + shard_key, hash + ); + + // Spawn a task to download from the gateway and sign on completion + let shard_key_clone = shard_key.clone(); + tokio::spawn(async move { + let download_result = iroh + .blobs_client() + .download_with_opts( + hash, + iroh_blobs::rpc::client::blobs::DownloadOptions { + format: iroh_blobs::BlobFormat::Raw, + nodes: vec![request.source], + tag: iroh_blobs::util::SetTagOption::Named(iroh_blobs::Tag( + shard_key_clone.clone().into(), + )), + mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, + }, + ) + .await; + + match download_result { + Ok(progress) => match progress.finish().await { + Ok(outcome) => { + info!( + "Downloaded shard {} (downloaded: {} bytes, local: {} bytes, total: {} bytes)", + shard_key_clone, outcome.downloaded_size, outcome.local_size, + outcome.downloaded_size + outcome.local_size + ); + + // Verify shard content size + match iroh.blobs_client().read_to_bytes(hash).await { + Ok(bytes) => { + info!( + "Verified shard {} content: {} bytes", + shard_key_clone, bytes.len() + ); + } + Err(e) => { + warn!( + "Failed to verify shard {} content: {}", + shard_key_clone, e + ); + } + } + + // Generate BLS signature over the blob hash (not the shard hash) + // The contract verifies signatures over the blob hash + let blob_hash = Hash::from_bytes(blob_id.0); + let signature = bls_private_key.sign(blob_hash.as_bytes()); + let signature_bytes = signature.as_bytes(); + + // Store signature keyed by blob hash for gateway collection + { + let mut sigs = signatures.write().unwrap(); + sigs.insert(blob_hash, signature_bytes.clone()); + } + + 
info!( + "Generated BLS signature for shard {} (blob_hash={})", + shard_key_clone, blob_hash + ); + } + Err(e) => { + warn!("Failed to complete shard {} download: {}", shard_key_clone, e); + } + }, + Err(e) => { + warn!("Failed to start shard {} download: {}", shard_key_clone, e); + } + } + }); + + Ok(warp::reply::with_status( + warp::reply::json(&ShardPullResponse { + status: "accepted".to_string(), + shard_key, + }), + warp::http::StatusCode::ACCEPTED, + )) +} + +#[derive(Debug)] +struct RpcBadRequest { + message: String, +} + +impl warp::reject::Reject for RpcBadRequest {} diff --git a/ipc-storage/ipc-decentralized-storage/src/node/shard_verifier.rs b/ipc-storage/ipc-decentralized-storage/src/node/shard_verifier.rs new file mode 100644 index 000000000..f952263f5 --- /dev/null +++ b/ipc-storage/ipc-decentralized-storage/src/node/shard_verifier.rs @@ -0,0 +1,77 @@ +// Copyright 2025 Recall Contributors +// SPDX-License-Identifier: Apache-2.0, MIT + +//! Shard assignment verification for storage nodes. +//! +//! When a storage node receives a shard pull request, it uses this module +//! to verify that the shard is legitimately assigned to it per the +//! deterministic mapping. + +use anyhow::{ensure, Result}; +use erasure_encoding::{shard_node, BlobId, NodeId}; + +/// Verify that a shard is correctly assigned to the expected node. +/// +/// Called by the storage node when receiving a shard pull request. +/// Returns `Ok(())` if the shard belongs to `expected_node`, error otherwise. 
+pub fn verify_shard_assignment(
+    blob_id: &BlobId,
+    chunk_index: usize,
+    shard_index: usize,
+    shards_per_chunk: usize,
+    nodes: &[NodeId],
+    expected_node: &NodeId,
+) -> Result<()> {
+    let assigned = shard_node(blob_id, chunk_index, shard_index, shards_per_chunk, nodes);
+    ensure!(
+        assigned == *expected_node,
+        "Shard {}/{} is assigned to node {:?}, not {:?}",
+        chunk_index,
+        shard_index,
+        assigned,
+        expected_node
+    );
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_nodes(n: usize) -> Vec<NodeId> {
+        (0..n)
+            .map(|i| {
+                let mut id = [0u8; 32];
+                id[0] = i as u8;
+                NodeId(id)
+            })
+            .collect()
+    }
+
+    #[test]
+    fn verify_correct_assignment() {
+        let mut blob_bytes = [0u8; 32];
+        blob_bytes[0] = 7;
+        let blob_id = BlobId(blob_bytes);
+        let nodes = make_nodes(5);
+        let shards_per_chunk = 3;
+
+        // blob_id [7, 0..] % 5 = 2, so shard (0, 0) → node[2]
+        let expected = shard_node(&blob_id, 0, 0, shards_per_chunk, &nodes);
+        assert!(verify_shard_assignment(&blob_id, 0, 0, shards_per_chunk, &nodes, &expected).is_ok());
+    }
+
+    #[test]
+    fn reject_wrong_assignment() {
+        let mut blob_bytes = [0u8; 32];
+        blob_bytes[0] = 7;
+        let blob_id = BlobId(blob_bytes);
+        let nodes = make_nodes(5);
+        let shards_per_chunk = 3;
+
+        // node[0] is not the correct assignee for shard (0, 0)
+        let wrong_node = nodes[0];
+        let result = verify_shard_assignment(&blob_id, 0, 0, shards_per_chunk, &nodes, &wrong_node);
+        assert!(result.is_err());
+    }
+}
diff --git a/ipc-storage/ipc-decentralized-storage/src/objects.rs b/ipc-storage/ipc-decentralized-storage/src/objects.rs
index c2ca3cab0..41a0234b6 100644
--- a/ipc-storage/ipc-decentralized-storage/src/objects.rs
+++ b/ipc-storage/ipc-decentralized-storage/src/objects.rs
@@ -5,34 +5,29 @@
 //! Objects API service for handling object upload and download
 //!
 //! This module provides HTTP endpoints for:
-//!
- Uploading objects to Iroh storage with erasure encoding + distribution //! - Downloading objects from buckets //! - Downloading blobs directly -use std::{ - convert::Infallible, net::SocketAddr, num::ParseIntError, path::Path, str::FromStr, - time::Instant, -}; +use std::{convert::Infallible, net::SocketAddr, path::Path, str::FromStr, time::Instant}; use anyhow::{anyhow, Context, Result}; use bytes::Buf; -use entangler::{ChunkRange, Config, EntanglementResult, Entangler}; -use entangler_storage::iroh::IrohStorage as EntanglerIrohStorage; +use erasure_encoding::{BlobId, NodeId}; use fendermint_actor_bucket::{GetParams, Object}; use fendermint_rpc::{client::FendermintClient, message::GasParams, QueryClient}; use fendermint_vm_message::query::FvmQueryHeight; -use futures_util::{StreamExt, TryStreamExt}; +use futures_util::StreamExt; use fvm_shared::address::{Address, Error as NetworkError, Network}; use fvm_shared::econ::TokenAmount; use ipc_api::ethers_address_to_fil_address; use iroh::NodeAddr; -use iroh_blobs::{hashseq::HashSeq, rpc::client::blobs::BlobStatus, util::SetTagOption, Hash}; -use iroh_manager::{get_blob_hash_and_size, BlobsClient, IrohNode}; +use iroh_blobs::Hash; +use iroh_manager::{BlobsClient, IrohNode}; use lazy_static::lazy_static; use mime_guess::get_mime_extensions_str; use prometheus::{register_histogram, register_int_counter, Histogram, IntCounter}; use serde::{Deserialize, Serialize}; -use thiserror::Error; use tracing::{debug, info}; use uuid::Uuid; use warp::path::Tail; @@ -43,13 +38,14 @@ use warp::{ Filter, Rejection, Reply, }; -/// The alpha parameter for alpha entanglement determines the number of parity blobs to generate -/// for the original blob. -const ENTANGLER_ALPHA: u8 = 3; -/// The s parameter for alpha entanglement determines the number of horizontal strands in the grid. -const ENTANGLER_S: u8 = 5; -/// Chunk size used by the entangler. 
-const CHUNK_SIZE: u64 = 1024; +use crate::distribution::{distribute, DistributeParams, NodeDirectory, NodeRpcDirectory}; +use crate::gateway::BlobGateway; +use crate::retrieval::{retrieve, BlobRetrievalParams}; + +/// Number of data shards per chunk for erasure encoding (k). +const DEFAULT_DATA_SHARDS: usize = 4; +/// Number of parity shards per chunk for erasure encoding (m). +const DEFAULT_PARITY_SHARDS: usize = 2; /// Configuration for the objects service #[derive(Clone, Debug)] @@ -113,6 +109,7 @@ pub async fn run_objects_service( .and(with_iroh(iroh_node.clone())) .and(warp::multipart::form().max_length(config.max_object_size + 1024 * 1024)) .and(with_max_size(config.max_object_size)) + .and(with_client(client.clone())) .and_then(handle_object_upload); let objects_download = warp::path!("v1" / "objects" / String / ..) @@ -123,7 +120,6 @@ pub async fn run_objects_service( .or(warp::head().map(|| "HEAD".to_string())) .unify(), ) - .and(warp::header::optional::("Range")) .and(warp::query::()) .and(with_client(client.clone())) .and(with_iroh_blobs(iroh_resolver_blobs.clone())) @@ -136,7 +132,6 @@ pub async fn run_objects_service( .or(warp::head().map(|| "HEAD".to_string())) .unify(), ) - .and(warp::header::optional::("Range")) .and(warp::query::()) .and(with_client(client.clone())) .and(with_iroh_blobs(iroh_resolver_blobs.clone())) @@ -181,6 +176,7 @@ pub fn objects_routes( .and(with_iroh(iroh_node.clone())) .and(warp::multipart::form().max_length(max_object_size + 1024 * 1024)) .and(with_max_size(max_object_size)) + .and(with_client(client.clone())) .and_then(handle_object_upload); let objects_download = warp::path!("v1" / "objects" / String / ..) 
@@ -191,7 +187,6 @@ pub fn objects_routes( .or(warp::head().map(|| "HEAD".to_string())) .unify(), ) - .and(warp::header::optional::("Range")) .and(warp::query::()) .and(with_client(client.clone())) .and(with_iroh_blobs(iroh_resolver_blobs.clone())) @@ -204,7 +199,6 @@ pub fn objects_routes( .or(warp::head().map(|| "HEAD".to_string())) .unify(), ) - .and(warp::header::optional::("Range")) .and(warp::query::()) .and(with_client(client.clone())) .and(with_iroh_blobs(iroh_resolver_blobs.clone())) @@ -242,25 +236,9 @@ struct HeightQuery { pub height: Option, } -#[derive(Debug, Error)] -enum ObjectsError { - #[error("error parsing range header: `{0}`")] - RangeHeaderParseError(ParseIntError), - #[error("invalid range header")] - RangeHeaderInvalid, -} - -impl From for ObjectsError { - fn from(err: ParseIntError) -> Self { - ObjectsError::RangeHeaderParseError(err) - } -} - #[derive(Default)] struct ObjectParser { - hash: Option, size: Option, - source: Option, data_part: Option, } @@ -278,14 +256,6 @@ impl ObjectParser { Ok(value) } - async fn read_hash(&mut self, form_part: Part) -> anyhow::Result<()> { - let value = self.read_part(form_part).await?; - let text = String::from_utf8(value).map_err(|_| anyhow!("cannot parse hash"))?; - let hash: Hash = text.parse().map_err(|_| anyhow!("cannot parse hash"))?; - self.hash = Some(hash); - Ok(()) - } - async fn read_size(&mut self, form_part: Part) -> anyhow::Result<()> { let value = self.read_part(form_part).await?; let text = String::from_utf8(value).map_err(|_| anyhow!("cannot parse size"))?; @@ -294,42 +264,23 @@ impl ObjectParser { Ok(()) } - async fn read_source(&mut self, form_part: Part) -> anyhow::Result<()> { - let value = self.read_part(form_part).await?; - let text = String::from_utf8(value).map_err(|_| anyhow!("cannot parse source"))?; - let source: NodeAddr = - serde_json::from_str(&text).map_err(|_| anyhow!("cannot parse source"))?; - self.source = Some(source); - Ok(()) - } - async fn read_form(mut form_data: 
warp::multipart::FormData) -> anyhow::Result { let mut object_parser = ObjectParser::default(); while let Some(part) = form_data.next().await { let part = part.map_err(|e| anyhow!("cannot read form data: {}", e))?; match part.name() { - "hash" => { - object_parser.read_hash(part).await?; - } "size" => { object_parser.read_size(part).await?; } - "source" => { - object_parser.read_source(part).await?; - } "data" => { object_parser.data_part = Some(part); // This early return was added to avoid the "failed to lock multipart state" error. // It implies that the data field must be the last one sent in the multipart form. return Ok(object_parser); } - // Ignore but accept signature-related fields for backward compatibility - "chain_id" | "msg" => { - // Read and discard the data - let _ = object_parser.read_part(part).await?; - } _ => { - return Err(anyhow!("unknown form field")); + // Ignore unknown fields for forward compatibility + let _ = object_parser.read_part(part).await?; } } } @@ -385,15 +336,23 @@ async fn handle_node_addr(iroh: IrohNode) -> Result { #[derive(Serialize)] struct UploadResponse { - hash: String, // Hash sequence hash (for bucket storage) - orig_hash: String, // Original blob content hash (for addBlob) - metadata_hash: String, + /// Original blob content hash. + hash: String, + /// Number of chunks the data was split into. + num_chunks: usize, + /// Number of data shards per chunk (k). + data_shards: usize, + /// Number of parity shards per chunk (m). + parity_shards: usize, + /// Original data length in bytes. 
+ original_len: usize, } async fn handle_object_upload( iroh: IrohNode, form_data: warp::multipart::FormData, max_size: u64, + client: FendermintClient, ) -> Result { let start_time = Instant::now(); let parser = ObjectParser::read_form(form_data).await.map_err(|e| { @@ -416,281 +375,172 @@ async fn handle_object_upload( })); } - let upload_id = Uuid::new_v4(); - - // Handle the two upload cases - let hash = match (parser.source, parser.data_part) { - // Case 1: Source node provided - download from the source - (Some(source), None) => { - let hash = match parser.hash { - Some(hash) => hash, - None => { - return Err(Rejection::from(BadRequest { - message: "missing hash in form".to_string(), - })) - } - }; - - let tag = iroh_blobs::Tag(format!("temp-{hash}-{upload_id}").into()); - let progress = iroh - .blobs_client() - .download_with_opts( - hash, - iroh_blobs::rpc::client::blobs::DownloadOptions { - format: iroh_blobs::BlobFormat::Raw, - nodes: vec![source], - tag: SetTagOption::Named(tag), - mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, - }, - ) - .await - .map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to fetch blob {}: {}", hash, e), - }) - })?; - let outcome = progress.finish().await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to fetch blob {}: {}", hash, e), - }) - })?; - let outcome_size = outcome.local_size + outcome.downloaded_size; - if outcome_size != size { - return Err(Rejection::from(BadRequest { - message: format!( - "blob size and given size do not match (expected {}, got {})", - size, outcome_size - ), - })); - } + let _upload_id = Uuid::new_v4(); - debug!( - "downloaded blob {} in {:?} (size: {}; local_size: {}; downloaded_size: {})", - hash, outcome.stats.elapsed, size, outcome.local_size, outcome.downloaded_size, - ); - COUNTER_BYTES_UPLOADED.inc_by(outcome.downloaded_size); - hash + // Collect upload data into memory + let data_part = match parser.data_part { + Some(part) => 
part, + None => { + return Err(Rejection::from(BadRequest { + message: "missing data in form".to_string(), + })) } + }; - // Case 2: Direct upload - store the provided data - (None, Some(data_part)) => { - let stream = data_part.stream().map(|result| { - result - .map(|mut buf| buf.copy_to_bytes(buf.remaining())) - .map_err(|e| { - std::io::Error::new(std::io::ErrorKind::Other, format!("Warp error: {}", e)) - }) - }); - - let batch = iroh.blobs_client().batch().await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to store blob: {}", e), - }) - })?; - let temp_tag = batch.add_stream(stream).await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to store blob: {}", e), - }) - })?; - - let hash = *temp_tag.hash(); - let new_tag = iroh_blobs::Tag(format!("temp-{hash}-{upload_id}").into()); - batch.persist_to(temp_tag, new_tag).await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to persist blob: {}", e), - }) - })?; - - drop(batch); + let mut data = Vec::new(); + let mut stream = data_part.stream(); + while let Some(result) = stream.next().await { + let mut buf = result.map_err(|e| { + Rejection::from(BadRequest { + message: format!("failed to read upload stream: {}", e), + }) + })?; + data.extend_from_slice(&buf.copy_to_bytes(buf.remaining())); + } - let status = iroh.blobs_client().status(hash).await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to check blob status: {}", e), - }) - })?; - let BlobStatus::Complete { size } = status else { - return Err(Rejection::from(BadRequest { - message: "failed to store data".to_string(), - })); - }; - COUNTER_BYTES_UPLOADED.inc_by(size); - debug!("stored uploaded blob {} (size: {})", hash, size); - - hash - } + if data.len() as u64 != size { + return Err(Rejection::from(BadRequest { + message: format!( + "data size and given size do not match (expected {}, got {})", + size, + data.len() + ), + })); + } - (Some(_), Some(_)) => { - 
return Err(Rejection::from(BadRequest { - message: "cannot provide both source and data".to_string(), - })); - } + // Compute content hash (blake3) from the uploaded data + let hash = Hash::from(*blake3::hash(&data).as_bytes()); - (None, None) => { - return Err(Rejection::from(BadRequest { - message: "must provide either source or data".to_string(), - })); - } - }; + COUNTER_BYTES_UPLOADED.inc_by(data.len() as u64); + debug!("uploaded blob {} (size: {})", hash, data.len()); - debug!("raw uploaded hash: {}", hash); + // Build node directories from on-chain state + let gateway = BlobGateway::new(client, 10, std::time::Duration::from_secs(5)); + let (nodes, node_directory, node_rpc_directory) = + build_node_directories(&gateway).await.map_err(|e| { + Rejection::from(BadRequest { + message: format!("failed to build node directories: {}", e), + }) + })?; - let ent = new_entangler(iroh.blobs_client()).map_err(|e| { + // Get local node address for P2P shard distribution + let local_node_addr = iroh.endpoint().node_addr().await.map_err(|e| { Rejection::from(BadRequest { - message: format!("failed to create entangler: {}", e), + message: format!("failed to get local node address: {}", e), }) })?; - let ent_result = ent.entangle_uploaded(hash.to_string()).await.map_err(|e| { + + // Erasure-encode and distribute shards to assigned nodes + let blob_id = BlobId(*hash.as_bytes()); + info!( + "Distributing blob: hash={}, data_len={}, blob_id={}, nodes={}", + hash, data.len(), hex::encode(blob_id.0), nodes.len() + ); + let dist_result = distribute( + DistributeParams { + blob_id, + data, + data_shards: DEFAULT_DATA_SHARDS, + parity_shards: DEFAULT_PARITY_SHARDS, + nodes, + node_directory, + node_rpc_directory, + }, + iroh.blobs_client(), + &local_node_addr, + ) + .await + .map_err(|e| { Rejection::from(BadRequest { - message: format!("failed to entangle uploaded data: {}", e), + message: format!("failed to distribute shards: {}", e), }) })?; - debug!( - "entanglement result: 
orig_hash={}, metadata_hash={}, upload_results_count={}", - ent_result.orig_hash, - ent_result.metadata_hash, - ent_result.upload_results.len() + info!( + "Distribution complete: num_chunks={}, k={}, m={}, original_len={}, failures={}, total_shards={}", + dist_result.metadata.num_chunks, + dist_result.metadata.data_shards, + dist_result.metadata.parity_shards, + dist_result.metadata.original_len, + dist_result.failure_count(), + dist_result.shard_results.len() ); - - let hash_seq_hash = tag_entangled_data(&iroh, &ent_result, upload_id) - .await - .map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to tag entangled data: {}", e), - }) - })?; - - debug!("hash_seq_hash: {}", hash_seq_hash); + for sr in &dist_result.shard_results { + info!( + " Shard result {}/{}: hash={:?}, success={}, error={:?}", + sr.chunk_index, sr.shard_index, sr.iroh_hash, sr.success, sr.error + ); + } COUNTER_BLOBS_UPLOADED.inc(); HISTOGRAM_UPLOAD_TIME.observe(start_time.elapsed().as_secs_f64()); let response = UploadResponse { - hash: hash_seq_hash.to_string(), - orig_hash: ent_result.orig_hash.clone(), - metadata_hash: ent_result.metadata_hash, + hash: hash.to_string(), + num_chunks: dist_result.metadata.num_chunks, + data_shards: dist_result.metadata.data_shards, + parity_shards: dist_result.metadata.parity_shards, + original_len: dist_result.metadata.original_len, }; Ok(warp::reply::json(&response)) } -async fn tag_entangled_data( - iroh: &IrohNode, - ent_result: &EntanglementResult, - upload_id: Uuid, -) -> Result { - let orig_hash = Hash::from_str(ent_result.orig_hash.as_str())?; - let metadata_hash = Hash::from_str(ent_result.metadata_hash.as_str())?; - - // collect all hashes related to the blob, but ignore the metadata hash, as we want to make - // sure that the metadata hash is the second hash in the sequence after the original hash - let upload_hashes = ent_result - .upload_results - .iter() - .map(|r| Hash::from_str(&r.hash)) - .collect::, _>>()? 
- .into_iter() - .filter(|h| h != &metadata_hash) - .collect::>(); - - let mut hashes = vec![orig_hash, metadata_hash]; - hashes.extend(upload_hashes); - - let hashes_str = hashes - .iter() - .map(|h| h.to_string()) - .collect::>() - .join(", "); - - let batch = iroh.blobs_client().batch().await?; - - // make a hash sequence object from the hashes and upload it to iroh - let hash_seq = hashes.into_iter().collect::(); - - let temp_tag = batch - .add_bytes_with_opts(hash_seq, iroh_blobs::BlobFormat::HashSeq) - .await?; - let hash_seq_hash = *temp_tag.hash(); +/// Query on-chain operator state and build node directories for shard distribution. +pub async fn build_node_directories( + gateway: &BlobGateway, +) -> Result<(Vec, NodeDirectory, NodeRpcDirectory)> { + let operators = gateway + .query_active_operators() + .await + .context("failed to query active operators")?; - debug!( - "storing hash sequence: {} ({})", - hash_seq_hash.to_string(), - hashes_str - ); + if operators.is_empty() { + anyhow::bail!("no active operators found"); + } - // this tag will be replaced later by the validator to "stored-seq-{hash_seq_hash}" - let hash_seq_tag = iroh_blobs::Tag(format!("temp-seq-{hash_seq_hash}").into()); - batch.persist_to(temp_tag, hash_seq_tag).await?; + let mut nodes = Vec::new(); + let mut node_directory = NodeDirectory::new(); + let mut node_rpc_directory = NodeRpcDirectory::new(); - drop(batch); + let http_client = reqwest::Client::new(); - // delete all tags returned by the entangler - for ent_upload_result in &ent_result.upload_results { - let tag_value = ent_upload_result - .info - .get("tag") - .ok_or_else(|| anyhow!("Missing tag in entanglement upload result"))?; - let tag = iroh_blobs::Tag::from(tag_value.clone()); - iroh.blobs_client().tags().delete(tag).await?; - } + for operator_addr in &operators { + let info = gateway + .get_operator_info(*operator_addr) + .await + .context("failed to get operator info")?; - // remove upload tags - let orig_tag = 
iroh_blobs::Tag(format!("temp-{orig_hash}-{upload_id}").into()); - iroh.blobs_client().tags().delete(orig_tag).await?; + // Query the operator's RPC endpoint to get their Iroh NodeAddr + let url = format!("{}/v1/node", info.rpc_url.trim_end_matches('/')); + let resp = http_client + .get(&url) + .send() + .await + .with_context(|| format!("failed to query node addr from {}", url))?; - Ok(hash_seq_hash) -} + if !resp.status().is_success() { + anyhow::bail!("node {} returned {}", url, resp.status()); + } -fn new_entangler(iroh: &BlobsClient) -> Result, entangler::Error> { - Entangler::new( - EntanglerIrohStorage::from_client(iroh.clone()), - Config::new(ENTANGLER_ALPHA, ENTANGLER_S), - ) -} + let node_addr: NodeAddr = resp + .json() + .await + .with_context(|| format!("failed to parse node addr from {}", url))?; -fn get_range_params(range: String, size: u64) -> Result<(u64, u64), ObjectsError> { - let range: Vec = range - .replace("bytes=", "") - .split('-') - .map(|n| n.to_string()) - .collect(); - if range.len() != 2 { - return Err(ObjectsError::RangeHeaderInvalid); - } - let (first, mut last): (u64, u64) = match (!range[0].is_empty(), !range[1].is_empty()) { - (true, true) => (range[0].parse::()?, range[1].parse::()?), - (true, false) => (range[0].parse::()?, size - 1), - (false, true) => { - let last = range[1].parse::()?; - if last > size { - (0, size - 1) - } else { - (size - last, size - 1) - } - } - (false, false) => (0, size - 1), - }; - if first > last || first >= size { - return Err(ObjectsError::RangeHeaderInvalid); - } - if last >= size { - last = size - 1; + let node_id = NodeId(node_addr.node_id.as_bytes().clone()); + + nodes.push(node_id); + node_directory.insert(node_id, node_addr); + node_rpc_directory.insert(node_id, info.rpc_url.clone()); } - Ok((first, last)) -} -struct ObjectRange { - start: u64, - end: u64, - len: u64, - size: u64, - body: Body, + Ok((nodes, node_directory, node_rpc_directory)) } -async fn handle_object_download( +async fn 
handle_object_download( address: String, tail: Tail, method: String, - range: Option, height_query: HeightQuery, client: F, iroh: BlobsClient, @@ -714,7 +564,7 @@ async fn handle_object_download( let key: Vec = path.into(); let start_time = Instant::now(); - let maybe_object = os_get(client, address, GetParams(key.clone()), height) + let maybe_object = os_get(client.clone(), address, GetParams(key.clone()), height) .await .map_err(|e| { Rejection::from(BadRequest { @@ -724,122 +574,50 @@ async fn handle_object_download( match maybe_object { Some(object) => { - let seq_hash = Hash::from_bytes(object.hash.0); - let (hash, size) = get_blob_hash_and_size(&iroh, seq_hash).await.map_err(|e| { - Rejection::from(BadRequest { - message: e.to_string(), - }) - })?; + let size = object.size; + + // Retrieve blob by fetching shards from assigned nodes and RS-decoding + let gateway = BlobGateway::new(client, 10, std::time::Duration::from_secs(5)); + let (nodes, node_directory, node_rpc_directory) = + build_node_directories(&gateway).await.map_err(|e| { + Rejection::from(BadRequest { + message: format!("failed to build node directories: {}", e), + }) + })?; - let ent = new_entangler(&iroh).map_err(|e| { + let blob_id = BlobId(object.hash.0); + let retrieved_data = retrieve( + &BlobRetrievalParams { + blob_id, + original_len: size as usize, + data_shards: DEFAULT_DATA_SHARDS, + parity_shards: DEFAULT_PARITY_SHARDS, + nodes, + node_directory, + node_rpc_directory, + }, + &iroh, + ) + .await + .map_err(|e| { Rejection::from(BadRequest { - message: format!("failed to create entangler: {}", e), + message: format!("failed to retrieve blob: {}", e), }) })?; - let recovery_hash = Hash::from_bytes(object.recovery_hash.0); - - let object_range = match range { - Some(range) => { - let (first_byte, last_byte) = get_range_params(range, size).map_err(|e| { - Rejection::from(BadRequest { - message: e.to_string(), - }) - })?; - let len = (last_byte - first_byte) + 1; - - let first_chunk = 
first_byte / CHUNK_SIZE; - let last_chunk = last_byte / CHUNK_SIZE; - - let bytes_stream = ent - .download_range( - &hash.to_string(), - ChunkRange::Between(first_chunk, last_chunk), - Some(recovery_hash.to_string()), - ) - .await - .map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to download object: {} {}", hash, e), - }) - })?; - - let offset = (first_byte % CHUNK_SIZE) as usize; - let end_offset = (last_byte % CHUNK_SIZE + 1) as usize; - - let bytes_stream = bytes_stream.enumerate().map(move |(i, chunk)| { - let chunk = chunk?; - let result = if first_chunk == last_chunk { - // Single chunk case - slice with both offsets - chunk.slice(offset..end_offset) - } else if i == 0 { - // First of multiple chunks - chunk.slice(offset..) - } else if i == (last_chunk - first_chunk) as usize { - // Last of multiple chunks - chunk.slice(..end_offset) - } else { - // Middle chunks - chunk - }; - Ok::<_, anyhow::Error>(result) - }); - - let body = Body::wrap_stream(bytes_stream); - ObjectRange { - start: first_byte, - end: last_byte, - len, - size, - body, - } - } - None => { - let bytes_stream = ent - .download(&hash.to_string(), Some(&recovery_hash.to_string())) - .await - .map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to download object: {} {}", hash, e), - }) - })?; - let body = Body::wrap_stream(bytes_stream.map_err(|e| anyhow::anyhow!(e))); - ObjectRange { - start: 0, - end: size - 1, - len: size, - size, - body, - } - } - }; - // If it is a HEAD request, we don't need to send the body, - // but we still need to send the Content-Length header + // HEAD request: return headers only if method == "HEAD" { let mut response = warp::reply::Response::new(Body::empty()); let mut header_map = HeaderMap::new(); - header_map.insert("Content-Length", HeaderValue::from(object_range.len)); + header_map.insert("Content-Length", HeaderValue::from(size)); let headers = response.headers_mut(); headers.extend(header_map); return 
Ok(response); } - let mut response = warp::reply::Response::new(object_range.body); + let mut response = warp::reply::Response::new(Body::from(retrieved_data)); let mut header_map = HeaderMap::new(); - if object_range.len < object_range.size { - *response.status_mut() = StatusCode::PARTIAL_CONTENT; - header_map.insert( - "Content-Range", - HeaderValue::from_str(&format!( - "bytes {}-{}/{}", - object_range.start, object_range.end, object_range.size - )) - .unwrap(), - ); - } else { - header_map.insert("Accept-Ranges", HeaderValue::from_str("bytes").unwrap()); - } - header_map.insert("Content-Length", HeaderValue::from(object_range.len)); + header_map.insert("Content-Length", HeaderValue::from(size)); let content_type = object .metadata @@ -864,7 +642,7 @@ async fn handle_object_download( headers.extend(header_map); COUNTER_BLOBS_DOWNLOADED.inc(); - COUNTER_BYTES_DOWNLOADED.inc_by(object_range.len); + COUNTER_BYTES_DOWNLOADED.inc_by(size); HISTOGRAM_DOWNLOAD_TIME.observe(start_time.elapsed().as_secs_f64()); Ok(response) @@ -874,10 +652,9 @@ async fn handle_object_download( } /// Handle direct blob download by querying the blobs actor. 
-async fn handle_blob_download( +async fn handle_blob_download( blob_hash_str: String, method: String, - range: Option, height_query: HeightQuery, client: F, iroh: BlobsClient, @@ -908,7 +685,7 @@ async fn handle_blob_download( let start_time = Instant::now(); // Query the blobs actor to get blob info - let maybe_blob = blob_get(client, blob_hash, height).await.map_err(|e| { + let maybe_blob = blob_get(client.clone(), blob_hash, height).await.map_err(|e| { Rejection::from(BadRequest { message: format!("blobs actor query error: {}", e), }) @@ -916,122 +693,52 @@ async fn handle_blob_download( match maybe_blob { Some(blob) => { - // The blob hash from blobs actor is the hash sequence hash - // We need to parse it to get the original content hash - let hash_seq_hash = Hash::from_bytes(blob_hash.0); let size = blob.size; - debug!( - "blob download: hash_seq_hash={}, size={}", - hash_seq_hash, size - ); + debug!("blob download: hash={}, size={}", blob_hash, size); - // Read the hash sequence to get the original content hash - let hash_seq_bytes = iroh.read_to_bytes(hash_seq_hash).await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to read hash sequence: {} {}", hash_seq_hash, e), - }) - })?; - - let hash_seq = HashSeq::try_from(hash_seq_bytes).map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to parse hash sequence: {}", e), - }) - })?; + // Retrieve blob by fetching shards from assigned nodes and RS-decoding + let gateway = BlobGateway::new(client, 10, std::time::Duration::from_secs(5)); + let (nodes, node_directory, node_rpc_directory) = + build_node_directories(&gateway).await.map_err(|e| { + Rejection::from(BadRequest { + message: format!("failed to build node directories: {}", e), + }) + })?; - // First hash in the sequence is the original content - let orig_hash = hash_seq.iter().next().ok_or_else(|| { + let blob_id = BlobId(blob_hash.0); + let retrieved_data = retrieve( + &BlobRetrievalParams { + blob_id, + 
original_len: size as usize, + data_shards: DEFAULT_DATA_SHARDS, + parity_shards: DEFAULT_PARITY_SHARDS, + nodes, + node_directory, + node_rpc_directory, + }, + &iroh, + ) + .await + .map_err(|e| { Rejection::from(BadRequest { - message: "hash sequence is empty".to_string(), + message: format!("failed to retrieve blob: {}", e), }) })?; - debug!("parsed orig_hash from hash sequence: {}", orig_hash); - - let object_range = match range { - Some(range) => { - let (first_byte, last_byte) = get_range_params(range, size).map_err(|e| { - Rejection::from(BadRequest { - message: e.to_string(), - }) - })?; - let len = (last_byte - first_byte) + 1; - - // Use read_at for range requests on the original content - use iroh_blobs::rpc::client::blobs::ReadAtLen; - let read_len = ReadAtLen::AtMost(len); - let bytes = iroh - .read_at_to_bytes(orig_hash, first_byte, read_len) - .await - .map_err(|e| { - Rejection::from(BadRequest { - message: format!( - "failed to read blob at range: {} {}", - orig_hash, e - ), - }) - })?; - - let body = Body::from(bytes); - ObjectRange { - start: first_byte, - end: last_byte, - len, - size, - body, - } - } - None => { - // Read the entire original content blob directly from Iroh - debug!("reading original content with hash: {}", orig_hash); - - let reader = iroh.read(orig_hash).await.map_err(|e| { - Rejection::from(BadRequest { - message: format!("failed to read blob: {} {}", orig_hash, e), - }) - })?; - - let bytes_stream = reader.map(move |chunk_result: Result| { - chunk_result.map_err(|e: std::io::Error| anyhow::anyhow!(e)) - }); - - let body = Body::wrap_stream(bytes_stream); - ObjectRange { - start: 0, - end: size - 1, - len: size, - size, - body, - } - } - }; - - // If it is a HEAD request, we don't need to send the body + // HEAD request: return headers only if method == "HEAD" { let mut response = warp::reply::Response::new(Body::empty()); let mut header_map = HeaderMap::new(); - header_map.insert("Content-Length", 
HeaderValue::from(object_range.len)); + header_map.insert("Content-Length", HeaderValue::from(size)); let headers = response.headers_mut(); headers.extend(header_map); return Ok(response); } - let mut response = warp::reply::Response::new(object_range.body); + let mut response = warp::reply::Response::new(Body::from(retrieved_data)); let mut header_map = HeaderMap::new(); - if object_range.len < object_range.size { - *response.status_mut() = StatusCode::PARTIAL_CONTENT; - header_map.insert( - "Content-Range", - HeaderValue::from_str(&format!( - "bytes {}-{}/{}", - object_range.start, object_range.end, object_range.size - )) - .unwrap(), - ); - } else { - header_map.insert("Accept-Ranges", HeaderValue::from_str("bytes").unwrap()); - } - header_map.insert("Content-Length", HeaderValue::from(object_range.len)); + header_map.insert("Content-Length", HeaderValue::from(size)); header_map.insert( "Content-Type", HeaderValue::from_str("application/octet-stream").unwrap(), @@ -1041,7 +748,7 @@ async fn handle_blob_download( headers.extend(header_map); COUNTER_BLOBS_DOWNLOADED.inc(); - COUNTER_BYTES_DOWNLOADED.inc_by(object_range.len); + COUNTER_BYTES_DOWNLOADED.inc_by(size); HISTOGRAM_DOWNLOAD_TIME.observe(start_time.elapsed().as_secs_f64()); Ok(response) @@ -1162,43 +869,3 @@ fn get_filename_with_extension(filename: &str, content_type: &str) -> Option last - let _ = get_range_params("bytes=50-0".into(), 100).is_err(); - // first >= size - let _ = get_range_params("bytes=100-".into(), 100).is_err(); - // first == last - let (first, last) = get_range_params("bytes=0-0".into(), 100).unwrap(); - assert_eq!(first, 0); - assert_eq!(last, 0); - // exact range given - let (first, last) = get_range_params("bytes=0-50".into(), 100).unwrap(); - assert_eq!(first, 0); - assert_eq!(last, 50); - // only end given, this means "give me last 50 bytes" - let (first, last) = get_range_params("bytes=-50".into(), 100).unwrap(); - assert_eq!(first, 50); - assert_eq!(last, 99); - // only start 
given, this means "give me everything but the first 50 bytes" - let (first, last) = get_range_params("bytes=50-".into(), 100).unwrap(); - assert_eq!(first, 50); - assert_eq!(last, 99); - // neither given, this means "give me everything" - let (first, last) = get_range_params("bytes=-".into(), 100).unwrap(); - assert_eq!(first, 0); - assert_eq!(last, 99); - // last >= size - let (first, last) = get_range_params("bytes=50-100".into(), 100).unwrap(); - assert_eq!(first, 50); - assert_eq!(last, 99); - } -} diff --git a/ipc-storage/ipc-decentralized-storage/src/retrieval.rs b/ipc-storage/ipc-decentralized-storage/src/retrieval.rs new file mode 100644 index 000000000..f00f00c76 --- /dev/null +++ b/ipc-storage/ipc-decentralized-storage/src/retrieval.rs @@ -0,0 +1,257 @@ +// Copyright 2025 Recall Contributors +// SPDX-License-Identifier: Apache-2.0, MIT + +//! Data retrieval module for erasure-encoded shard fetching and decoding. +//! +//! Derives the shard-to-node mapping from on-chain parameters, fetches +//! at least k shards per chunk from the assigned nodes, and RS-decodes +//! to recover the original data. + +use anyhow::{Context, Result}; +use erasure_encoding::{ + decode_chunks, shard_node, BlobId, ChunkRecoveryInput, NodeId, ReedSolomonEncoder, Shard, + DEFAULT_MAX_CHUNK_SIZE, +}; +use iroh::NodeAddr; +use iroh_blobs::Hash; +use iroh_manager::BlobsClient; +use tracing::{debug, info, warn}; + +use std::str::FromStr; + +use crate::distribution::{shard_key, NodeDirectory, NodeRpcDirectory}; + +/// On-chain blob metadata needed for retrieval. +/// Corresponds to the StorageCommitment fields from DESIGN.md. +pub struct BlobRetrievalParams { + pub blob_id: BlobId, + pub original_len: usize, + /// Number of data shards per chunk (k). + pub data_shards: usize, + /// Number of parity shards per chunk (m). + pub parity_shards: usize, + /// Node list at encoding epoch. + pub nodes: Vec, + /// Mapping from NodeId to Iroh NodeAddr. 
+ pub node_directory: NodeDirectory, + /// Mapping from NodeId to RPC URL (for shard hash lookups). + pub node_rpc_directory: NodeRpcDirectory, +} + +impl BlobRetrievalParams { + pub fn num_chunks(&self) -> usize { + self.original_len.div_ceil(DEFAULT_MAX_CHUNK_SIZE) + } + + pub fn chunk_data_len(&self, chunk_index: usize) -> usize { + let start = chunk_index * DEFAULT_MAX_CHUNK_SIZE; + DEFAULT_MAX_CHUNK_SIZE.min(self.original_len - start) + } + + pub fn shards_per_chunk(&self) -> usize { + self.data_shards + self.parity_shards + } +} + +/// Retrieve and decode a blob from the network. +/// +/// 1. Derive chunk structure from original_len and MAX_CHUNK_SIZE +/// 2. For each chunk, compute shard→node mapping via shard_node() +/// 3. Fetch shards from assigned nodes (need only k, try all k+m, early-exit) +/// 4. RS-decode each chunk, concatenate, truncate to original_len +pub async fn retrieve(params: &BlobRetrievalParams, local_blobs: &BlobsClient) -> Result> { + let num_chunks = params.num_chunks(); + let shards_per_chunk = params.shards_per_chunk(); + + info!( + "Retrieving blob: {} chunks, k={}, m={}, original_len={}", + num_chunks, params.data_shards, params.parity_shards, params.original_len + ); + + let mut recovery_inputs = Vec::with_capacity(num_chunks); + + for chunk_idx in 0..num_chunks { + let chunk_data_len = params.chunk_data_len(chunk_idx); + let mut fetched_shards = Vec::new(); + + for shard_idx in 0..shards_per_chunk { + let node = shard_node( + ¶ms.blob_id, + chunk_idx, + shard_idx, + shards_per_chunk, + ¶ms.nodes, + ); + + let node_addr = params.node_directory.get(&node); + let node_rpc_url = params.node_rpc_directory.get(&node).map(|s| s.as_str()); + + match fetch_shard(local_blobs, ¶ms.blob_id, chunk_idx, shard_idx, node_addr, node_rpc_url).await { + Ok(data) => { + fetched_shards.push(Shard { + index: shard_idx, + data, + }); + if fetched_shards.len() >= params.data_shards { + debug!( + "Chunk {}: collected {} shards (k={}), sufficient", + 
chunk_idx, + fetched_shards.len(), + params.data_shards + ); + break; + } + } + Err(e) => { + warn!( + "Failed to fetch shard {}/{} from node {:?}: {}", + chunk_idx, shard_idx, node, e + ); + let remaining = shards_per_chunk - (shard_idx + 1); + if fetched_shards.len() + remaining < params.data_shards { + anyhow::bail!( + "Chunk {}: cannot collect enough shards. Have {}, need {}, {} remaining", + chunk_idx, + fetched_shards.len(), + params.data_shards, + remaining + ); + } + } + } + } + + if fetched_shards.len() < params.data_shards { + anyhow::bail!( + "Chunk {}: only fetched {} shards, need {}", + chunk_idx, + fetched_shards.len(), + params.data_shards + ); + } + + recovery_inputs.push(ChunkRecoveryInput { + chunk_index: chunk_idx, + original_data_len: chunk_data_len, + shards: fetched_shards, + num_data_shards: params.data_shards, + num_parity_shards: params.parity_shards, + }); + } + + let recovered = decode_chunks::(&mut recovery_inputs, params.original_len)?; + + info!("Successfully retrieved and decoded {} bytes", recovered.len()); + Ok(recovered) +} + +/// Fetch a single shard, first checking local Iroh then downloading from the remote node. 
+async fn fetch_shard( + blobs: &BlobsClient, + blob_id: &BlobId, + chunk_index: usize, + shard_index: usize, + _node_addr: Option<&NodeAddr>, + node_rpc_url: Option<&str>, +) -> Result> { + let tag = shard_key(blob_id, chunk_index, shard_index); + + // Try local first — shard may already be cached + if let Ok(hash) = tag_to_hash_lookup(blobs, &tag).await { + if let Ok(bytes) = blobs.read_to_bytes(hash).await { + info!( + "Shard {}/{} found locally (tag={}, hash={}, size={})", + chunk_index, shard_index, tag, hash, bytes.len() + ); + return Ok(bytes.to_vec()); + } + } + + // Download from the assigned node + let rpc_url = node_rpc_url + .ok_or_else(|| anyhow::anyhow!("No RPC URL for shard {}", tag))?; + + // Query the node's RPC for the shard's Iroh hash and NodeAddr + let (hash, source_addr) = query_shard_hash(rpc_url, blob_id, chunk_index, shard_index).await?; + + blobs + .download_with_opts( + hash, + iroh_blobs::rpc::client::blobs::DownloadOptions { + format: iroh_blobs::BlobFormat::Raw, + nodes: vec![source_addr], + tag: iroh_blobs::util::SetTagOption::Named(iroh_blobs::Tag(tag.into())), + mode: iroh_blobs::rpc::client::blobs::DownloadMode::Queued, + }, + ) + .await + .context("failed to start shard download")? + .finish() + .await + .context("shard download did not complete")?; + + let bytes = blobs.read_to_bytes(hash).await.context("failed to read downloaded shard")?; + Ok(bytes.to_vec()) +} + +/// Look up a locally-stored blob hash by its named tag. +pub async fn tag_to_hash_lookup(blobs: &BlobsClient, tag: &str) -> Result { + use futures::StreamExt; + let iroh_tag = iroh_blobs::Tag(tag.to_string().into()); + let mut tags = blobs.tags().list().await?; + while let Some(Ok(tag_info)) = tags.next().await { + if tag_info.name == iroh_tag { + return Ok(tag_info.hash); + } + } + anyhow::bail!("tag not found: {}", tag) +} + +/// Response from the shard hash lookup endpoint. 
+#[derive(serde::Deserialize)] +struct ShardHashResponse { + hash: String, + node_addr: NodeAddr, +} + +/// Query a storage node's RPC to get the Iroh hash for a specific shard. +async fn query_shard_hash( + rpc_url: &str, + blob_id: &BlobId, + chunk_index: usize, + shard_index: usize, +) -> Result<(Hash, NodeAddr)> { + let blob_id_hex = hex::encode(blob_id.0); + let url = format!( + "{}/v1/shards/{}/{}/{}/hash", + rpc_url.trim_end_matches('/'), + blob_id_hex, + chunk_index, + shard_index + ); + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(10)) + .build() + .context("failed to create HTTP client")?; + + let resp = client + .get(&url) + .send() + .await + .with_context(|| format!("failed to query shard hash from {}", url))?; + + if !resp.status().is_success() { + anyhow::bail!("shard hash lookup failed: HTTP {}", resp.status()); + } + + let response: ShardHashResponse = resp + .json() + .await + .context("failed to parse shard hash response")?; + + let hash = Hash::from_str(&response.hash) + .map_err(|_| anyhow::anyhow!("invalid hash in shard lookup response"))?; + + Ok((hash, response.node_addr)) +} diff --git a/ipc-storage/ipc-dropbox/src/components/FileExplorer.tsx b/ipc-storage/ipc-dropbox/src/components/FileExplorer.tsx index 51301ed70..b72ce7d66 100644 --- a/ipc-storage/ipc-dropbox/src/components/FileExplorer.tsx +++ b/ipc-storage/ipc-dropbox/src/components/FileExplorer.tsx @@ -14,7 +14,7 @@ interface FileExplorerProps { onNavigateToFolder: (path: string) => void; onNavigateUp: () => void; onRefresh: () => void; - onUpload: (file: File, targetPath: string) => Promise; + onUpload: (file: File, targetPath: string, dataShards: number, parityShards: number) => Promise; onDownload: (blobHash: string, fileName: string) => Promise; onDelete: (key: string) => Promise; onFetchFiles: (prefix: string) => void; @@ -41,25 +41,41 @@ export function FileExplorer({ const fileInputRef = useRef(null); const [newFolderName, 
setNewFolderName] = useState(''); const [showNewFolderInput, setShowNewFolderInput] = useState(false); + const [pendingFile, setPendingFile] = useState(null); + const [showUploadConfig, setShowUploadConfig] = useState(false); + const [dataShards, setDataShards] = useState(4); + const [parityShards, setParityShards] = useState(2); useEffect(() => { onFetchFiles(currentPath); }, [onFetchFiles, currentPath]); - const handleFileSelect = async (e: React.ChangeEvent) => { + const handleFileSelect = (e: React.ChangeEvent) => { const file = e.target.files?.[0]; if (file) { - const success = await onUpload(file, currentPath); - if (success) { - onRefresh(); - } + setPendingFile(file); + setShowUploadConfig(true); } - // Reset input if (fileInputRef.current) { fileInputRef.current.value = ''; } }; + const handleUploadConfirm = async () => { + if (!pendingFile) return; + setShowUploadConfig(false); + const success = await onUpload(pendingFile, currentPath, dataShards, parityShards); + if (success) { + onRefresh(); + } + setPendingFile(null); + }; + + const handleUploadCancel = () => { + setShowUploadConfig(false); + setPendingFile(null); + }; + const handleCreateFolder = () => { if (newFolderName.trim()) { const folderPath = currentPath + newFolderName.trim() + '/'; @@ -72,10 +88,10 @@ export function FileExplorer({ const formatSize = (size?: bigint) => { if (!size) return '-'; const bytes = Number(size); - if (bytes < 1024) return `${bytes} B`; - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; - if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; - return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; + if (bytes < 1000) return `${bytes} B`; + if (bytes < 1000 * 1000) return `${(bytes / 1000).toFixed(1)} KB`; + if (bytes < 1000 * 1000 * 1000) return `${(bytes / (1000 * 1000)).toFixed(1)} MB`; + return `${(bytes / (1000 * 1000 * 1000)).toFixed(1)} GB`; }; const getBreadcrumbs = () => { @@ -172,6 +188,44 @@ export function 
FileExplorer({ )} + {showUploadConfig && pendingFile && ( +
+
+ Upload: {pendingFile.name} +
+
+ + +
+
+ + +
+
+ )} + {(error || uploadError || deleteError) && (

{error || uploadError || deleteError}

)} diff --git a/ipc-storage/ipc-dropbox/src/hooks/useDownload.ts b/ipc-storage/ipc-dropbox/src/hooks/useDownload.ts index 8326f34ac..335f3b110 100644 --- a/ipc-storage/ipc-dropbox/src/hooks/useDownload.ts +++ b/ipc-storage/ipc-dropbox/src/hooks/useDownload.ts @@ -15,9 +15,9 @@ export function useDownload() { // Remove 0x prefix if present const hash = blobHash.startsWith('0x') ? blobHash.slice(2) : blobHash; - console.log('Fetching from:', `${config.nodeOperationObjectApi}/v1/blobs/${hash}/content`); + console.log('Fetching from:', `${config.objectsListenAddr}/v1/blobs/${hash}`); - const response = await fetch(`${config.nodeOperationObjectApi}/v1/blobs/${hash}/content`); + const response = await fetch(`${config.objectsListenAddr}/v1/blobs/${hash}`); if (!response.ok) { throw new Error(`Download failed: ${response.statusText}`); diff --git a/ipc-storage/ipc-dropbox/src/hooks/useUpload.ts b/ipc-storage/ipc-dropbox/src/hooks/useUpload.ts index 4b389e173..a44d56586 100644 --- a/ipc-storage/ipc-dropbox/src/hooks/useUpload.ts +++ b/ipc-storage/ipc-dropbox/src/hooks/useUpload.ts @@ -48,7 +48,7 @@ export function useUpload(signer: ethers.Signer | null, bucketAddress: string | return false; }, [signer]); - const uploadFile = useCallback(async (file: File, targetPath: string) => { + const uploadFile = useCallback(async (file: File, targetPath: string, dataShards: number = 4, parityShards: number = 2) => { if (!signer || !bucketAddress) { setError('Wallet or bucket not connected'); return false; @@ -109,7 +109,9 @@ export function useUpload(signer: ethers.Signer | null, bucketAddress: string | fullPath, blobHash, metadataHash, - BigInt(file.size) + BigInt(file.size), + dataShards, + parityShards ); setUploadProgress('Waiting for transaction confirmation...'); diff --git a/ipc-storage/ipc-dropbox/src/index.css b/ipc-storage/ipc-dropbox/src/index.css index 3aedc0fa0..10db23b3b 100644 --- a/ipc-storage/ipc-dropbox/src/index.css +++ b/ipc-storage/ipc-dropbox/src/index.css @@ 
-319,6 +319,39 @@ body { max-width: 300px; } +/* Upload Config */ +.upload-config { + padding: 1rem 1.25rem; + border-bottom: 1px solid var(--border); + background: var(--background); +} + +.upload-config-header { + margin-bottom: 0.75rem; +} + +.upload-config-fields { + display: flex; + gap: 1rem; + margin-bottom: 0.75rem; +} + +.upload-config-label { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.875rem; +} + +.upload-config-label .input { + width: 70px; +} + +.upload-config-actions { + display: flex; + gap: 0.5rem; +} + /* File List */ .file-list { overflow-x: auto; diff --git a/ipc-storage/ipc-dropbox/src/utils/contracts.ts b/ipc-storage/ipc-dropbox/src/utils/contracts.ts index dba564594..eaf4d4409 100644 --- a/ipc-storage/ipc-dropbox/src/utils/contracts.ts +++ b/ipc-storage/ipc-dropbox/src/utils/contracts.ts @@ -23,7 +23,7 @@ export const ADM_ABI = [ // ABI for Bucket Actor export const BUCKET_ABI = [ - 'function addObject(bytes32 source, string key, bytes32 hash, bytes32 recoveryHash, uint64 size)', + 'function addObject(bytes32 source, string key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint16 dataShards, uint16 parityShards)', 'function getObject(string key) view returns (tuple(bytes32 blobHash, bytes32 recoveryHash, uint64 size, uint64 expiry, tuple(string key, string value)[] metadata))', 'function deleteObject(string key)', 'function updateObjectMetadata(string key, tuple(string key, string value)[] metadata)', diff --git a/ipc-storage/sol-facade/crates/facade/src/blobs_facade/iblobsfacade.rs b/ipc-storage/sol-facade/crates/facade/src/blobs_facade/iblobsfacade.rs index 259e3e502..51f14aa5d 100644 --- a/ipc-storage/sol-facade/crates/facade/src/blobs_facade/iblobsfacade.rs +++ b/ipc-storage/sol-facade/crates/facade/src/blobs_facade/iblobsfacade.rs @@ -41,11 +41,11 @@ interface IBlobsFacade { event BlobFinalized(address indexed subscriber, bytes32 hash, bool resolved); event BlobPending(address indexed subscriber, bytes32 
hash, bytes32 sourceId); - function addBlob(address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl) external; + function addBlob(address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl, uint16 dataShards, uint16 parityShards) external; function deleteBlob(address subscriber, bytes32 blobHash, string memory subscriptionId) external; function getBlob(bytes32 blobHash) external view returns (Blob memory blob); function getStats() external view returns (SubnetStats memory stats); - function overwriteBlob(bytes32 oldHash, address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl) external; + function overwriteBlob(bytes32 oldHash, address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl, uint16 dataShards, uint16 parityShards) external; function trimBlobExpiries(address subscriber, bytes32 startingHash, uint32 limit) external returns (TrimBlobExpiries memory); } ``` @@ -91,6 +91,16 @@ interface IBlobsFacade { "name": "ttl", "type": "uint64", "internalType": "uint64" + }, + { + "name": "dataShards", + "type": "uint16", + "internalType": "uint16" + }, + { + "name": "parityShards", + "type": "uint16", + "internalType": "uint16" } ], "outputs": [], @@ -295,6 +305,16 @@ interface IBlobsFacade { "name": "ttl", "type": "uint64", "internalType": "uint64" + }, + { + "name": "dataShards", + "type": "uint16", + "internalType": "uint16" + }, + { + "name": "parityShards", + "type": "uint16", + "internalType": "uint16" } ], "outputs": [], @@ -2208,9 +2228,9 @@ pub mod IBlobsFacade { } } }; - /**Function with signature `addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64)` and selector `0x5b5cc14f`. 
+ /**Function with signature `addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)` and selector `0xac106e38`. ```solidity - function addBlob(address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl) external; + function addBlob(address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl, uint16 dataShards, uint16 parityShards) external; ```*/ #[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] @@ -2229,8 +2249,12 @@ pub mod IBlobsFacade { pub size: u64, #[allow(missing_docs)] pub ttl: u64, + #[allow(missing_docs)] + pub dataShards: u16, + #[allow(missing_docs)] + pub parityShards: u16, } - ///Container type for the return parameters of the [`addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64)`](addBlobCall) function. + ///Container type for the return parameters of the [`addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)`](addBlobCall) function. 
#[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] pub struct addBlobReturn {} @@ -2252,6 +2276,8 @@ pub mod IBlobsFacade { ::alloy_sol_types::sol_data::String, ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); #[doc(hidden)] type UnderlyingRustTuple<'a> = ( @@ -2262,6 +2288,8 @@ pub mod IBlobsFacade { ::alloy_sol_types::private::String, u64, u64, + u16, + u16, ); #[cfg(test)] #[allow(dead_code, unreachable_patterns)] @@ -2284,6 +2312,8 @@ pub mod IBlobsFacade { value.subscriptionId, value.size, value.ttl, + value.dataShards, + value.parityShards, ) } } @@ -2299,6 +2329,8 @@ pub mod IBlobsFacade { subscriptionId: tuple.4, size: tuple.5, ttl: tuple.6, + dataShards: tuple.7, + parityShards: tuple.8, } } } @@ -2342,14 +2374,16 @@ pub mod IBlobsFacade { ::alloy_sol_types::sol_data::String, ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); type Token<'a> = as alloy_sol_types::SolType>::Token<'a>; type Return = addBlobReturn; type ReturnTuple<'a> = (); type ReturnToken<'a> = as alloy_sol_types::SolType>::Token<'a>; const SIGNATURE: &'static str = - "addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64)"; - const SELECTOR: [u8; 4] = [91u8, 92u8, 193u8, 79u8]; + "addBlob(address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)"; + const SELECTOR: [u8; 4] = [172u8, 16u8, 110u8, 56u8]; #[inline] fn new<'a>( tuple: as alloy_sol_types::SolType>::RustType, @@ -2380,6 +2414,12 @@ pub mod IBlobsFacade { <::alloy_sol_types::sol_data::Uint< 64, > as alloy_sol_types::SolType>::tokenize(&self.ttl), + <::alloy_sol_types::sol_data::Uint< + 16, + > as alloy_sol_types::SolType>::tokenize(&self.dataShards), + <::alloy_sol_types::sol_data::Uint< + 16, + > as 
alloy_sol_types::SolType>::tokenize(&self.parityShards), ) } #[inline] @@ -2764,9 +2804,9 @@ pub mod IBlobsFacade { } } }; - /**Function with signature `overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64)` and selector `0x434fc5a4`. + /**Function with signature `overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)` and selector `0x67132023`. ```solidity - function overwriteBlob(bytes32 oldHash, address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl) external; + function overwriteBlob(bytes32 oldHash, address sponsor, bytes32 source, bytes32 blobHash, bytes32 metadataHash, string memory subscriptionId, uint64 size, uint64 ttl, uint16 dataShards, uint16 parityShards) external; ```*/ #[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] @@ -2787,8 +2827,12 @@ pub mod IBlobsFacade { pub size: u64, #[allow(missing_docs)] pub ttl: u64, + #[allow(missing_docs)] + pub dataShards: u16, + #[allow(missing_docs)] + pub parityShards: u16, } - ///Container type for the return parameters of the [`overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64)`](overwriteBlobCall) function. + ///Container type for the return parameters of the [`overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)`](overwriteBlobCall) function. 
#[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] pub struct overwriteBlobReturn {} @@ -2811,6 +2855,8 @@ pub mod IBlobsFacade { ::alloy_sol_types::sol_data::String, ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); #[doc(hidden)] type UnderlyingRustTuple<'a> = ( @@ -2822,6 +2868,8 @@ pub mod IBlobsFacade { ::alloy_sol_types::private::String, u64, u64, + u16, + u16, ); #[cfg(test)] #[allow(dead_code, unreachable_patterns)] @@ -2845,6 +2893,8 @@ pub mod IBlobsFacade { value.subscriptionId, value.size, value.ttl, + value.dataShards, + value.parityShards, ) } } @@ -2861,6 +2911,8 @@ pub mod IBlobsFacade { subscriptionId: tuple.5, size: tuple.6, ttl: tuple.7, + dataShards: tuple.8, + parityShards: tuple.9, } } } @@ -2905,14 +2957,16 @@ pub mod IBlobsFacade { ::alloy_sol_types::sol_data::String, ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); type Token<'a> = as alloy_sol_types::SolType>::Token<'a>; type Return = overwriteBlobReturn; type ReturnTuple<'a> = (); type ReturnToken<'a> = as alloy_sol_types::SolType>::Token<'a>; const SIGNATURE: &'static str = - "overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64)"; - const SELECTOR: [u8; 4] = [67u8, 79u8, 197u8, 164u8]; + "overwriteBlob(bytes32,address,bytes32,bytes32,bytes32,string,uint64,uint64,uint16,uint16)"; + const SELECTOR: [u8; 4] = [103u8, 19u8, 32u8, 35u8]; #[inline] fn new<'a>( tuple: as alloy_sol_types::SolType>::RustType, @@ -2946,6 +3000,12 @@ pub mod IBlobsFacade { <::alloy_sol_types::sol_data::Uint< 64, > as alloy_sol_types::SolType>::tokenize(&self.ttl), + <::alloy_sol_types::sol_data::Uint< + 16, + > as alloy_sol_types::SolType>::tokenize(&self.dataShards), + <::alloy_sol_types::sol_data::Uint< + 16, + > as 
alloy_sol_types::SolType>::tokenize(&self.parityShards), ) } #[inline] @@ -3129,10 +3189,10 @@ pub mod IBlobsFacade { /// /// Prefer using `SolInterface` methods instead. pub const SELECTORS: &'static [[u8; 4usize]] = &[ - [67u8, 79u8, 197u8, 164u8], - [91u8, 92u8, 193u8, 79u8], + [103u8, 19u8, 32u8, 35u8], [120u8, 248u8, 175u8, 133u8], [138u8, 77u8, 26u8, 212u8], + [172u8, 16u8, 110u8, 56u8], [190u8, 169u8, 1u8, 106u8], [197u8, 157u8, 72u8, 71u8], ]; @@ -3187,16 +3247,6 @@ pub mod IBlobsFacade { } overwriteBlob }, - { - fn addBlob( - data: &[u8], - validate: bool, - ) -> alloy_sol_types::Result { - ::abi_decode_raw(data, validate) - .map(IBlobsFacadeCalls::addBlob) - } - addBlob - }, { fn trimBlobExpiries( data: &[u8], @@ -3219,6 +3269,16 @@ pub mod IBlobsFacade { } getBlob }, + { + fn addBlob( + data: &[u8], + validate: bool, + ) -> alloy_sol_types::Result { + ::abi_decode_raw(data, validate) + .map(IBlobsFacadeCalls::addBlob) + } + addBlob + }, { fn deleteBlob( data: &[u8], diff --git a/ipc-storage/sol-facade/crates/facade/src/bucket_facade/ibucketfacade.rs b/ipc-storage/sol-facade/crates/facade/src/bucket_facade/ibucketfacade.rs index 1105c7f0e..87a5756e5 100644 --- a/ipc-storage/sol-facade/crates/facade/src/bucket_facade/ibucketfacade.rs +++ b/ipc-storage/sol-facade/crates/facade/src/bucket_facade/ibucketfacade.rs @@ -36,8 +36,8 @@ interface IBucketFacade { event ObjectDeleted(bytes key, bytes32 blobHash); event ObjectMetadataUpdated(bytes key, bytes metadata); - function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size) external; - function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint64 ttl, KeyValue[] memory metadata, bool overwrite) external; + function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint16 dataShards, uint16 parityShards) external; + function addObject(bytes32 source, string memory key, bytes32 
hash, bytes32 recoveryHash, uint64 size, uint64 ttl, KeyValue[] memory metadata, bool overwrite, uint16 dataShards, uint16 parityShards) external; function deleteObject(string memory key) external; function getObject(string memory key) external view returns (ObjectValue memory); function queryObjects(string memory prefix, string memory delimiter, string memory startKey, uint64 limit) external view returns (Query memory); @@ -80,6 +80,16 @@ interface IBucketFacade { "name": "size", "type": "uint64", "internalType": "uint64" + }, + { + "name": "dataShards", + "type": "uint16", + "internalType": "uint16" + }, + { + "name": "parityShards", + "type": "uint16", + "internalType": "uint16" } ], "outputs": [], @@ -140,6 +150,16 @@ interface IBucketFacade { "name": "overwrite", "type": "bool", "internalType": "bool" + }, + { + "name": "dataShards", + "type": "uint16", + "internalType": "uint16" + }, + { + "name": "parityShards", + "type": "uint16", + "internalType": "uint16" } ], "outputs": [], @@ -2226,9 +2246,9 @@ pub mod IBucketFacade { } } }; - /**Function with signature `addObject(bytes32,string,bytes32,bytes32,uint64)` and selector `0x2d6f2550`. + /**Function with signature `addObject(bytes32,string,bytes32,bytes32,uint64,uint16,uint16)` and selector `0x9579baf9`. 
```solidity - function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size) external; + function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint16 dataShards, uint16 parityShards) external; ```*/ #[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] @@ -2243,8 +2263,12 @@ pub mod IBucketFacade { pub recoveryHash: ::alloy_sol_types::private::FixedBytes<32>, #[allow(missing_docs)] pub size: u64, + #[allow(missing_docs)] + pub dataShards: u16, + #[allow(missing_docs)] + pub parityShards: u16, } - ///Container type for the return parameters of the [`addObject(bytes32,string,bytes32,bytes32,uint64)`](addObject_0Call) function. + ///Container type for the return parameters of the [`addObject(bytes32,string,bytes32,bytes32,uint64,uint16,uint16)`](addObject_0Call) function. #[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] pub struct addObject_0Return {} @@ -2264,6 +2288,8 @@ pub mod IBucketFacade { ::alloy_sol_types::sol_data::FixedBytes<32>, ::alloy_sol_types::sol_data::FixedBytes<32>, ::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); #[doc(hidden)] type UnderlyingRustTuple<'a> = ( @@ -2272,6 +2298,8 @@ pub mod IBucketFacade { ::alloy_sol_types::private::FixedBytes<32>, ::alloy_sol_types::private::FixedBytes<32>, u64, + u16, + u16, ); #[cfg(test)] #[allow(dead_code, unreachable_patterns)] @@ -2292,6 +2320,8 @@ pub mod IBucketFacade { value.hash, value.recoveryHash, value.size, + value.dataShards, + value.parityShards, ) } } @@ -2305,6 +2335,8 @@ pub mod IBucketFacade { hash: tuple.2, recoveryHash: tuple.3, size: tuple.4, + dataShards: tuple.5, + parityShards: tuple.6, } } } @@ -2346,13 +2378,15 @@ pub mod IBucketFacade { ::alloy_sol_types::sol_data::FixedBytes<32>, ::alloy_sol_types::sol_data::FixedBytes<32>, 
::alloy_sol_types::sol_data::Uint<64>, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); type Token<'a> = as alloy_sol_types::SolType>::Token<'a>; type Return = addObject_0Return; type ReturnTuple<'a> = (); type ReturnToken<'a> = as alloy_sol_types::SolType>::Token<'a>; - const SIGNATURE: &'static str = "addObject(bytes32,string,bytes32,bytes32,uint64)"; - const SELECTOR: [u8; 4] = [45u8, 111u8, 37u8, 80u8]; + const SIGNATURE: &'static str = "addObject(bytes32,string,bytes32,bytes32,uint64,uint16,uint16)"; + const SELECTOR: [u8; 4] = [149u8, 121u8, 186u8, 249u8]; #[inline] fn new<'a>( tuple: as alloy_sol_types::SolType>::RustType, @@ -2377,6 +2411,12 @@ pub mod IBucketFacade { <::alloy_sol_types::sol_data::Uint< 64, > as alloy_sol_types::SolType>::tokenize(&self.size), + <::alloy_sol_types::sol_data::Uint< + 16, + > as alloy_sol_types::SolType>::tokenize(&self.dataShards), + <::alloy_sol_types::sol_data::Uint< + 16, + > as alloy_sol_types::SolType>::tokenize(&self.parityShards), ) } #[inline] @@ -2391,9 +2431,9 @@ pub mod IBucketFacade { } } }; - /**Function with signature `addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool)` and selector `0x774343fe`. + /**Function with signature `addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool,uint16,uint16)` and selector `0x5f404e39`. 
```solidity - function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint64 ttl, KeyValue[] memory metadata, bool overwrite) external; + function addObject(bytes32 source, string memory key, bytes32 hash, bytes32 recoveryHash, uint64 size, uint64 ttl, KeyValue[] memory metadata, bool overwrite, uint16 dataShards, uint16 parityShards) external; ```*/ #[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] @@ -2415,8 +2455,12 @@ pub mod IBucketFacade { ::alloy_sol_types::private::Vec<::RustType>, #[allow(missing_docs)] pub overwrite: bool, + #[allow(missing_docs)] + pub dataShards: u16, + #[allow(missing_docs)] + pub parityShards: u16, } - ///Container type for the return parameters of the [`addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool)`](addObject_1Call) function. + ///Container type for the return parameters of the [`addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool,uint16,uint16)`](addObject_1Call) function. 
#[allow(non_camel_case_types, non_snake_case, clippy::pub_underscore_fields)] #[derive(Clone)] pub struct addObject_1Return {} @@ -2439,6 +2483,8 @@ pub mod IBucketFacade { ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Array, ::alloy_sol_types::sol_data::Bool, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); #[doc(hidden)] type UnderlyingRustTuple<'a> = ( @@ -2450,6 +2496,8 @@ pub mod IBucketFacade { u64, ::alloy_sol_types::private::Vec<::RustType>, bool, + u16, + u16, ); #[cfg(test)] #[allow(dead_code, unreachable_patterns)] @@ -2473,6 +2521,8 @@ pub mod IBucketFacade { value.ttl, value.metadata, value.overwrite, + value.dataShards, + value.parityShards, ) } } @@ -2489,6 +2539,8 @@ pub mod IBucketFacade { ttl: tuple.5, metadata: tuple.6, overwrite: tuple.7, + dataShards: tuple.8, + parityShards: tuple.9, } } } @@ -2533,14 +2585,16 @@ pub mod IBucketFacade { ::alloy_sol_types::sol_data::Uint<64>, ::alloy_sol_types::sol_data::Array, ::alloy_sol_types::sol_data::Bool, + ::alloy_sol_types::sol_data::Uint<16>, + ::alloy_sol_types::sol_data::Uint<16>, ); type Token<'a> = as alloy_sol_types::SolType>::Token<'a>; type Return = addObject_1Return; type ReturnTuple<'a> = (); type ReturnToken<'a> = as alloy_sol_types::SolType>::Token<'a>; const SIGNATURE: &'static str = - "addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool)"; - const SELECTOR: [u8; 4] = [119u8, 67u8, 67u8, 254u8]; + "addObject(bytes32,string,bytes32,bytes32,uint64,uint64,(string,string)[],bool,uint16,uint16)"; + const SELECTOR: [u8; 4] = [95u8, 64u8, 78u8, 57u8]; #[inline] fn new<'a>( tuple: as alloy_sol_types::SolType>::RustType, @@ -2574,6 +2628,12 @@ pub mod IBucketFacade { <::alloy_sol_types::sol_data::Bool as alloy_sol_types::SolType>::tokenize( &self.overwrite, ), + <::alloy_sol_types::sol_data::Uint< + 16, + > as alloy_sol_types::SolType>::tokenize(&self.dataShards), + <::alloy_sol_types::sol_data::Uint< + 16, + > as 
alloy_sol_types::SolType>::tokenize(&self.parityShards), ) } #[inline] @@ -3649,12 +3709,12 @@ pub mod IBucketFacade { pub const SELECTORS: &'static [[u8; 4usize]] = &[ [1u8, 83u8, 234u8, 145u8], [23u8, 211u8, 82u8, 192u8], - [45u8, 111u8, 37u8, 80u8], [45u8, 124u8, 182u8, 0u8], [76u8, 83u8, 234u8, 181u8], + [95u8, 64u8, 78u8, 57u8], [98u8, 148u8, 233u8, 163u8], [111u8, 10u8, 79u8, 244u8], - [119u8, 67u8, 67u8, 254u8], + [149u8, 121u8, 186u8, 249u8], [164u8, 67u8, 168u8, 63u8], [201u8, 174u8, 239u8, 129u8], ]; @@ -3733,18 +3793,6 @@ pub mod IBucketFacade { } queryObjects_0 }, - { - fn addObject_0( - data: &[u8], - validate: bool, - ) -> alloy_sol_types::Result { - ::abi_decode_raw( - data, validate, - ) - .map(IBucketFacadeCalls::addObject_0) - } - addObject_0 - }, { fn deleteObject( data: &[u8], @@ -3769,6 +3817,18 @@ pub mod IBucketFacade { } queryObjects_1 }, + { + fn addObject_1( + data: &[u8], + validate: bool, + ) -> alloy_sol_types::Result { + ::abi_decode_raw( + data, validate, + ) + .map(IBucketFacadeCalls::addObject_1) + } + addObject_1 + }, { fn queryObjects_2( data: &[u8], @@ -3794,16 +3854,16 @@ pub mod IBucketFacade { updateObjectMetadata }, { - fn addObject_1( + fn addObject_0( data: &[u8], validate: bool, ) -> alloy_sol_types::Result { - ::abi_decode_raw( + ::abi_decode_raw( data, validate, ) - .map(IBucketFacadeCalls::addObject_1) + .map(IBucketFacadeCalls::addObject_0) } - addObject_1 + addObject_0 }, { fn queryObjects_3(