bp_core/storage/
manifest.rs

1//! File manifest — per-file metadata for the BillPouch distributed FS.
2//!
3//! ## Overview
4//!
//! Every file uploaded to a BillPouch network is described by a [`FileManifest`].
//! The manifest is stored on the network (gossiped or fetched on demand) and
//! describes:
8//!
9//! - The chunking and coding parameters (`k`, `n`, `q`, `ph`, `pe`).
10//! - Where each fragment lives (which Pouch peer holds it).
11//! - File metadata (name, size) encrypted with the **network metadata key**.
12//!
13//! ## Network metadata key
14//!
15//! Each network has a 32-byte **random** secret key stored locally in
16//! `~/.local/share/billpouch/network_keys.json`.  The key is **not** derived
17//! from the network name — knowing `network_id` alone gives no information
18//! about the key.  Keys are distributed to new members exclusively via
19//! signed+encrypted invite tokens (see the invite subsystem).
20//!
21//! ## Chunk encryption
22//!
23//! Each chunk is encrypted with a **per-user CEK** (Content Encryption Key)
24//! before RLNC encoding.  The CEK is derived from the owner’s Ed25519
25//! secret material and a hash of the plaintext chunk, so Pouch nodes holding
26//! fragments never have access to plaintext data and cannot read files
27//! belonging to other users even if they share the same network.
28//!
29//! ```text
30//! cek = BLAKE3_keyed(identity.secret_material(),
31//!                    "billpouch/cek/v1" || BLAKE3(plaintext_chunk))
32//! ```
33//!
34//! ## File upload pipeline
35//!
36//! ```text
37//! File (user data)
38//!   │
39//!   ▼ 1. Chunking  (chunk_size bytes each)
40//!   │
41//!   ▼ 2. Encrypt each chunk  (ChunkCipher::for_user — CEK from identity + plaintext hash)
42//!   │      chunk_id = BLAKE3(encrypted_chunk)[0..16]
43//!   │
44//!   ▼ 3. RLNC encode   k → n fragments per encrypted chunk
45//!   │      k = compute_coding_params(stabilities, ph, q_target).k
46//!   │
47//!   ▼ 4. Distribute one fragment per Pouch peer
48//!         (Pouches only hold ciphertext fragments — never plaintext)
49//! ```
50//!
51//! ## File retrieval pipeline
52//!
53//! ```text
54//! Request propagates via gossip (tree expansion)
55//!   │
56//!   ▼  per chunk: collect ≥ k fragments from Pouch peers
57//!   ▼  RLNC decode  → encrypted chunk
58//!   ▼  ChunkCipher::for_user (re-derived from identity + stored plaintext hash) → plaintext chunk
59//!   ▼  reassemble chunks → file
60//! ```
61
62use crate::{
63    config,
64    error::{BpError, BpResult},
65};
66use rand::RngCore;
67use serde::{Deserialize, Serialize};
68use std::collections::HashMap;
69
70// ── Network metadata key ──────────────────────────────────────────────────────
71
/// 32-byte symmetric key shared by every node of a given network.
///
/// **The key is a randomly generated secret** — it is never derived from the
/// network name.  It is stored locally in `network_keys.json` and handed to
/// new members *exclusively* through signed+encrypted invite tokens.
///
/// Used to encrypt/decrypt file metadata (names, …) stored in manifests.
///
/// `Debug` is implemented by hand (not derived) so that formatting a key —
/// for example with `{:?}` in a log line, or in the failure message of an
/// `assert_eq!` — never prints the secret bytes.
#[derive(Clone, PartialEq, Eq)]
pub struct NetworkMetaKey(pub [u8; 32]);

impl std::fmt::Debug for NetworkMetaKey {
    /// Writes a fixed placeholder instead of the key material.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("NetworkMetaKey(<redacted>)")
    }
}
81
82impl NetworkMetaKey {
83    /// Generate a new random 32-byte network key.
84    ///
85    /// Call once when *creating* a network.  The key must then be distributed
86    /// via invite tokens to all other members.
87    pub fn generate() -> Self {
88        let mut key = [0u8; 32];
89        rand::thread_rng().fill_bytes(&mut key);
90        Self(key)
91    }
92
93    /// Load the key for `network_id` from local storage.
94    ///
95    /// Returns `None` if this node has never joined that network
96    /// (i.e. no key has been stored yet).
97    pub fn load(network_id: &str) -> BpResult<Option<Self>> {
98        let path = config::network_keys_path()?;
99        if !path.exists() {
100            return Ok(None);
101        }
102        let json = std::fs::read_to_string(&path).map_err(BpError::Io)?;
103        let map: HashMap<String, String> = serde_json::from_str(&json)?;
104        match map.get(network_id) {
105            None => Ok(None),
106            Some(hex_key) => {
107                let bytes = hex::decode(hex_key).map_err(|e| {
108                    BpError::Config(format!("Invalid network key for '{network_id}': {e}"))
109                })?;
110                if bytes.len() != 32 {
111                    return Err(BpError::Config(format!(
112                        "Network key for '{network_id}' has wrong length: {}",
113                        bytes.len()
114                    )));
115                }
116                let mut arr = [0u8; 32];
117                arr.copy_from_slice(&bytes);
118                Ok(Some(Self(arr)))
119            }
120        }
121    }
122
123    /// Persist this key for `network_id` to local storage.
124    ///
125    /// Safe to call multiple times — overwrites an existing entry for the
126    /// same `network_id`.  Never logs or surfaces the key bytes.
127    pub fn save(&self, network_id: &str) -> BpResult<()> {
128        config::ensure_dirs()?;
129        let path = config::network_keys_path()?;
130        let mut map: HashMap<String, String> = if path.exists() {
131            let json = std::fs::read_to_string(&path).map_err(BpError::Io)?;
132            serde_json::from_str(&json)?
133        } else {
134            HashMap::new()
135        };
136        map.insert(network_id.to_string(), hex::encode(self.0));
137        let json = serde_json::to_string_pretty(&map)?;
138        std::fs::write(&path, json).map_err(BpError::Io)?;
139        Ok(())
140    }
141
142    /// Load the key for `network_id`, generating and saving a new one if absent.
143    ///
144    /// This is the production entry point used by the daemon when it joins or
145    /// creates a network.
146    pub fn load_or_create(network_id: &str) -> BpResult<Self> {
147        if let Some(key) = Self::load(network_id)? {
148            return Ok(key);
149        }
150        let key = Self::generate();
151        key.save(network_id)?;
152        tracing::info!(network = %network_id, "Generated new NetworkMetaKey");
153        Ok(key)
154    }
155
156    /// **Test/legacy only** — derive a key deterministically from `network_id`.
157    ///
158    /// <div class="warning">
159    /// This is <strong>NOT secure</strong> for production use: anyone who knows
160    /// the network name can compute this key.  Only used in unit tests and
161    /// legacy compatibility paths.
162    /// </div>
163    #[doc(hidden)]
164    pub fn for_network(network_id: &str) -> Self {
165        let mut h = blake3::Hasher::new();
166        h.update(b"billpouch/meta/v1");
167        h.update(network_id.as_bytes());
168        Self(*h.finalize().as_bytes())
169    }
170
171    /// Encrypt `plaintext` and return an authenticated ciphertext blob.
172    ///
173    /// Format: `nonce(16) || mac(32) || ciphertext`.
174    pub fn encrypt(&self, plaintext: &[u8]) -> Vec<u8> {
175        let mut nonce = [0u8; 16];
176        rand::thread_rng().fill_bytes(&mut nonce);
177
178        let keystream = self.keystream(&nonce, plaintext.len());
179        let ciphertext: Vec<u8> = plaintext
180            .iter()
181            .zip(keystream.iter())
182            .map(|(a, b)| a ^ b)
183            .collect();
184
185        let mac = self.mac(&nonce, &ciphertext);
186
187        let mut out = Vec::with_capacity(16 + 32 + ciphertext.len());
188        out.extend_from_slice(&nonce);
189        out.extend_from_slice(&mac);
190        out.extend_from_slice(&ciphertext);
191        out
192    }
193
194    /// Decrypt and authenticate a blob produced by [`encrypt`](Self::encrypt).
195    ///
196    /// Returns the plaintext or an error if the MAC is invalid or the blob is
197    /// too short.
198    pub fn decrypt(&self, blob: &[u8]) -> BpResult<Vec<u8>> {
199        if blob.len() < 48 {
200            return Err(BpError::Coding(
201                "Encrypted metadata blob is too short".into(),
202            ));
203        }
204        let nonce: [u8; 16] = blob[..16].try_into().unwrap();
205        let stored_mac: [u8; 32] = blob[16..48].try_into().unwrap();
206        let ciphertext = &blob[48..];
207
208        let expected_mac = self.mac(&nonce, ciphertext);
209        if expected_mac != stored_mac {
210            return Err(BpError::Coding("Metadata MAC verification failed".into()));
211        }
212
213        let keystream = self.keystream(&nonce, ciphertext.len());
214        let plaintext: Vec<u8> = ciphertext
215            .iter()
216            .zip(keystream.iter())
217            .map(|(a, b)| a ^ b)
218            .collect();
219
220        Ok(plaintext)
221    }
222
223    // ── Internals ─────────────────────────────────────────────────────────
224
225    /// Generate a keystream of `length` bytes using BLAKE3 in counter mode.
226    fn keystream(&self, nonce: &[u8; 16], length: usize) -> Vec<u8> {
227        let mut ks = Vec::with_capacity(length + 32);
228        let mut counter = 0u64;
229        while ks.len() < length {
230            let mut h = blake3::Hasher::new_keyed(&self.0);
231            h.update(b"ks");
232            h.update(nonce);
233            h.update(&counter.to_le_bytes());
234            ks.extend_from_slice(h.finalize().as_bytes());
235            counter += 1;
236        }
237        ks.truncate(length);
238        ks
239    }
240
241    /// Compute a BLAKE3-keyed MAC over `nonce || ciphertext`.
242    fn mac(&self, nonce: &[u8; 16], ciphertext: &[u8]) -> [u8; 32] {
243        let mut h = blake3::Hasher::new_keyed(&self.0);
244        h.update(b"mac");
245        h.update(nonce);
246        h.update(ciphertext);
247        *h.finalize().as_bytes()
248    }
249}
250
251// ── Fragment location ─────────────────────────────────────────────────────────
252
253/// Location of a single RLNC fragment in the network.
254#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
255pub struct FragmentLocation {
256    /// UUID of the fragment (matches `EncodedFragment::id`).
257    pub fragment_id: String,
258    /// libp2p PeerId (base58) of the Pouch that stores this fragment.
259    pub pouch_peer_id: String,
260    /// GF(2⁸) coding vector stored alongside the data.
261    /// Length == `k`; needed by the challenger for Proof-of-Storage verification.
262    pub coding_vector: Vec<u8>,
263}
264
265// ── Chunk manifest ────────────────────────────────────────────────────────────
266
267/// Manifest entry for a single chunk of a file.
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct ChunkManifest {
270    /// BLAKE3 hash prefix (16 hex chars) of the **encrypted** chunk
271    /// (i.e. the data that was fed into `rlnc::encode`).
272    ///
273    /// Encryption is performed with [`crate::storage::ChunkCipher`] before
274    /// RLNC coding, so Pouch nodes never hold plaintext data.
275    pub chunk_id: String,
276    /// Zero-based index of this chunk in the file.
277    pub chunk_index: usize,
278    /// Original byte count of this chunk (before padding and encryption).
279    /// Used to trim padding after decoding.
280    pub original_size: usize,
281    /// Where each of the `n` fragments lives in the network.
282    pub fragment_locations: Vec<FragmentLocation>,
283}
284
285impl ChunkManifest {
286    /// Fragment locations on a specific peer (for fragment-fetch routing).
287    pub fn fragments_on_peer<'a>(&'a self, peer_id: &str) -> Vec<&'a FragmentLocation> {
288        self.fragment_locations
289            .iter()
290            .filter(|loc| loc.pouch_peer_id == peer_id)
291            .collect()
292    }
293
294    /// All Pouch peer IDs that hold at least one fragment of this chunk.
295    pub fn holder_peers(&self) -> Vec<&str> {
296        self.fragment_locations
297            .iter()
298            .map(|loc| loc.pouch_peer_id.as_str())
299            .collect()
300    }
301}
302
303// ── File manifest ─────────────────────────────────────────────────────────────
304
305/// Complete metadata descriptor for a file stored in a BillPouch network.
306///
307/// ## Field visibility
308///
309/// | Field           | Who can read                 | Why                              |
310/// |-----------------|------------------------------|----------------------------------|
311/// | `file_id`       | All network nodes            | Indexing and routing             |
312/// | `owner_fingerprint` | All                      | Attribution                      |
313/// | `encrypted_name`| Only nodes with network key  | Private metadata                 |
314/// | `size_bytes`    | All                          | Storage planning                 |
315/// | `k`, `n`, `q`  | All                          | Codec operation                  |
316/// | `ph`, `pe`      | All                          | Health monitoring                |
317/// | `chunks`        | All                          | Fragment routing                 |
318#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct FileManifest {
320    /// UUID v4 identifying this file in the network.
321    pub file_id: String,
322
323    /// SHA-256(pubkey)[0..8] hex of the uploading user.
324    pub owner_fingerprint: String,
325
326    /// Network this file belongs to.
327    pub network_id: String,
328
329    /// Filename encrypted with the network metadata key.
330    ///
331    /// Use [`NetworkMetaKey::encrypt`] / [`NetworkMetaKey::decrypt`] with the
332    /// key from [`NetworkMetaKey::for_network`] to read or write.
333    pub encrypted_name: Vec<u8>,
334
335    /// Original file size in bytes.
336    pub size_bytes: u64,
337
338    /// Chunk size in bytes used during upload (e.g. 1 MiB = 1_048_576).
339    pub chunk_size: usize,
340
341    /// Total number of chunks.
342    pub num_chunks: usize,
343
344    /// Recovery threshold: minimum fragments needed to reconstruct any chunk.
345    ///
346    /// Computed at upload time via
347    /// [`crate::coding::params::compute_coding_params`].
348    pub k: usize,
349
350    /// Total fragments generated per chunk (= k + redundancy).
351    pub n: usize,
352
353    /// Effective redundancy overhead fraction: `(n − k) / k`.
354    pub q: f64,
355
356    /// Target recovery probability declared at upload time (e.g. `0.999`).
357    pub ph: f64,
358
359    /// **Rolling** effective recovery probability, updated by the daemon as
360    /// QoS measurements evolve.  Starts equal to [`ph`](Self::ph) at upload.
361    pub pe: f64,
362
363    /// Per-chunk location manifests (one entry per chunk).
364    pub chunks: Vec<ChunkManifest>,
365
366    /// Unix timestamp (seconds) when the file was uploaded.
367    pub created_at: u64,
368}
369
370impl FileManifest {
371    /// Decrypt and return the original filename.
372    ///
373    /// # Errors
374    /// Returns `Err` if the key is wrong or the blob is corrupted.
375    pub fn decrypt_name(&self, key: &NetworkMetaKey) -> BpResult<String> {
376        let bytes = key.decrypt(&self.encrypted_name)?;
377        String::from_utf8(bytes)
378            .map_err(|e| BpError::Coding(format!("Filename is not valid UTF-8: {e}")))
379    }
380
381    /// Set the filename by encrypting it with the given network key.
382    pub fn set_name(&mut self, filename: &str, key: &NetworkMetaKey) {
383        self.encrypted_name = key.encrypt(filename.as_bytes());
384    }
385
386    /// Conservation ratio: fraction of the network's fragment locations that
387    /// still hold known-live fragments.
388    ///
389    /// A value of `1.0` means all `n × num_chunks` fragments are in place.
390    /// Below `k / n` the file cannot be recovered.
391    pub fn conservation_ratio(&self) -> f64 {
392        let total = self.n * self.num_chunks;
393        if total == 0 {
394            return 0.0;
395        }
396        let present: usize = self.chunks.iter().map(|c| c.fragment_locations.len()).sum();
397        present as f64 / total as f64
398    }
399
400    /// Whether every chunk has at least `k` fragment locations recorded.
401    pub fn is_recoverable(&self) -> bool {
402        self.chunks
403            .iter()
404            .all(|c| c.fragment_locations.len() >= self.k)
405    }
406
407    /// Serialise to a compact JSON byte blob (for gossip / DHT storage).
408    pub fn to_json_bytes(&self) -> BpResult<Vec<u8>> {
409        serde_json::to_vec(self).map_err(BpError::Serde)
410    }
411
412    /// Deserialise from a JSON byte blob.
413    pub fn from_json_bytes(bytes: &[u8]) -> BpResult<Self> {
414        serde_json::from_slice(bytes).map_err(BpError::Serde)
415    }
416}
417
418// ── Tests ─────────────────────────────────────────────────────────────────────
419
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixtures ──────────────────────────────────────────────────────────

    /// Deterministic (test-only) key for `network`.
    fn make_key(network: &str) -> NetworkMetaKey {
        NetworkMetaKey::for_network(network)
    }

    /// Shorthand constructor for a fragment location.
    fn location(fragment_id: &str, pouch_peer_id: &str, coding_vector: Vec<u8>) -> FragmentLocation {
        FragmentLocation {
            fragment_id: fragment_id.into(),
            pouch_peer_id: pouch_peer_id.into(),
            coding_vector,
        }
    }

    /// Shorthand constructor for a chunk manifest.
    fn chunk(
        chunk_id: &str,
        chunk_index: usize,
        original_size: usize,
        fragment_locations: Vec<FragmentLocation>,
    ) -> ChunkManifest {
        ChunkManifest {
            chunk_id: chunk_id.into(),
            chunk_index,
            original_size,
            fragment_locations,
        }
    }

    /// Manifest with fixed coding parameters, no chunks, and `filename`
    /// encrypted under the deterministic key of `network_id`.
    fn make_manifest(network_id: &str, filename: &str) -> FileManifest {
        let mut manifest = FileManifest {
            file_id: uuid::Uuid::new_v4().to_string(),
            owner_fingerprint: "a3f19c2b".into(),
            network_id: network_id.into(),
            encrypted_name: vec![],
            size_bytes: 4_096_000,
            chunk_size: 1_048_576,
            num_chunks: 4,
            k: 3,
            n: 6,
            q: 1.0,
            ph: 0.999,
            pe: 0.999,
            chunks: vec![],
            created_at: 1_710_000_000,
        };
        manifest.set_name(filename, &NetworkMetaKey::for_network(network_id));
        manifest
    }

    // ── NetworkMetaKey ────────────────────────────────────────────────────

    #[test]
    fn key_deterministic_per_network() {
        let amici = make_key("amici");
        assert_eq!(amici, make_key("amici"));
        assert_ne!(amici, make_key("lavoro"));
    }

    #[test]
    fn encrypt_decrypt_roundtrip() {
        let key = make_key("amici");
        let msg = b"my secret filename.txt";
        let recovered = key.decrypt(&key.encrypt(msg)).unwrap();
        assert_eq!(recovered, msg);
    }

    #[test]
    fn encrypt_different_nonces() {
        let key = make_key("amici");
        let msg = b"same plaintext";
        let first = key.encrypt(msg);
        let second = key.encrypt(msg);
        // Each call draws a fresh nonce, so the blobs must differ …
        assert_ne!(first, second);
        // … while both still decrypt to the same plaintext.
        for blob in [&first, &second] {
            assert_eq!(key.decrypt(blob).unwrap(), msg);
        }
    }

    #[test]
    fn decrypt_wrong_key_fails() {
        let blob = make_key("amici").encrypt(b"hello");
        assert!(make_key("lavoro").decrypt(&blob).is_err());
    }

    #[test]
    fn decrypt_tampered_mac_fails() {
        let key = make_key("amici");
        let mut blob = key.encrypt(b"hello");
        // Byte 20 lies inside the MAC region (bytes 16..48).
        blob[20] ^= 0xFF;
        assert!(key.decrypt(&blob).is_err());
    }

    #[test]
    fn decrypt_too_short_fails() {
        // One byte short of the 48-byte nonce + MAC header.
        let truncated = [0u8; 47];
        assert!(make_key("amici").decrypt(&truncated).is_err());
    }

    // ── FileManifest helpers ──────────────────────────────────────────────

    #[test]
    fn set_and_decrypt_name() {
        let manifest = make_manifest("amici", "holiday_photos.tar");
        let key = NetworkMetaKey::for_network("amici");
        assert_eq!(manifest.decrypt_name(&key).unwrap(), "holiday_photos.tar");
    }

    #[test]
    fn conservation_ratio_all_fragments_present() {
        let mut m = make_manifest("amici", "file.bin");
        m.num_chunks = 2;
        m.n = 3;
        m.chunks = vec![
            chunk(
                "aaa",
                0,
                1_048_576,
                vec![
                    location("f1", "p1", vec![1, 2, 3]),
                    location("f2", "p2", vec![4, 5, 6]),
                    location("f3", "p3", vec![7, 8, 9]),
                ],
            ),
            chunk(
                "bbb",
                1,
                1_048_576,
                vec![
                    location("f4", "p1", vec![1, 0, 0]),
                    location("f5", "p2", vec![0, 1, 0]),
                    location("f6", "p3", vec![0, 0, 1]),
                ],
            ),
        ];
        assert!((m.conservation_ratio() - 1.0).abs() < 1e-9);
        assert!(m.is_recoverable());
    }

    #[test]
    fn conservation_ratio_partial() {
        let mut m = make_manifest("amici", "file.bin");
        m.num_chunks = 1;
        m.k = 3;
        m.n = 6;
        m.chunks = vec![chunk(
            "xxx",
            0,
            512,
            vec![location("a", "p1", vec![]), location("b", "p2", vec![])],
        )];
        let ratio = m.conservation_ratio();
        assert!((ratio - 2.0 / 6.0).abs() < 1e-9);
        // Two recorded fragments are fewer than k = 3, so recovery is impossible.
        assert!(!m.is_recoverable());
    }

    #[test]
    fn json_serialization_roundtrip() {
        let original = make_manifest("amici", "document.pdf");
        let encoded = original.to_json_bytes().unwrap();
        let decoded = FileManifest::from_json_bytes(&encoded).unwrap();
        assert_eq!(decoded.file_id, original.file_id);
        assert_eq!(decoded.encrypted_name, original.encrypted_name);
        assert_eq!(decoded.k, original.k);
        assert_eq!(decoded.ph, original.ph);
    }
}