bp_core/coding/
rlnc.rs

1//! Random Linear Network Coding (RLNC) over GF(2⁸).
2//!
3//! ## Encoding — systematic form
4//!
5//! A chunk of bytes is split into `k` equally-sized **source symbols**.
6//! The encoder uses **systematic** encoding: the first `k` fragments carry
7//! the identity coefficient matrix (each fragment is one source symbol),
8//! guaranteeing that `frags[..k]` always decodes deterministically.  The
9//! remaining `n-k` fragments use random GF(2⁸) coefficients for redundancy.
10//!
11//! ```text
12//! fragment_data[i]    = Σ coeff[i][j] · source_symbol[j]   (over GF(2⁸), byte-wise)
13//! coding_vector[i]    = [coeff[i][0], coeff[i][1], ..., coeff[i][k-1]]
14//! coeff[i][j]         = δ(i,j)  for i < k   (systematic)
15//!                     = random  for i ≥ k   (redundant)
16//! ```
17//!
18//! ## Recoding — no decompression required
19//!
20//! A Pouch holding `m` fragments can produce new fragments by recombining them:
21//!
22//! ```text
23//! new_data    = Σ a[i] · fragment_data[i]
24//! new_vector  = Σ a[i] · coding_vector[i]
25//! ```
26//!
27//! The result is a valid encoded fragment of the same chunk.
28//! **The Pouch never touches the original chunk.**
29//!
30//! ## Decoding
31//!
32//! Any `k` linearly independent fragments suffice.  Gaussian elimination over
33//! GF(2⁸) on the `[coding_vectors | fragment_data]` augmented matrix yields
34//! the source symbols, which are then concatenated to recover the chunk.
35
36use crate::error::{BpError, BpResult};
37use rand::Rng;
38use serde::{Deserialize, Serialize};
39use uuid::Uuid;
40
41use super::gf256;
42
43// ── Types ─────────────────────────────────────────────────────────────────────
44
45/// A single RLNC-encoded fragment of a chunk.
46///
47/// Can be stored on disk, transmitted over the network, or used as input to
48/// [`recode`] to generate additional fragments without decoding.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct EncodedFragment {
51    /// Random UUID assigned at creation (encoding or recoding).
52    pub id: String,
53    /// BLAKE3 hash of the original chunk, hex-encoded (first 16 chars = 64 bits).
54    /// Used as the key in the fragment store and for integrity verification.
55    pub chunk_id: String,
56    /// Number of source symbols `k` this fragment was derived from.
57    pub k: usize,
58    /// Coefficients over GF(2⁸): one per source symbol.
59    /// Length == `k`.
60    pub coding_vector: Vec<u8>,
61    /// Linear combination of source symbols over GF(2⁸).
62    /// Length == `symbol_size` (chunk_size / k, padded).
63    pub data: Vec<u8>,
64}
65
66impl EncodedFragment {
67    /// Serialize to the on-disk binary format.
68    ///
69    /// ```text
70    /// [0..4]      magic: b"BPFG"
71    /// [4..8]      k: u32 LE
72    /// [8..8+k]    coding_vector
73    /// [8+k..]     data
74    /// ```
75    pub fn to_bytes(&self) -> Vec<u8> {
76        let mut out = Vec::with_capacity(8 + self.k + self.data.len());
77        out.extend_from_slice(b"BPFG");
78        out.extend_from_slice(&(self.k as u32).to_le_bytes());
79        out.extend_from_slice(&self.coding_vector);
80        out.extend_from_slice(&self.data);
81        out
82    }
83
84    /// Deserialize from the on-disk binary format.
85    ///
86    /// `id` and `chunk_id` are not stored in the binary blob — they come from
87    /// the filename / index and must be supplied by the caller.
88    pub fn from_bytes(id: String, chunk_id: String, bytes: &[u8]) -> BpResult<Self> {
89        if bytes.len() < 8 || &bytes[0..4] != b"BPFG" {
90            return Err(BpError::Coding("Invalid fragment magic bytes".into()));
91        }
92        let k = u32::from_le_bytes(bytes[4..8].try_into().unwrap()) as usize;
93        if bytes.len() < 8 + k {
94            return Err(BpError::Coding(
95                "Fragment too short for coding vector".into(),
96            ));
97        }
98        let coding_vector = bytes[8..8 + k].to_vec();
99        let data = bytes[8 + k..].to_vec();
100        Ok(Self {
101            id,
102            chunk_id,
103            k,
104            coding_vector,
105            data,
106        })
107    }
108}
109
110// ── Public API ────────────────────────────────────────────────────────────────
111
112/// Split `chunk` into `k` source symbols and produce `n` encoded fragments.
113///
114/// The first `k` fragments are **systematic** (identity coding vectors), so
115/// the original symbols can always be recovered from `frags[..k]` without
116/// any linear algebra.  The remaining `n-k` fragments use random coefficients
117/// drawn from GF(2⁸) for redundancy.
118///
119/// # Arguments
120/// - `chunk`     — raw bytes of the chunk to encode.
121/// - `k`         — number of source symbols (recovery threshold).
122/// - `n`         — total number of encoded fragments to produce (n ≥ k).
123///
124/// # Errors
125/// Returns an error if `n < k` or `chunk` is empty.
126pub fn encode(chunk: &[u8], k: usize, n: usize) -> BpResult<Vec<EncodedFragment>> {
127    if chunk.is_empty() {
128        return Err(BpError::Coding("Cannot encode empty chunk".into()));
129    }
130    if n < k {
131        return Err(BpError::Coding(format!("n ({n}) must be >= k ({k})")));
132    }
133    if k == 0 {
134        return Err(BpError::Coding("k must be > 0".into()));
135    }
136
137    let chunk_id = chunk_hash(chunk);
138    let (symbols, sym_size) = split_into_symbols(chunk, k);
139
140    let mut rng = rand::thread_rng();
141    let mut fragments = Vec::with_capacity(n);
142
143    for i in 0..n {
144        // First k fragments are systematic (standard basis e_i).
145        // Remaining n-k use random coefficients.
146        let coeffs: Vec<u8> = if i < k {
147            (0..k).map(|j| if j == i { 1u8 } else { 0u8 }).collect()
148        } else {
149            (0..k).map(|_| rng.gen::<u8>()).collect()
150        };
151        let data = combine(&symbols, &coeffs, sym_size);
152        fragments.push(EncodedFragment {
153            id: Uuid::new_v4().to_string(),
154            chunk_id: chunk_id.clone(),
155            k,
156            coding_vector: coeffs,
157            data,
158        });
159    }
160
161    Ok(fragments)
162}
163
164/// Produce `count` new fragments by recombining existing ones.
165///
166/// **No decoding is performed.** The Pouch only needs the fragments it holds.
167/// The resulting fragments are valid RLNC fragments of the same original chunk.
168///
169/// Requires at least 1 input fragment.
170///
171/// # Errors
172/// Returns an error if `fragments` is empty or `count` is 0.
173pub fn recode(fragments: &[EncodedFragment], count: usize) -> BpResult<Vec<EncodedFragment>> {
174    if fragments.is_empty() {
175        return Err(BpError::Coding("recode: no input fragments".into()));
176    }
177    if count == 0 {
178        return Err(BpError::Coding("recode: count must be > 0".into()));
179    }
180
181    let k = fragments[0].k;
182    let sym_size = fragments[0].data.len();
183    let chunk_id = fragments[0].chunk_id.clone();
184
185    let mut rng = rand::thread_rng();
186    let mut result = Vec::with_capacity(count);
187
188    for _ in 0..count {
189        // Draw random recoding coefficients, retrying if the resulting coding
190        // vector is the zero vector (which would make the fragment useless for
191        // decoding).  P(all-zero coding vector) ≈ (1/256)^m where m = input
192        // count, so at most one retry is ever needed in practice.
193        let (new_data, new_vector) = loop {
194            let coeffs: Vec<u8> = (0..fragments.len()).map(|_| rng.gen::<u8>()).collect();
195
196            let mut data = vec![0u8; sym_size];
197            let mut vector = vec![0u8; k];
198            for (frag, &a) in fragments.iter().zip(coeffs.iter()) {
199                gf256::mul_acc(&mut data, &frag.data, a);
200                gf256::mul_acc(&mut vector, &frag.coding_vector, a);
201            }
202
203            if vector.iter().any(|&b| b != 0) {
204                break (data, vector);
205            }
206            // zero vector — retry with new random coefficients
207        };
208
209        result.push(EncodedFragment {
210            id: Uuid::new_v4().to_string(),
211            chunk_id: chunk_id.clone(),
212            k,
213            coding_vector: new_vector,
214            data: new_data,
215        });
216    }
217
218    Ok(result)
219}
220
221/// Reconstruct the original chunk from `k` (or more) linearly independent fragments.
222///
223/// Uses Gaussian elimination over GF(2⁸) on the augmented matrix
224/// `[coding_vectors | fragment_data]`.
225///
226/// The returned bytes include any zero-padding added during encoding — the caller
227/// is responsible for stripping trailing padding to the original length (stored in
228/// the file manifest, not in the fragment itself).
229///
230/// # Errors
231/// - Fewer than `k` fragments provided.
232/// - Fragments are linearly dependent (singular matrix).
233/// - Inconsistent `k` or `data` length across fragments.
234pub fn decode(fragments: &[EncodedFragment]) -> BpResult<Vec<u8>> {
235    if fragments.is_empty() {
236        return Err(BpError::Coding("decode: no fragments provided".into()));
237    }
238
239    let k = fragments[0].k;
240    let sym_size = fragments[0].data.len();
241
242    if fragments.len() < k {
243        return Err(BpError::Coding(format!(
244            "decode: need {k} fragments, got {}",
245            fragments.len()
246        )));
247    }
248
249    // Validate consistency
250    for (i, f) in fragments[..k].iter().enumerate() {
251        if f.k != k {
252            return Err(BpError::Coding(format!(
253                "decode: fragment {i} has k={}, expected {k}",
254                f.k
255            )));
256        }
257        if f.data.len() != sym_size {
258            return Err(BpError::Coding(format!(
259                "decode: fragment {i} data len={}, expected {sym_size}",
260                f.data.len()
261            )));
262        }
263        if f.coding_vector.len() != k {
264            return Err(BpError::Coding(format!(
265                "decode: fragment {i} coding_vector len={}, expected {k}",
266                f.coding_vector.len()
267            )));
268        }
269    }
270
271    // Build augmented matrix: each row = [coding_vector (k bytes) | data (sym_size bytes)]
272    let mut matrix: Vec<Vec<u8>> = fragments[..k]
273        .iter()
274        .map(|f| {
275            let mut row = f.coding_vector.clone();
276            row.extend_from_slice(&f.data);
277            row
278        })
279        .collect();
280
281    // Gaussian elimination over GF(2⁸)
282    for col in 0..k {
283        // Find a non-zero pivot in column `col` at or below the current row
284        let pivot = (col..k).find(|&r| matrix[r][col] != 0).ok_or_else(|| {
285            BpError::Coding(format!(
286                "decode: matrix is singular at column {col} (linearly dependent fragments)"
287            ))
288        })?;
289        matrix.swap(col, pivot);
290
291        // Normalise pivot row so M[col][col] == 1
292        let pivot_val = matrix[col][col];
293        let pivot_inv = gf256::inv(pivot_val);
294        for elem in matrix[col].iter_mut() {
295            *elem = gf256::mul(*elem, pivot_inv);
296        }
297
298        // Eliminate column `col` from all other rows
299        for row in 0..k {
300            if row == col {
301                continue;
302            }
303            let factor = matrix[row][col];
304            if factor == 0 {
305                continue;
306            }
307            // Borrow trick: copy the pivot row to avoid borrow conflict
308            let pivot_row: Vec<u8> = matrix[col].clone();
309            for (dst, &src) in matrix[row].iter_mut().zip(pivot_row.iter()) {
310                *dst ^= gf256::mul(factor, src);
311            }
312        }
313    }
314
315    // After reduction to row echelon form, row i holds source symbol i
316    // in the right half of the augmented matrix.
317    let mut result = Vec::with_capacity(k * sym_size);
318    for row in &matrix {
319        result.extend_from_slice(&row[k..]);
320    }
321
322    Ok(result)
323}
324
325// ── Helpers ───────────────────────────────────────────────────────────────────
326
327/// BLAKE3 hash of a chunk, hex-encoded, first 16 chars (64-bit prefix).
328pub fn chunk_hash(chunk: &[u8]) -> String {
329    let hash = blake3::hash(chunk);
330    hash.to_hex()[..16].to_string()
331}
332
333/// Split `chunk` into exactly `k` symbols of equal byte length, zero-padding
334/// the last symbol if the chunk size is not divisible by `k`.
335fn split_into_symbols(chunk: &[u8], k: usize) -> (Vec<Vec<u8>>, usize) {
336    let sym_size = chunk.len().div_ceil(k);
337    let mut symbols = Vec::with_capacity(k);
338    for i in 0..k {
339        let start = i * sym_size;
340        let end = ((i + 1) * sym_size).min(chunk.len());
341        let mut sym = vec![0u8; sym_size];
342        if start < chunk.len() {
343            sym[..end - start].copy_from_slice(&chunk[start..end]);
344        }
345        symbols.push(sym);
346    }
347    (symbols, sym_size)
348}
349
350/// Compute a linear combination of `symbols` with given `coeffs` over GF(2⁸).
351fn combine(symbols: &[Vec<u8>], coeffs: &[u8], sym_size: usize) -> Vec<u8> {
352    let mut result = vec![0u8; sym_size];
353    for (sym, &c) in symbols.iter().zip(coeffs.iter()) {
354        gf256::mul_acc(&mut result, sym, c);
355    }
356    result
357}
358
359// ── Tests ─────────────────────────────────────────────────────────────────────
360
361#[cfg(test)]
362mod tests {
363    use super::*;
364
365    const TEST_CHUNK: &[u8] = b"Hello BillPouch! This is a test chunk for RLNC encoding.";
366
367    #[test]
368    fn encode_produces_n_fragments() {
369        let frags = encode(TEST_CHUNK, 4, 8).unwrap();
370        assert_eq!(frags.len(), 8);
371        for f in &frags {
372            assert_eq!(f.k, 4);
373            assert_eq!(f.coding_vector.len(), 4);
374            assert!(!f.id.is_empty());
375            assert_eq!(f.chunk_id.len(), 16);
376        }
377    }
378
379    #[test]
380    fn decode_exact_k_fragments() {
381        // The first k fragments are systematic (identity coding vectors),
382        // so this test is 100% deterministic — no random matrix involved.
383        let k = 4;
384        let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
385        let recovered = decode(&frags[..k]).unwrap();
386        assert!(recovered.starts_with(TEST_CHUNK));
387    }
388
389    #[test]
390    fn decode_any_k_subset() {
391        // Verify that any reordering of the k systematic fragments decodes
392        // correctly.  Reordering changes the pivot selection in Gaussian
393        // elimination — this is a deterministic correctness check.
394        let k = 4;
395        let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
396        // Use systematic frags in reverse order — deterministically independent
397        let reversed: Vec<_> = frags[..k].iter().cloned().rev().collect();
398        let recovered = decode(&reversed).unwrap();
399        assert!(recovered.starts_with(TEST_CHUNK));
400    }
401
402    #[test]
403    fn recode_produces_valid_fragments() {
404        let k = 4;
405        let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
406        // Recode 3 new fragments. recode() guarantees the coding vector is never
407        // all-zero (it retries internally), so every output fragment is usable.
408        let recoded = recode(&frags[..2], 3).unwrap();
409        assert_eq!(recoded.len(), 3);
410        for f in &recoded {
411            assert_eq!(f.coding_vector.len(), k);
412            assert_eq!(f.data.len(), frags[0].data.len());
413            assert!(
414                f.coding_vector.iter().any(|&b| b != 0),
415                "recode must never produce an all-zero coding vector"
416            );
417        }
418    }
419
420    #[test]
421    fn decode_from_recoded_fragments() {
422        // Verify that non-systematic (recoded-style) fragments decode correctly.
423        // We construct the fragments manually with a 2-shift cyclic permutation
424        // to distinguish from decode_only_recoded_no_originals (+1 shift).
425        // Permutation matrices have det = 1 over any field → always full-rank.
426        let k = 4;
427        let frags = encode(TEST_CHUNK, k, k).unwrap();
428        let chunk_id = frags[0].chunk_id.clone();
429
430        // Coding vector row i → e_{(i+2) mod k}  (shift by 2)
431        let recoded: Vec<EncodedFragment> = (0..k)
432            .map(|i| {
433                let j = (i + 2) % k;
434                let coding_vector = (0..k).map(|c| if c == j { 1u8 } else { 0u8 }).collect();
435                let data = frags[j].data.clone();
436                EncodedFragment {
437                    id: uuid::Uuid::new_v4().to_string(),
438                    chunk_id: chunk_id.clone(),
439                    k,
440                    coding_vector,
441                    data,
442                }
443            })
444            .collect();
445
446        let recovered = decode(&recoded).unwrap();
447        assert!(recovered.starts_with(TEST_CHUNK));
448    }
449
450    #[test]
451    fn decode_only_recoded_no_originals() {
452        // Verify that k fragments whose coding vectors are NOT the standard basis
453        // (i.e. no node holds an "original" systematic fragment) still decode.
454        //
455        // We build a cyclic permutation of the identity matrix as coding vectors:
456        //   row i → e_{(i+1) mod k}
457        // Permutation matrices have det = 1 over any field (including GF(2^8)),
458        // so this set is always full-rank — 100% deterministic, no RNG involved.
459        let k = 3;
460        let frags = encode(TEST_CHUNK, k, k).unwrap();
461        let chunk_id = frags[0].chunk_id.clone();
462
463        let recoded: Vec<EncodedFragment> = (0..k)
464            .map(|i| {
465                let j = (i + 1) % k;
466                // coding vector = e_j (cyclic shift by 1)
467                let coding_vector = (0..k).map(|c| if c == j { 1u8 } else { 0u8 }).collect();
468                // data = the j-th source symbol, which equals frags[j].data
469                // (systematic fragment j carries source symbol j unchanged)
470                let data = frags[j].data.clone();
471                EncodedFragment {
472                    id: uuid::Uuid::new_v4().to_string(),
473                    chunk_id: chunk_id.clone(),
474                    k,
475                    coding_vector,
476                    data,
477                }
478            })
479            .collect();
480
481        // None of the recoded frags should be at their "natural" position
482        for (i, f) in recoded.iter().enumerate() {
483            let identity_i: Vec<u8> = (0..k).map(|j| if j == i { 1u8 } else { 0u8 }).collect();
484            assert_ne!(f.coding_vector, identity_i);
485        }
486        let recovered = decode(&recoded).unwrap();
487        assert!(recovered.starts_with(TEST_CHUNK));
488    }
489
490    #[test]
491    fn serialization_roundtrip() {
492        let frags = encode(TEST_CHUNK, 3, 5).unwrap();
493        let f = &frags[0];
494        let bytes = f.to_bytes();
495        let back = EncodedFragment::from_bytes(f.id.clone(), f.chunk_id.clone(), &bytes).unwrap();
496        assert_eq!(back.k, f.k);
497        assert_eq!(back.coding_vector, f.coding_vector);
498        assert_eq!(back.data, f.data);
499    }
500
501    #[test]
502    fn encode_error_on_empty_chunk() {
503        assert!(encode(&[], 4, 8).is_err());
504    }
505
506    #[test]
507    fn encode_error_n_less_than_k() {
508        assert!(encode(TEST_CHUNK, 4, 2).is_err());
509    }
510
511    #[test]
512    fn decode_error_too_few_fragments() {
513        let frags = encode(TEST_CHUNK, 4, 6).unwrap();
514        assert!(decode(&frags[..2]).is_err());
515    }
516}