bp_core/coding/rlnc.rs
1//! Random Linear Network Coding (RLNC) over GF(2⁸).
2//!
3//! ## Encoding — systematic form
4//!
5//! A chunk of bytes is split into `k` equally-sized **source symbols**.
6//! The encoder uses **systematic** encoding: the first `k` fragments carry
7//! the identity coefficient matrix (each fragment is one source symbol),
8//! guaranteeing that `frags[..k]` always decodes deterministically. The
9//! remaining `n-k` fragments use random GF(2⁸) coefficients for redundancy.
10//!
11//! ```text
12//! fragment_data[i] = Σ coeff[i][j] · source_symbol[j] (over GF(2⁸), byte-wise)
13//! coding_vector[i] = [coeff[i][0], coeff[i][1], ..., coeff[i][k-1]]
14//! coeff[i][j] = δ(i,j) for i < k (systematic)
15//! = random for i ≥ k (redundant)
16//! ```
17//!
18//! ## Recoding — no decompression required
19//!
20//! A Pouch holding `m` fragments can produce new fragments by recombining them:
21//!
22//! ```text
23//! new_data = Σ a[i] · fragment_data[i]
24//! new_vector = Σ a[i] · coding_vector[i]
25//! ```
26//!
27//! The result is a valid encoded fragment of the same chunk.
28//! **The Pouch never touches the original chunk.**
29//!
30//! ## Decoding
31//!
32//! Any `k` linearly independent fragments suffice. Gaussian elimination over
33//! GF(2⁸) on the `[coding_vectors | fragment_data]` augmented matrix yields
34//! the source symbols, which are then concatenated to recover the chunk.
35
36use crate::error::{BpError, BpResult};
37use rand::Rng;
38use serde::{Deserialize, Serialize};
39use uuid::Uuid;
40
41use super::gf256;
42
43// ── Types ─────────────────────────────────────────────────────────────────────
44
45/// A single RLNC-encoded fragment of a chunk.
46///
47/// Can be stored on disk, transmitted over the network, or used as input to
48/// [`recode`] to generate additional fragments without decoding.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct EncodedFragment {
51 /// Random UUID assigned at creation (encoding or recoding).
52 pub id: String,
53 /// BLAKE3 hash of the original chunk, hex-encoded (first 16 chars = 64 bits).
54 /// Used as the key in the fragment store and for integrity verification.
55 pub chunk_id: String,
56 /// Number of source symbols `k` this fragment was derived from.
57 pub k: usize,
58 /// Coefficients over GF(2⁸): one per source symbol.
59 /// Length == `k`.
60 pub coding_vector: Vec<u8>,
61 /// Linear combination of source symbols over GF(2⁸).
62 /// Length == `symbol_size` (chunk_size / k, padded).
63 pub data: Vec<u8>,
64}
65
66impl EncodedFragment {
67 /// Serialize to the on-disk binary format.
68 ///
69 /// ```text
70 /// [0..4] magic: b"BPFG"
71 /// [4..8] k: u32 LE
72 /// [8..8+k] coding_vector
73 /// [8+k..] data
74 /// ```
75 pub fn to_bytes(&self) -> Vec<u8> {
76 let mut out = Vec::with_capacity(8 + self.k + self.data.len());
77 out.extend_from_slice(b"BPFG");
78 out.extend_from_slice(&(self.k as u32).to_le_bytes());
79 out.extend_from_slice(&self.coding_vector);
80 out.extend_from_slice(&self.data);
81 out
82 }
83
84 /// Deserialize from the on-disk binary format.
85 ///
86 /// `id` and `chunk_id` are not stored in the binary blob — they come from
87 /// the filename / index and must be supplied by the caller.
88 pub fn from_bytes(id: String, chunk_id: String, bytes: &[u8]) -> BpResult<Self> {
89 if bytes.len() < 8 || &bytes[0..4] != b"BPFG" {
90 return Err(BpError::Coding("Invalid fragment magic bytes".into()));
91 }
92 let k = u32::from_le_bytes(bytes[4..8].try_into().unwrap()) as usize;
93 if bytes.len() < 8 + k {
94 return Err(BpError::Coding(
95 "Fragment too short for coding vector".into(),
96 ));
97 }
98 let coding_vector = bytes[8..8 + k].to_vec();
99 let data = bytes[8 + k..].to_vec();
100 Ok(Self {
101 id,
102 chunk_id,
103 k,
104 coding_vector,
105 data,
106 })
107 }
108}
109
110// ── Public API ────────────────────────────────────────────────────────────────
111
112/// Split `chunk` into `k` source symbols and produce `n` encoded fragments.
113///
114/// The first `k` fragments are **systematic** (identity coding vectors), so
115/// the original symbols can always be recovered from `frags[..k]` without
116/// any linear algebra. The remaining `n-k` fragments use random coefficients
117/// drawn from GF(2⁸) for redundancy.
118///
119/// # Arguments
120/// - `chunk` — raw bytes of the chunk to encode.
121/// - `k` — number of source symbols (recovery threshold).
122/// - `n` — total number of encoded fragments to produce (n ≥ k).
123///
124/// # Errors
125/// Returns an error if `n < k` or `chunk` is empty.
126pub fn encode(chunk: &[u8], k: usize, n: usize) -> BpResult<Vec<EncodedFragment>> {
127 if chunk.is_empty() {
128 return Err(BpError::Coding("Cannot encode empty chunk".into()));
129 }
130 if n < k {
131 return Err(BpError::Coding(format!("n ({n}) must be >= k ({k})")));
132 }
133 if k == 0 {
134 return Err(BpError::Coding("k must be > 0".into()));
135 }
136
137 let chunk_id = chunk_hash(chunk);
138 let (symbols, sym_size) = split_into_symbols(chunk, k);
139
140 let mut rng = rand::thread_rng();
141 let mut fragments = Vec::with_capacity(n);
142
143 for i in 0..n {
144 // First k fragments are systematic (standard basis e_i).
145 // Remaining n-k use random coefficients.
146 let coeffs: Vec<u8> = if i < k {
147 (0..k).map(|j| if j == i { 1u8 } else { 0u8 }).collect()
148 } else {
149 (0..k).map(|_| rng.gen::<u8>()).collect()
150 };
151 let data = combine(&symbols, &coeffs, sym_size);
152 fragments.push(EncodedFragment {
153 id: Uuid::new_v4().to_string(),
154 chunk_id: chunk_id.clone(),
155 k,
156 coding_vector: coeffs,
157 data,
158 });
159 }
160
161 Ok(fragments)
162}
163
164/// Produce `count` new fragments by recombining existing ones.
165///
166/// **No decoding is performed.** The Pouch only needs the fragments it holds.
167/// The resulting fragments are valid RLNC fragments of the same original chunk.
168///
169/// Requires at least 1 input fragment.
170///
171/// # Errors
172/// Returns an error if `fragments` is empty or `count` is 0.
173pub fn recode(fragments: &[EncodedFragment], count: usize) -> BpResult<Vec<EncodedFragment>> {
174 if fragments.is_empty() {
175 return Err(BpError::Coding("recode: no input fragments".into()));
176 }
177 if count == 0 {
178 return Err(BpError::Coding("recode: count must be > 0".into()));
179 }
180
181 let k = fragments[0].k;
182 let sym_size = fragments[0].data.len();
183 let chunk_id = fragments[0].chunk_id.clone();
184
185 let mut rng = rand::thread_rng();
186 let mut result = Vec::with_capacity(count);
187
188 for _ in 0..count {
189 // Draw random recoding coefficients, retrying if the resulting coding
190 // vector is the zero vector (which would make the fragment useless for
191 // decoding). P(all-zero coding vector) ≈ (1/256)^m where m = input
192 // count, so at most one retry is ever needed in practice.
193 let (new_data, new_vector) = loop {
194 let coeffs: Vec<u8> = (0..fragments.len()).map(|_| rng.gen::<u8>()).collect();
195
196 let mut data = vec![0u8; sym_size];
197 let mut vector = vec![0u8; k];
198 for (frag, &a) in fragments.iter().zip(coeffs.iter()) {
199 gf256::mul_acc(&mut data, &frag.data, a);
200 gf256::mul_acc(&mut vector, &frag.coding_vector, a);
201 }
202
203 if vector.iter().any(|&b| b != 0) {
204 break (data, vector);
205 }
206 // zero vector — retry with new random coefficients
207 };
208
209 result.push(EncodedFragment {
210 id: Uuid::new_v4().to_string(),
211 chunk_id: chunk_id.clone(),
212 k,
213 coding_vector: new_vector,
214 data: new_data,
215 });
216 }
217
218 Ok(result)
219}
220
221/// Reconstruct the original chunk from `k` (or more) linearly independent fragments.
222///
223/// Uses Gaussian elimination over GF(2⁸) on the augmented matrix
224/// `[coding_vectors | fragment_data]`.
225///
226/// The returned bytes include any zero-padding added during encoding — the caller
227/// is responsible for stripping trailing padding to the original length (stored in
228/// the file manifest, not in the fragment itself).
229///
230/// # Errors
231/// - Fewer than `k` fragments provided.
232/// - Fragments are linearly dependent (singular matrix).
233/// - Inconsistent `k` or `data` length across fragments.
234pub fn decode(fragments: &[EncodedFragment]) -> BpResult<Vec<u8>> {
235 if fragments.is_empty() {
236 return Err(BpError::Coding("decode: no fragments provided".into()));
237 }
238
239 let k = fragments[0].k;
240 let sym_size = fragments[0].data.len();
241
242 if fragments.len() < k {
243 return Err(BpError::Coding(format!(
244 "decode: need {k} fragments, got {}",
245 fragments.len()
246 )));
247 }
248
249 // Validate consistency
250 for (i, f) in fragments[..k].iter().enumerate() {
251 if f.k != k {
252 return Err(BpError::Coding(format!(
253 "decode: fragment {i} has k={}, expected {k}",
254 f.k
255 )));
256 }
257 if f.data.len() != sym_size {
258 return Err(BpError::Coding(format!(
259 "decode: fragment {i} data len={}, expected {sym_size}",
260 f.data.len()
261 )));
262 }
263 if f.coding_vector.len() != k {
264 return Err(BpError::Coding(format!(
265 "decode: fragment {i} coding_vector len={}, expected {k}",
266 f.coding_vector.len()
267 )));
268 }
269 }
270
271 // Build augmented matrix: each row = [coding_vector (k bytes) | data (sym_size bytes)]
272 let mut matrix: Vec<Vec<u8>> = fragments[..k]
273 .iter()
274 .map(|f| {
275 let mut row = f.coding_vector.clone();
276 row.extend_from_slice(&f.data);
277 row
278 })
279 .collect();
280
281 // Gaussian elimination over GF(2⁸)
282 for col in 0..k {
283 // Find a non-zero pivot in column `col` at or below the current row
284 let pivot = (col..k).find(|&r| matrix[r][col] != 0).ok_or_else(|| {
285 BpError::Coding(format!(
286 "decode: matrix is singular at column {col} (linearly dependent fragments)"
287 ))
288 })?;
289 matrix.swap(col, pivot);
290
291 // Normalise pivot row so M[col][col] == 1
292 let pivot_val = matrix[col][col];
293 let pivot_inv = gf256::inv(pivot_val);
294 for elem in matrix[col].iter_mut() {
295 *elem = gf256::mul(*elem, pivot_inv);
296 }
297
298 // Eliminate column `col` from all other rows
299 for row in 0..k {
300 if row == col {
301 continue;
302 }
303 let factor = matrix[row][col];
304 if factor == 0 {
305 continue;
306 }
307 // Borrow trick: copy the pivot row to avoid borrow conflict
308 let pivot_row: Vec<u8> = matrix[col].clone();
309 for (dst, &src) in matrix[row].iter_mut().zip(pivot_row.iter()) {
310 *dst ^= gf256::mul(factor, src);
311 }
312 }
313 }
314
315 // After reduction to row echelon form, row i holds source symbol i
316 // in the right half of the augmented matrix.
317 let mut result = Vec::with_capacity(k * sym_size);
318 for row in &matrix {
319 result.extend_from_slice(&row[k..]);
320 }
321
322 Ok(result)
323}
324
325// ── Helpers ───────────────────────────────────────────────────────────────────
326
327/// BLAKE3 hash of a chunk, hex-encoded, first 16 chars (64-bit prefix).
328pub fn chunk_hash(chunk: &[u8]) -> String {
329 let hash = blake3::hash(chunk);
330 hash.to_hex()[..16].to_string()
331}
332
333/// Split `chunk` into exactly `k` symbols of equal byte length, zero-padding
334/// the last symbol if the chunk size is not divisible by `k`.
335fn split_into_symbols(chunk: &[u8], k: usize) -> (Vec<Vec<u8>>, usize) {
336 let sym_size = chunk.len().div_ceil(k);
337 let mut symbols = Vec::with_capacity(k);
338 for i in 0..k {
339 let start = i * sym_size;
340 let end = ((i + 1) * sym_size).min(chunk.len());
341 let mut sym = vec![0u8; sym_size];
342 if start < chunk.len() {
343 sym[..end - start].copy_from_slice(&chunk[start..end]);
344 }
345 symbols.push(sym);
346 }
347 (symbols, sym_size)
348}
349
350/// Compute a linear combination of `symbols` with given `coeffs` over GF(2⁸).
351fn combine(symbols: &[Vec<u8>], coeffs: &[u8], sym_size: usize) -> Vec<u8> {
352 let mut result = vec![0u8; sym_size];
353 for (sym, &c) in symbols.iter().zip(coeffs.iter()) {
354 gf256::mul_acc(&mut result, sym, c);
355 }
356 result
357}
358
359// ── Tests ─────────────────────────────────────────────────────────────────────
360
361#[cfg(test)]
362mod tests {
363 use super::*;
364
365 const TEST_CHUNK: &[u8] = b"Hello BillPouch! This is a test chunk for RLNC encoding.";
366
367 #[test]
368 fn encode_produces_n_fragments() {
369 let frags = encode(TEST_CHUNK, 4, 8).unwrap();
370 assert_eq!(frags.len(), 8);
371 for f in &frags {
372 assert_eq!(f.k, 4);
373 assert_eq!(f.coding_vector.len(), 4);
374 assert!(!f.id.is_empty());
375 assert_eq!(f.chunk_id.len(), 16);
376 }
377 }
378
379 #[test]
380 fn decode_exact_k_fragments() {
381 // The first k fragments are systematic (identity coding vectors),
382 // so this test is 100% deterministic — no random matrix involved.
383 let k = 4;
384 let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
385 let recovered = decode(&frags[..k]).unwrap();
386 assert!(recovered.starts_with(TEST_CHUNK));
387 }
388
389 #[test]
390 fn decode_any_k_subset() {
391 // Verify that any reordering of the k systematic fragments decodes
392 // correctly. Reordering changes the pivot selection in Gaussian
393 // elimination — this is a deterministic correctness check.
394 let k = 4;
395 let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
396 // Use systematic frags in reverse order — deterministically independent
397 let reversed: Vec<_> = frags[..k].iter().cloned().rev().collect();
398 let recovered = decode(&reversed).unwrap();
399 assert!(recovered.starts_with(TEST_CHUNK));
400 }
401
402 #[test]
403 fn recode_produces_valid_fragments() {
404 let k = 4;
405 let frags = encode(TEST_CHUNK, k, k + 4).unwrap();
406 // Recode 3 new fragments. recode() guarantees the coding vector is never
407 // all-zero (it retries internally), so every output fragment is usable.
408 let recoded = recode(&frags[..2], 3).unwrap();
409 assert_eq!(recoded.len(), 3);
410 for f in &recoded {
411 assert_eq!(f.coding_vector.len(), k);
412 assert_eq!(f.data.len(), frags[0].data.len());
413 assert!(
414 f.coding_vector.iter().any(|&b| b != 0),
415 "recode must never produce an all-zero coding vector"
416 );
417 }
418 }
419
420 #[test]
421 fn decode_from_recoded_fragments() {
422 // Verify that non-systematic (recoded-style) fragments decode correctly.
423 // We construct the fragments manually with a 2-shift cyclic permutation
424 // to distinguish from decode_only_recoded_no_originals (+1 shift).
425 // Permutation matrices have det = 1 over any field → always full-rank.
426 let k = 4;
427 let frags = encode(TEST_CHUNK, k, k).unwrap();
428 let chunk_id = frags[0].chunk_id.clone();
429
430 // Coding vector row i → e_{(i+2) mod k} (shift by 2)
431 let recoded: Vec<EncodedFragment> = (0..k)
432 .map(|i| {
433 let j = (i + 2) % k;
434 let coding_vector = (0..k).map(|c| if c == j { 1u8 } else { 0u8 }).collect();
435 let data = frags[j].data.clone();
436 EncodedFragment {
437 id: uuid::Uuid::new_v4().to_string(),
438 chunk_id: chunk_id.clone(),
439 k,
440 coding_vector,
441 data,
442 }
443 })
444 .collect();
445
446 let recovered = decode(&recoded).unwrap();
447 assert!(recovered.starts_with(TEST_CHUNK));
448 }
449
450 #[test]
451 fn decode_only_recoded_no_originals() {
452 // Verify that k fragments whose coding vectors are NOT the standard basis
453 // (i.e. no node holds an "original" systematic fragment) still decode.
454 //
455 // We build a cyclic permutation of the identity matrix as coding vectors:
456 // row i → e_{(i+1) mod k}
457 // Permutation matrices have det = 1 over any field (including GF(2^8)),
458 // so this set is always full-rank — 100% deterministic, no RNG involved.
459 let k = 3;
460 let frags = encode(TEST_CHUNK, k, k).unwrap();
461 let chunk_id = frags[0].chunk_id.clone();
462
463 let recoded: Vec<EncodedFragment> = (0..k)
464 .map(|i| {
465 let j = (i + 1) % k;
466 // coding vector = e_j (cyclic shift by 1)
467 let coding_vector = (0..k).map(|c| if c == j { 1u8 } else { 0u8 }).collect();
468 // data = the j-th source symbol, which equals frags[j].data
469 // (systematic fragment j carries source symbol j unchanged)
470 let data = frags[j].data.clone();
471 EncodedFragment {
472 id: uuid::Uuid::new_v4().to_string(),
473 chunk_id: chunk_id.clone(),
474 k,
475 coding_vector,
476 data,
477 }
478 })
479 .collect();
480
481 // None of the recoded frags should be at their "natural" position
482 for (i, f) in recoded.iter().enumerate() {
483 let identity_i: Vec<u8> = (0..k).map(|j| if j == i { 1u8 } else { 0u8 }).collect();
484 assert_ne!(f.coding_vector, identity_i);
485 }
486 let recovered = decode(&recoded).unwrap();
487 assert!(recovered.starts_with(TEST_CHUNK));
488 }
489
490 #[test]
491 fn serialization_roundtrip() {
492 let frags = encode(TEST_CHUNK, 3, 5).unwrap();
493 let f = &frags[0];
494 let bytes = f.to_bytes();
495 let back = EncodedFragment::from_bytes(f.id.clone(), f.chunk_id.clone(), &bytes).unwrap();
496 assert_eq!(back.k, f.k);
497 assert_eq!(back.coding_vector, f.coding_vector);
498 assert_eq!(back.data, f.data);
499 }
500
501 #[test]
502 fn encode_error_on_empty_chunk() {
503 assert!(encode(&[], 4, 8).is_err());
504 }
505
506 #[test]
507 fn encode_error_n_less_than_k() {
508 assert!(encode(TEST_CHUNK, 4, 2).is_err());
509 }
510
511 #[test]
512 fn decode_error_too_few_fragments() {
513 let frags = encode(TEST_CHUNK, 4, 6).unwrap();
514 assert!(decode(&frags[..2]).is_err());
515 }
516}