// bp_core/coding/params.rs
1//! Network-aware coding parameters — adaptive `k` computation.
2//!
3//! ## Problem
4//!
5//! Given a network of `N` Pouch peers, each with an independent availability
6//! probability `p_i` (the **stability score** from [`crate::network::qos`]),
7//! and given a target high recovery probability `Ph`, we need to find the
8//! largest integer `k` such that:
9//!
10//! ```text
11//! P(X ≥ k) ≥ Ph where X = Σ Bernoulli(p_i)
12//! ```
13//!
14//! `X` follows a **Poisson-Binomial** distribution.
15//!
16//! ## Normal approximation
17//!
18//! For the sizes encountered in practice (N ≥ 5), the Poisson-Binomial can be
19//! approximated by a normal distribution with matching mean and variance:
20//!
21//! ```text
22//! μ = Σ p_i (expected recoverable fragments)
23//! σ² = Σ p_i·(1−p_i) (variance)
24//!
25//! P(X ≥ k) ≈ 1 − Φ((k − 0.5 − μ) / σ) ← continuity correction
26//! ```
27//!
28//! Solving for k:
29//!
30//! ```text
31//! k = ⌊ μ − z(Ph) · σ + 0.5 ⌋
32//! ```
33//!
34//! where `z(Ph)` is the standard-normal `Ph`-quantile (probit).
35//!
36//! ## Redundancy overhead `q`
37//!
38//! The total number of fragments distributed per chunk is:
39//!
40//! ```text
41//! n = round(k · (1 + q_target))
42//! q = (n − k) / k (effective overhead fraction)
43//! ```
44//!
45//! The default `q_target = 1.0` means each chunk generates `~2k` fragments
46//! (one copy-equivalent of redundancy on top of the recovery threshold).
47//!
48//! ## Rolling pe
49//!
50//! After upload, the actual observed probability `pe` can be recomputed at any
51//! time using [`effective_recovery_probability`] with the latest stability scores
52//! and the stored `k`.
53
54use crate::error::{BpError, BpResult};
55
56// ── Public parameters record ──────────────────────────────────────────────────
57
/// Coding parameters computed for a specific network state and target `Ph`.
///
/// Stored in the [`FileManifest`](crate::storage::manifest::FileManifest)
/// at upload time and used by the daemon to determine how many fragments to
/// generate and distribute per chunk.
#[derive(Debug, Clone)]
pub struct NetworkCodingParams {
    /// Target recovery probability requested at computation time (`Ph ∈ (0, 1)`).
    pub ph: f64,
    /// Recovery threshold: the minimum fragments needed to reconstruct a chunk.
    pub k: usize,
    /// Total fragments to generate per chunk (= k + redundancy), capped at
    /// `peer_count` so each Pouch stores at most one fragment per chunk.
    pub n: usize,
    /// Effective redundancy overhead: `(n − k) / k`. May differ from the
    /// requested `q_target` after rounding and the peer-count cap.
    pub q: f64,
    /// Number of Pouch peers considered in this computation.
    pub peer_count: usize,
    /// Poisson-Binomial mean `μ = Σ p_i` over the peer stability scores.
    pub mu: f64,
    /// Poisson-Binomial std-dev `σ = sqrt(Σ p_i·(1−p_i))`.
    pub sigma: f64,
}
80
81impl NetworkCodingParams {
82 /// Effective recovery probability given the same set of stability scores.
83 ///
84 /// Useful for re-evaluating `pe` with updated QoS data after upload.
85 pub fn effective_probability(&self) -> f64 {
86 if self.sigma < 1e-9 {
87 return if self.k as f64 <= self.mu { 1.0 } else { 0.0 };
88 }
89 let z = (self.k as f64 - 0.5 - self.mu) / self.sigma;
90 1.0 - standard_normal_cdf(z)
91 }
92}
93
94// ── Main entry point ──────────────────────────────────────────────────────────
95
96/// Compute the optimal coding parameters for the given network state.
97///
98/// # Arguments
99///
100/// - `stabilities` — slice of per-peer stability scores `p_i ∈ [0.0, 1.0]`
101/// produced by [`crate::network::qos::QosRegistry::all_stability_scores`].
102/// - `ph` — target recovery probability, e.g. `0.999`.
103/// - `q_target` — desired redundancy overhead fraction, e.g. `1.0` (= 2×k
104/// total fragments per chunk). Must be `> 0.0`.
105///
106/// # Errors
107///
108/// - No peers provided (empty `stabilities`).
109/// - `ph` outside `(0.0, 1.0)`.
110/// - `q_target ≤ 0.0`.
111/// - Network too small or too unreliable to satisfy `Ph` (k would be ≤ 0).
112pub fn compute_coding_params(
113 stabilities: &[f64],
114 ph: f64,
115 q_target: f64,
116) -> BpResult<NetworkCodingParams> {
117 if stabilities.is_empty() {
118 return Err(BpError::Coding(
119 "Cannot compute coding params: no peers available".into(),
120 ));
121 }
122 if !(0.0 < ph && ph < 1.0) {
123 return Err(BpError::Coding(format!("Ph ({ph}) must be in (0.0, 1.0)")));
124 }
125 if q_target <= 0.0 {
126 return Err(BpError::Coding(format!(
127 "q_target ({q_target}) must be > 0.0"
128 )));
129 }
130
131 let peer_count = stabilities.len();
132
133 // Poisson-Binomial mean and variance
134 let mu: f64 = stabilities.iter().sum();
135 let sigma_sq: f64 = stabilities.iter().map(|&p| p * (1.0 - p)).sum();
136 let sigma = sigma_sq.sqrt();
137
138 // k = floor(μ − z(Ph) · σ + 0.5)
139 // z(Ph) is the probit (inverse standard-normal CDF) at Ph.
140 let z_ph = probit(ph);
141 let k_float = mu - z_ph * sigma + 0.5;
142
143 if k_float < 1.0 {
144 return Err(BpError::Coding(format!(
145 "Network too small or unreliable to achieve Ph={ph:.4}: \
146 computed k={k_float:.2} < 1 (μ={mu:.2}, σ={sigma:.2})"
147 )));
148 }
149
150 let k = k_float.floor() as usize;
151
152 // n = round(k * (1 + q_target)), but at least k+1
153 let n = ((k as f64) * (1.0 + q_target)).round().max(k as f64 + 1.0) as usize;
154
155 // n must not exceed total peer count (one fragment per Pouch)
156 let n = n.min(peer_count);
157
158 // After capping n, recheck that n >= k
159 if n < k {
160 return Err(BpError::Coding(format!(
161 "After capping to peer_count ({peer_count}), n ({n}) < k ({k}). \
162 Not enough peers to store the required fragments."
163 )));
164 }
165
166 let q = (n - k) as f64 / k as f64;
167
168 Ok(NetworkCodingParams {
169 ph,
170 k,
171 n,
172 q,
173 peer_count,
174 mu,
175 sigma,
176 })
177}
178
179/// Compute the **network storage utilisation factor** `k / N`.
180///
181/// Given the per-peer stability scores and a per-tier target recovery
182/// probability `Ph`, this returns the largest fraction `k / N ∈ [0.0, 1.0]`
183/// such that:
184///
185/// ```text
186/// P(X ≥ k) ≥ Ph where X ~ PoissonBinomial(p_1, …, p_N)
187/// ```
188///
189/// Interpretation: for every raw byte stored by a node, only `k/N` bytes
190/// correspond to recoverable file content (the rest is redundancy).
191///
192/// # Arguments
193///
194/// - `stabilities` — per-Pouch stability scores including the **own node**.
195/// Use `0.4` as the default for a new node with no QoS history.
196/// - `ph` — target recovery probability (from
197/// [`crate::network::ReputationTier::qos_target_ph`]).
198///
199/// Returns `0.0` when the network is empty, too small, or too unreliable
200/// to satisfy `Ph`.
201pub fn compute_network_storage_factor(stabilities: &[f64], ph: f64) -> f64 {
202 let n = stabilities.len();
203 if n == 0 || !(0.0 < ph && ph < 1.0) {
204 return 0.0;
205 }
206 let mu: f64 = stabilities.iter().sum();
207 let sigma: f64 = stabilities
208 .iter()
209 .map(|&p| p * (1.0 - p))
210 .sum::<f64>()
211 .sqrt();
212 let z_ph = probit(ph);
213 let k_float = mu - z_ph * sigma + 0.5;
214 if k_float < 1.0 {
215 return 0.0;
216 }
217 let k = (k_float.floor() as usize).min(n);
218 k as f64 / n as f64
219}
220
221/// Recompute the **rolling effective recovery probability** `Pe` for a file
222/// that was uploaded with threshold `k`, given the current stability scores.
223///
224/// This is called periodically by the daemon to update `pe` in the
225/// [`FileManifest`](crate::storage::manifest::FileManifest).
226///
227/// ```text
228/// Pe = P(X ≥ k) ≈ 1 − Φ((k − 0.5 − μ) / σ)
229/// ```
230pub fn effective_recovery_probability(stabilities: &[f64], k: usize) -> f64 {
231 let mu: f64 = stabilities.iter().sum();
232 let sigma_sq: f64 = stabilities.iter().map(|&p| p * (1.0 - p)).sum();
233 let sigma = sigma_sq.sqrt();
234 if sigma < 1e-9 {
235 return if k as f64 <= mu { 1.0 } else { 0.0 };
236 }
237 let z = (k as f64 - 0.5 - mu) / sigma;
238 1.0 - standard_normal_cdf(z)
239}
240
241// ── Statistics helpers ────────────────────────────────────────────────────────
242
243/// Standard normal CDF `Φ(x)` using a rational approximation.
244///
245/// Maximum absolute error < 7.5 × 10⁻⁸ (Hart, 1968 / Abramowitz & Stegun 26.2.17).
246pub fn standard_normal_cdf(x: f64) -> f64 {
247 // Use the complementary error function: Φ(x) = 0.5 · erfc(−x / √2)
248 let t = x / std::f64::consts::SQRT_2;
249 0.5 * erfc_approx(-t)
250}
251
252/// Inverse standard normal CDF (probit) using the Beasley-Springer-Moro
253/// rational approximation. Accurate to ~10⁻⁹ for `p ∈ (10⁻⁶, 1−10⁻⁶)`.
254///
255/// Reference: Peter J. Acklam, "An algorithm for computing the inverse normal
256/// cumulative distribution function", 2003.
257pub fn probit(p: f64) -> f64 {
258 debug_assert!(0.0 < p && p < 1.0, "probit: p must be in (0, 1)");
259
260 // Coefficients for the rational approximation
261 const A: [f64; 6] = [
262 -3.969_683_028_665_376e1,
263 2.209_460_984_245_205e2,
264 -2.759_285_104_469_687e2,
265 1.383_577_518_672_69e2,
266 -3.066_479_806_614_716e1,
267 2.506_628_277_459_239,
268 ];
269 const B: [f64; 5] = [
270 -5.447_609_879_822_406e1,
271 1.615_858_368_580_409e2,
272 -1.556_989_798_598_866e2,
273 6.680_131_188_771_972e1,
274 -1.328_068_155_288_572e1,
275 ];
276 const C: [f64; 6] = [
277 -7.784_894_002_430_293e-3,
278 -3.223_964_580_411_365e-1,
279 -2.400_758_277_161_838,
280 -2.549_732_539_343_734,
281 4.374_664_141_464_968,
282 2.938_163_982_698_783,
283 ];
284 const D: [f64; 4] = [
285 7.784_695_709_041_462e-3,
286 3.224_671_290_700_398e-1,
287 2.445_134_137_142_996,
288 3.754_408_661_907_416,
289 ];
290
291 let p_lo = 0.02425_f64;
292 let p_hi = 1.0 - p_lo;
293
294 if p < p_lo {
295 // Rational approximation for lower tail
296 let q = (-2.0 * p.ln()).sqrt();
297 (((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
298 / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
299 } else if p <= p_hi {
300 // Rational approximation for central region
301 let q = p - 0.5;
302 let r = q * q;
303 (((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q
304 / (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1.0)
305 } else {
306 // Rational approximation for upper tail (use symmetry)
307 -probit(1.0 - p)
308 }
309}
310
/// Complementary error function approximation used internally by `standard_normal_cdf`.
///
/// Abramowitz & Stegun 7.1.26 polynomial fit in `t = 1/(1 + p·|x|)`; absolute
/// error < 1.5e-7 for all real `x`. Negative arguments use the reflection
/// `erfc(x) = 2 − erfc(−x)`.
fn erfc_approx(x: f64) -> f64 {
    const P: f64 = 0.3275911;
    // a₁..a₅ from A&S 7.1.26, lowest order first.
    const COEFF: [f64; 5] = [
        0.254_829_592,
        -0.284_496_736,
        1.421_413_741,
        -1.453_152_027,
        1.061_405_429,
    ];

    let t = 1.0 / (1.0 + P * x.abs());

    // Horner evaluation: t·(a₁ + t·(a₂ + t·(a₃ + t·(a₄ + t·a₅)))).
    let mut poly = 0.0_f64;
    for &c in COEFF.iter().rev() {
        poly = poly * t + c;
    }
    poly *= t;

    let positive_branch = poly * (-x * x).exp();
    if x >= 0.0 {
        positive_branch
    } else {
        2.0 - positive_branch
    }
}
326
327// ── Tests ─────────────────────────────────────────────────────────────────────
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute-tolerance float comparison helper.
    fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
        (a - b).abs() < tol
    }

    // ── Statistics ────────────────────────────────────────────────────────

    #[test]
    fn standard_normal_cdf_known_values() {
        assert!(approx_eq(standard_normal_cdf(0.0), 0.5, 1e-6));
        assert!(approx_eq(standard_normal_cdf(1.0), 0.841_344_7, 1e-5));
        assert!(approx_eq(standard_normal_cdf(-1.0), 0.158_655_3, 1e-5));
        assert!(approx_eq(standard_normal_cdf(3.0), 0.998_650_1, 1e-5));
        assert!(approx_eq(standard_normal_cdf(-3.0), 0.001_349_9, 1e-5));
    }

    #[test]
    fn probit_known_values() {
        // probit(Φ(z)) ≈ z — round-trips through both approximations.
        for z in [-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0] {
            let p = standard_normal_cdf(z);
            assert!(
                approx_eq(probit(p), z, 1e-4),
                "z={z} p={p} probit={}",
                probit(p)
            );
        }
    }

    #[test]
    fn probit_0_5_is_zero() {
        assert!(probit(0.5).abs() < 1e-9);
    }

    // ── compute_coding_params ─────────────────────────────────────────────

    #[test]
    fn uniform_perfect_peers() {
        // 10 peers, each with stability 1.0, Ph=0.999
        // μ=10, σ=0 → k = floor(10 - 3.09*0 + 0.5) = 10
        // But σ≈0, so k = floor(μ + 0.5) = 10
        let stabilities = vec![1.0_f64; 10];
        let params = compute_coding_params(&stabilities, 0.999, 1.0).unwrap();
        assert!(params.k > 0);
        assert!(params.k <= 10);
        assert!(params.n >= params.k);
        // The stored overhead must be self-consistent with n and k.
        assert!(approx_eq(
            params.q,
            (params.n - params.k) as f64 / params.k as f64,
            1e-9
        ));
    }

    #[test]
    fn compute_k_decreases_with_lower_ph() {
        let stabilities = vec![0.9_f64; 20];
        let p_high = compute_coding_params(&stabilities, 0.999, 1.0).unwrap();
        let p_low = compute_coding_params(&stabilities, 0.9, 1.0).unwrap();
        // Lower Ph needs less safety margin, so its threshold k can only be
        // equal or higher. (`<=` already covers equality; the previous extra
        // `|| p_high.k == p_low.k` disjunct was a tautology.)
        assert!(p_high.k <= p_low.k);
    }

    #[test]
    fn compute_k_with_mixed_stabilities() {
        // Heterogeneous network: some fast, some unreliable peers
        let stabilities = vec![0.95, 0.9, 0.8, 0.7, 0.6, 0.95, 0.85, 0.75];
        let params = compute_coding_params(&stabilities, 0.99, 1.0).unwrap();
        assert!(params.k >= 1);
        assert!(params.n >= params.k);
        assert!(params.ph == 0.99);
        // Effective probability should be close to or above ph
        assert!(params.effective_probability() >= 0.9);
    }

    #[test]
    fn error_on_empty_stabilities() {
        assert!(compute_coding_params(&[], 0.99, 1.0).is_err());
    }

    #[test]
    fn error_on_invalid_ph() {
        let s = vec![0.9_f64; 5];
        assert!(compute_coding_params(&s, 0.0, 1.0).is_err());
        assert!(compute_coding_params(&s, 1.0, 1.0).is_err());
        assert!(compute_coding_params(&s, -0.1, 1.0).is_err());
    }

    #[test]
    fn error_on_zero_q_target() {
        let s = vec![0.9_f64; 5];
        assert!(compute_coding_params(&s, 0.99, 0.0).is_err());
    }

    #[test]
    fn n_capped_at_peer_count() {
        // 3 peers, q_target=5.0 → n would be huge; must be capped at 3
        let stabilities = vec![0.9_f64; 3];
        let params = compute_coding_params(&stabilities, 0.9, 5.0).unwrap();
        assert!(params.n <= 3);
    }

    #[test]
    fn effective_recovery_probability_k1_all_good() {
        // σ = 0 with μ = 5 ≥ k = 1 → certain recovery.
        let stabilities = vec![1.0_f64; 5];
        let pe = effective_recovery_probability(&stabilities, 1);
        assert!(approx_eq(pe, 1.0, 1e-6));
    }

    #[test]
    fn effective_recovery_probability_consistent_with_params() {
        let stabilities = vec![0.85_f64; 15];
        let params = compute_coding_params(&stabilities, 0.99, 1.0).unwrap();
        let pe = effective_recovery_probability(&stabilities, params.k);
        // Should be >= Ph (by construction of k)
        assert!(pe >= 0.99 - 1e-3, "pe={pe} should be ≈0.99");
    }
}
448}