bp_core/coding/
params.rs

1//! Network-aware coding parameters — adaptive `k` computation.
2//!
3//! ## Problem
4//!
5//! Given a network of `N` Pouch peers, each with an independent availability
6//! probability `p_i` (the **stability score** from [`crate::network::qos`]),
7//! and given a target high recovery probability `Ph`, we need to find the
8//! largest integer `k` such that:
9//!
10//! ```text
11//! P(X ≥ k) ≥ Ph     where  X = Σ Bernoulli(p_i)
12//! ```
13//!
14//! `X` follows a **Poisson-Binomial** distribution.
15//!
16//! ## Normal approximation
17//!
18//! For the sizes encountered in practice (N ≥ 5), the Poisson-Binomial can be
19//! approximated by a normal distribution with matching mean and variance:
20//!
21//! ```text
22//! μ  = Σ p_i              (expected recoverable fragments)
23//! σ² = Σ p_i·(1−p_i)      (variance)
24//!
25//! P(X ≥ k) ≈ 1 − Φ((k − 0.5 − μ) / σ)   ← continuity correction
26//! ```
27//!
28//! Solving for k:
29//!
30//! ```text
31//! k = ⌊ μ − z(Ph) · σ + 0.5 ⌋
32//! ```
33//!
34//! where `z(Ph)` is the standard-normal `Ph`-quantile (probit).
35//!
36//! ## Redundancy overhead `q`
37//!
38//! The total number of fragments distributed per chunk is:
39//!
40//! ```text
41//! n = round(k · (1 + q_target))
42//! q = (n − k) / k              (effective overhead fraction)
43//! ```
44//!
45//! The default `q_target = 1.0` means each chunk generates `~2k` fragments
46//! (one copy-equivalent of redundancy on top of the recovery threshold).
47//!
48//! ## Rolling pe
49//!
50//! After upload, the actual observed probability `pe` can be recomputed at any
51//! time using [`effective_recovery_probability`] with the latest stability scores
52//! and the stored `k`.
53
54use crate::error::{BpError, BpResult};
55
56// ── Public parameters record ──────────────────────────────────────────────────
57
/// Coding parameters computed for a specific network state and target `Ph`.
///
/// Stored in the [`FileManifest`](crate::storage::manifest::FileManifest)
/// at upload time and used by the daemon to determine how many fragments to
/// generate and distribute per chunk.
///
/// Invariants on a successfully computed value (see `compute_coding_params`):
/// `1 ≤ k ≤ n ≤ peer_count` and `q == (n − k) / k`.
#[derive(Debug, Clone)]
pub struct NetworkCodingParams {
    /// Target recovery probability requested at computation time (`0 < ph < 1`).
    pub ph: f64,
    /// Recovery threshold: the minimum fragments needed to reconstruct a chunk.
    pub k: usize,
    /// Total fragments to generate per chunk (= k + redundancy).
    pub n: usize,
    /// Effective redundancy overhead: `(n − k) / k`.
    pub q: f64,
    /// Number of Pouch peers considered in this computation.
    pub peer_count: usize,
    /// Poisson-Binomial mean `μ = Σ p_i` (expected recoverable fragments).
    pub mu: f64,
    /// Poisson-Binomial std-dev `σ = sqrt(Σ p_i·(1−p_i))`.
    pub sigma: f64,
}
80
81impl NetworkCodingParams {
82    /// Effective recovery probability given the same set of stability scores.
83    ///
84    /// Useful for re-evaluating `pe` with updated QoS data after upload.
85    pub fn effective_probability(&self) -> f64 {
86        if self.sigma < 1e-9 {
87            return if self.k as f64 <= self.mu { 1.0 } else { 0.0 };
88        }
89        let z = (self.k as f64 - 0.5 - self.mu) / self.sigma;
90        1.0 - standard_normal_cdf(z)
91    }
92}
93
94// ── Main entry point ──────────────────────────────────────────────────────────
95
96/// Compute the optimal coding parameters for the given network state.
97///
98/// # Arguments
99///
100/// - `stabilities` — slice of per-peer stability scores `p_i ∈ [0.0, 1.0]`
101///   produced by [`crate::network::qos::QosRegistry::all_stability_scores`].
102/// - `ph`          — target recovery probability, e.g. `0.999`.
103/// - `q_target`    — desired redundancy overhead fraction, e.g. `1.0` (= 2×k
104///   total fragments per chunk).  Must be `> 0.0`.
105///
106/// # Errors
107///
108/// - No peers provided (empty `stabilities`).
109/// - `ph` outside `(0.0, 1.0)`.
110/// - `q_target ≤ 0.0`.
111/// - Network too small or too unreliable to satisfy `Ph` (k would be ≤ 0).
112pub fn compute_coding_params(
113    stabilities: &[f64],
114    ph: f64,
115    q_target: f64,
116) -> BpResult<NetworkCodingParams> {
117    if stabilities.is_empty() {
118        return Err(BpError::Coding(
119            "Cannot compute coding params: no peers available".into(),
120        ));
121    }
122    if !(0.0 < ph && ph < 1.0) {
123        return Err(BpError::Coding(format!("Ph ({ph}) must be in (0.0, 1.0)")));
124    }
125    if q_target <= 0.0 {
126        return Err(BpError::Coding(format!(
127            "q_target ({q_target}) must be > 0.0"
128        )));
129    }
130
131    let peer_count = stabilities.len();
132
133    // Poisson-Binomial mean and variance
134    let mu: f64 = stabilities.iter().sum();
135    let sigma_sq: f64 = stabilities.iter().map(|&p| p * (1.0 - p)).sum();
136    let sigma = sigma_sq.sqrt();
137
138    // k = floor(μ − z(Ph) · σ + 0.5)
139    // z(Ph) is the probit (inverse standard-normal CDF) at Ph.
140    let z_ph = probit(ph);
141    let k_float = mu - z_ph * sigma + 0.5;
142
143    if k_float < 1.0 {
144        return Err(BpError::Coding(format!(
145            "Network too small or unreliable to achieve Ph={ph:.4}: \
146             computed k={k_float:.2} < 1 (μ={mu:.2}, σ={sigma:.2})"
147        )));
148    }
149
150    let k = k_float.floor() as usize;
151
152    // n = round(k * (1 + q_target)), but at least k+1
153    let n = ((k as f64) * (1.0 + q_target)).round().max(k as f64 + 1.0) as usize;
154
155    // n must not exceed total peer count (one fragment per Pouch)
156    let n = n.min(peer_count);
157
158    // After capping n, recheck that n >= k
159    if n < k {
160        return Err(BpError::Coding(format!(
161            "After capping to peer_count ({peer_count}), n ({n}) < k ({k}). \
162             Not enough peers to store the required fragments."
163        )));
164    }
165
166    let q = (n - k) as f64 / k as f64;
167
168    Ok(NetworkCodingParams {
169        ph,
170        k,
171        n,
172        q,
173        peer_count,
174        mu,
175        sigma,
176    })
177}
178
179/// Compute the **network storage utilisation factor** `k / N`.
180///
181/// Given the per-peer stability scores and a per-tier target recovery
182/// probability `Ph`, this returns the largest fraction `k / N ∈ [0.0, 1.0]`
183/// such that:
184///
185/// ```text
186/// P(X ≥ k) ≥ Ph    where X ~ PoissonBinomial(p_1, …, p_N)
187/// ```
188///
189/// Interpretation: for every raw byte stored by a node, only `k/N` bytes
190/// correspond to recoverable file content (the rest is redundancy).
191///
192/// # Arguments
193///
194/// - `stabilities` — per-Pouch stability scores including the **own node**.
195///   Use `0.4` as the default for a new node with no QoS history.
196/// - `ph`          — target recovery probability (from
197///   [`crate::network::ReputationTier::qos_target_ph`]).
198///
199/// Returns `0.0` when the network is empty, too small, or too unreliable
200/// to satisfy `Ph`.
201pub fn compute_network_storage_factor(stabilities: &[f64], ph: f64) -> f64 {
202    let n = stabilities.len();
203    if n == 0 || !(0.0 < ph && ph < 1.0) {
204        return 0.0;
205    }
206    let mu: f64 = stabilities.iter().sum();
207    let sigma: f64 = stabilities
208        .iter()
209        .map(|&p| p * (1.0 - p))
210        .sum::<f64>()
211        .sqrt();
212    let z_ph = probit(ph);
213    let k_float = mu - z_ph * sigma + 0.5;
214    if k_float < 1.0 {
215        return 0.0;
216    }
217    let k = (k_float.floor() as usize).min(n);
218    k as f64 / n as f64
219}
220
221/// Recompute the **rolling effective recovery probability** `Pe` for a file
222/// that was uploaded with threshold `k`, given the current stability scores.
223///
224/// This is called periodically by the daemon to update `pe` in the
225/// [`FileManifest`](crate::storage::manifest::FileManifest).
226///
227/// ```text
228/// Pe = P(X ≥ k)  ≈  1 − Φ((k − 0.5 − μ) / σ)
229/// ```
230pub fn effective_recovery_probability(stabilities: &[f64], k: usize) -> f64 {
231    let mu: f64 = stabilities.iter().sum();
232    let sigma_sq: f64 = stabilities.iter().map(|&p| p * (1.0 - p)).sum();
233    let sigma = sigma_sq.sqrt();
234    if sigma < 1e-9 {
235        return if k as f64 <= mu { 1.0 } else { 0.0 };
236    }
237    let z = (k as f64 - 0.5 - mu) / sigma;
238    1.0 - standard_normal_cdf(z)
239}
240
241// ── Statistics helpers ────────────────────────────────────────────────────────
242
243/// Standard normal CDF `Φ(x)` using a rational approximation.
244///
245/// Maximum absolute error < 7.5 × 10⁻⁸ (Hart, 1968 / Abramowitz & Stegun 26.2.17).
246pub fn standard_normal_cdf(x: f64) -> f64 {
247    // Use the complementary error function: Φ(x) = 0.5 · erfc(−x / √2)
248    let t = x / std::f64::consts::SQRT_2;
249    0.5 * erfc_approx(-t)
250}
251
/// Inverse standard normal CDF (probit) using **Peter J. Acklam's** rational
/// approximation (central region plus two symmetric tail expansions).
/// Relative error ≲ 1.15 × 10⁻⁹ across the open unit interval per the
/// reference — far more precision than needed to pick an integer `k`.
///
/// (Previously this doc misattributed the method to Beasley-Springer-Moro;
/// the coefficients below are Acklam's.)
///
/// Reference: Peter J. Acklam, "An algorithm for computing the inverse normal
/// cumulative distribution function", 2003.
///
/// # Panics
///
/// Debug builds assert `p ∈ (0, 1)`.  In release builds, out-of-range input
/// produces an unspecified non-finite result (typically NaN).
pub fn probit(p: f64) -> f64 {
    debug_assert!(0.0 < p && p < 1.0, "probit: p must be in (0, 1)");

    // Coefficients for the rational approximation (Acklam, 2003).
    const A: [f64; 6] = [
        -3.969_683_028_665_376e1,
        2.209_460_984_245_205e2,
        -2.759_285_104_469_687e2,
        1.383_577_518_672_69e2,
        -3.066_479_806_614_716e1,
        2.506_628_277_459_239,
    ];
    const B: [f64; 5] = [
        -5.447_609_879_822_406e1,
        1.615_858_368_580_409e2,
        -1.556_989_798_598_866e2,
        6.680_131_188_771_972e1,
        -1.328_068_155_288_572e1,
    ];
    const C: [f64; 6] = [
        -7.784_894_002_430_293e-3,
        -3.223_964_580_411_365e-1,
        -2.400_758_277_161_838,
        -2.549_732_539_343_734,
        4.374_664_141_464_968,
        2.938_163_982_698_783,
    ];
    const D: [f64; 4] = [
        7.784_695_709_041_462e-3,
        3.224_671_290_700_398e-1,
        2.445_134_137_142_996,
        3.754_408_661_907_416,
    ];

    // Breakpoints between the central rational fit and the tail fits.
    let p_lo = 0.02425_f64;
    let p_hi = 1.0 - p_lo;

    if p < p_lo {
        // Lower tail: substitute q = sqrt(−2·ln p).
        let q = (-2.0 * p.ln()).sqrt();
        (((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
            / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
    } else if p <= p_hi {
        // Central region: rational function of r = (p − ½)².
        let q = p - 0.5;
        let r = q * q;
        (((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q
            / (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1.0)
    } else {
        // Upper tail: use the symmetry probit(p) = −probit(1 − p).
        -probit(1.0 - p)
    }
}
310
/// Complementary error function approximation used internally by `standard_normal_cdf`.
///
/// Horner-form polynomial fit (the classic Abramowitz & Stegun 7.1.26
/// coefficients); absolute error < 1.5 × 10⁻⁷ for all real `x`.
fn erfc_approx(x: f64) -> f64 {
    // Coefficients of the degree-5 polynomial in t = 1 / (1 + P·|x|).
    const P: f64 = 0.3275911;
    const A1: f64 = 0.254_829_592;
    const A2: f64 = -0.284_496_736;
    const A3: f64 = 1.421_413_741;
    const A4: f64 = -1.453_152_027;
    const A5: f64 = 1.061_405_429;

    let t = 1.0 / (1.0 + P * x.abs());
    let poly = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5))));
    let positive_branch = poly * (-x * x).exp();

    // The fit is for x ≥ 0; reflect via erfc(−x) = 2 − erfc(x) otherwise.
    if x < 0.0 {
        2.0 - positive_branch
    } else {
        positive_branch
    }
}
326
327// ── Tests ─────────────────────────────────────────────────────────────────────
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute-tolerance float comparison used throughout these tests.
    fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
        (a - b).abs() < tol
    }

    // ── Statistics ────────────────────────────────────────────────────────

    #[test]
    fn standard_normal_cdf_known_values() {
        assert!(approx_eq(standard_normal_cdf(0.0), 0.5, 1e-6));
        assert!(approx_eq(standard_normal_cdf(1.0), 0.841_344_7, 1e-5));
        assert!(approx_eq(standard_normal_cdf(-1.0), 0.158_655_3, 1e-5));
        assert!(approx_eq(standard_normal_cdf(3.0), 0.998_650_1, 1e-5));
        assert!(approx_eq(standard_normal_cdf(-3.0), 0.001_349_9, 1e-5));
    }

    #[test]
    fn probit_known_values() {
        // Round-trip through the CDF: probit(Φ(z)) ≈ z.
        for z in [-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0] {
            let p = standard_normal_cdf(z);
            assert!(
                approx_eq(probit(p), z, 1e-4),
                "z={z} p={p} probit={}",
                probit(p)
            );
        }
    }

    #[test]
    fn probit_0_5_is_zero() {
        assert!(probit(0.5).abs() < 1e-9);
    }

    // ── compute_coding_params ─────────────────────────────────────────────

    #[test]
    fn uniform_perfect_peers() {
        // 10 peers, each with stability 1.0, Ph=0.999:
        // μ=10, σ=0 → k = floor(μ − z·0 + 0.5) = 10.
        let stabilities = vec![1.0_f64; 10];
        let params = compute_coding_params(&stabilities, 0.999, 1.0).unwrap();
        assert!(params.k > 0);
        assert!(params.k <= 10);
        assert!(params.n >= params.k);
        assert!(approx_eq(
            params.q,
            (params.n - params.k) as f64 / params.k as f64,
            1e-9
        ));
    }

    #[test]
    fn k_grows_when_ph_is_relaxed() {
        // k = ⌊μ − z(Ph)·σ + 0.5⌋: a lower Ph shrinks z(Ph), so the
        // threshold k can only stay equal or increase.
        // (The old assertion `a <= b || a == b` was tautological in its
        // second disjunct; this pins the actual monotonicity.)
        let stabilities = vec![0.9_f64; 20];
        let p_high = compute_coding_params(&stabilities, 0.999, 1.0).unwrap();
        let p_low = compute_coding_params(&stabilities, 0.9, 1.0).unwrap();
        assert!(p_high.k <= p_low.k);
    }

    #[test]
    fn compute_k_with_mixed_stabilities() {
        // Heterogeneous network: some fast, some unreliable peers
        let stabilities = vec![0.95, 0.9, 0.8, 0.7, 0.6, 0.95, 0.85, 0.75];
        let params = compute_coding_params(&stabilities, 0.99, 1.0).unwrap();
        assert!(params.k >= 1);
        assert!(params.n >= params.k);
        assert!(params.ph == 0.99);
        // Effective probability should be close to or above ph
        assert!(params.effective_probability() >= 0.9);
    }

    #[test]
    fn error_on_empty_stabilities() {
        assert!(compute_coding_params(&[], 0.99, 1.0).is_err());
    }

    #[test]
    fn error_on_invalid_ph() {
        let s = vec![0.9_f64; 5];
        assert!(compute_coding_params(&s, 0.0, 1.0).is_err());
        assert!(compute_coding_params(&s, 1.0, 1.0).is_err());
        assert!(compute_coding_params(&s, -0.1, 1.0).is_err());
    }

    #[test]
    fn error_on_zero_q_target() {
        let s = vec![0.9_f64; 5];
        assert!(compute_coding_params(&s, 0.99, 0.0).is_err());
    }

    #[test]
    fn n_capped_at_peer_count() {
        // 3 peers, q_target=5.0 → n would be huge; must be capped at 3
        let stabilities = vec![0.9_f64; 3];
        let params = compute_coding_params(&stabilities, 0.9, 5.0).unwrap();
        assert!(params.n <= 3);
    }

    #[test]
    fn effective_recovery_probability_k1_all_good() {
        let stabilities = vec![1.0_f64; 5];
        let pe = effective_recovery_probability(&stabilities, 1);
        assert!(approx_eq(pe, 1.0, 1e-6));
    }

    #[test]
    fn effective_recovery_probability_consistent_with_params() {
        let stabilities = vec![0.85_f64; 15];
        let params = compute_coding_params(&stabilities, 0.99, 1.0).unwrap();
        let pe = effective_recovery_probability(&stabilities, params.k);
        // Should be >= Ph (by construction of k)
        assert!(pe >= 0.99 - 1e-3, "pe={pe} should be ≈0.99");
    }
}