From 7d5fca82e16af56059211519f0fa4579e912d7e7 Mon Sep 17 00:00:00 2001 From: STEVAN Antoine <antoine.stevan@isae-supaero.fr> Date: Thu, 2 May 2024 10:54:48 +0000 Subject: [PATCH] add an example to study the _recoding inbreeding_ phenomenon (dragoon/komodo!97) this MR adds `examples/inbreeding.rs` which allows to do two things - _naive recoding_: in order to generate a new random shard, we first $k$-decode the whole data and then $1$-encode a single shard - _true recoding_: to achieve the same goal, we directly $k$-recode shards into a new one ## the scenario regardless of the _recoding strategy_, the scenario is the same 1. data is split into $k$ shards and $n$ original shards are generated 2. for a given number of steps $s$, $k$ shards are drawn randomly with replacement and we count the number of successful decoding, given a measure of the _diversity_, $$\delta = \frac{\#success}{\#attempts}$$ 3. create a new _recoded shard_ and add it to the $n$ previous ones, i.e. $n$ increases by one 4. repeat steps 2. and 3. as long as you want ## results  --- Cargo.toml | 1 + examples/inbreeding.rs | 265 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 examples/inbreeding.rs diff --git a/Cargo.toml b/Cargo.toml index da9d6d28..84d67a8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ ark-poly = "0.4.2" ark-serialize = "0.4.2" ark-std = "0.4.0" clap = { version = "4.5.4", features = ["derive"] } +indicatif = "0.17.8" plnk = { git = "https://gitlab.isae-supaero.fr/a.stevan/plnk", tag = "0.6.0", version = "0.6.0" } rand = "0.8.5" rs_merkle = "1.4.1" diff --git a/examples/inbreeding.rs b/examples/inbreeding.rs new file mode 100644 index 00000000..cc4e2441 --- /dev/null +++ b/examples/inbreeding.rs @@ -0,0 +1,265 @@ +/// # Example +/// - run the experiment +/// ```nushell +/// const NB_BYTES = 1_024 * 10 +/// const K = 10 +/// const N = 2 * $K +/// const NB_MEASUREMENTS = 1_000 +/// const MAX_T = 150 +/// +/// cargo run --example inbreeding -- ...[ +/// $NB_BYTES, +/// -k $K +/// -n $N +/// --nb-measurements $NB_MEASUREMENTS +/// -t $MAX_T +/// --test-case end-to-end +/// ] | lines | into float | save --force baseline.nuon +/// +/// seq 2 $K | reverse | each {|r| +/// let inbreeding = cargo run --example inbreeding -- ...[ +/// $NB_BYTES, +/// -k $K +/// -n $N +/// --nb-measurements $NB_MEASUREMENTS +/// -t $MAX_T +/// --test-case recoding +/// -r $r +/// ] | lines | into float +/// +/// { +/// r: $r, +/// inbreeding: $inbreeding, +/// } +/// } | save --force inbreeding.nuon +/// ``` +/// - plot the results +/// ```nushell +/// let data = open inbreeding.nuon +/// let k = $data.r | math max +/// let w = 3 +/// let l = $data.inbreeding.0 | length +/// +/// use std repeat +/// +/// # let raw = $data | update inbreeding { take ($l - $w + 1)} +/// let smooth = $data | update inbreeding { prepend (1 | repeat $w) | window $w | each { math avg } } +/// +/// $smooth +/// | insert name {|it| +/// let r = if $it.r == $k { "k" } else { $"k - ($k - $it.r)" } +/// $"$r = ($r)$" +/// } +/// # | append ($raw | insert name null | insert style { line: { alpha: 0.1 } }) +/// | update inbreeding {|it| +/// let l = $it.inbreeding | length +/// $it.inbreeding | wrap y | merge (seq 1 $l | wrap x) | insert e 0 +/// } +/// | rename --column { inbreeding: "points" } +/// | insert style.color {|it| +/// match $it.r { +/// 10 => "red", +/// 9 => "orange", +/// 8 => "yellow", +/// 7 => "blue", +/// 6 => "purple", +/// 5 => "green", +/// 4 => "cyan", +/// 3 => "black", +/// 2 => "magenta", +/// _ => "gray", +/// } +/// } +/// | save --force /tmp/graphs.json +/// ``` +/// ``` +/// let x_min = open /tmp/graphs.json | get points.0.x | math min +/// let x_max = open /tmp/graphs.json | get points.0.x | math max +/// +/// gplt plot ...[ +/// --graphs-file /tmp/graphs.json +/// --x-lim ($x_min - 1) ($x_max + 1) +/// --y-lim -0.01 1.01 +/// --fullscreen +/// ] +/// ``` +use std::process::exit; + +use ark_ff::PrimeField; + +use clap::{Parser, ValueEnum}; +use indicatif::ProgressBar; +use komodo::{ + error::KomodoError, + fec::{self, Shard}, + linalg::Matrix, +}; +use rand::{rngs::ThreadRng, seq::SliceRandom, thread_rng, Rng, RngCore}; + +fn random_bytes(n: usize, rng: &mut ThreadRng) -> Vec<u8> { + (0..n).map(|_| rng.gen::<u8>()).collect() +} + +fn setup<F: PrimeField>(bytes: &[u8], k: usize, n: usize) -> Result<Vec<Shard<F>>, KomodoError> { + let points: Vec<F> = (0..n) + .map(|i| F::from_le_bytes_mod_order(&i.to_le_bytes())) + .collect(); + let encoding_mat = Matrix::vandermonde_unchecked(&points, k); + let shards = fec::encode(bytes, &encoding_mat)?; + + Ok(shards) +} + +fn measure_inbreeding<F: PrimeField>( + shards: &[Shard<F>], + k: usize, + nb_measurements: usize, + rng: &mut impl RngCore, +) -> f64 { + let mut s: Vec<_> = shards.to_vec(); + let mut count = 0; + for _ in 0..nb_measurements { + // get any k of the shards + s.shuffle(rng); + if fec::decode(s.iter().take(k).cloned().collect()).is_ok() { + count += 1; + } + } + + count as f64 / nb_measurements as f64 +} + +fn end_to_end<F: PrimeField>( + bytes: &[u8], + k: usize, + n: usize, + max_t: usize, + nb_measurements: usize, + rng: &mut impl RngCore, +) -> Result<(), KomodoError> { + let original_shards = setup(bytes, k, n)?; + let mut shards = original_shards.clone(); + + let pb = ProgressBar::new(max_t as u64); + for _ in 0..max_t { + let inbreeding = measure_inbreeding(&shards, k, nb_measurements, rng); + println!("{}", inbreeding); + + // decode the data + let data = fec::decode(original_shards.clone())?; + + // re-encode a new random shard + let encoding_mat = Matrix::vandermonde_unchecked(&[F::rand(rng)], k); + let new_shard = fec::encode(&data, &encoding_mat)?.first().unwrap().clone(); + shards.push(new_shard); + + pb.inc(1); + } + pb.finish_with_message("done"); + + Ok(()) +} + +fn recoding<F: PrimeField>( + bytes: &[u8], + k: usize, + n: usize, + max_t: usize, + nb_shards_to_recode: usize, + nb_measurements: usize, + rng: &mut impl RngCore, +) -> Result<(), KomodoError> { + let mut shards = setup(bytes, k, n)?; + + let pb = ProgressBar::new(max_t as u64); + for _ in 0..max_t { + let inbreeding = measure_inbreeding(&shards, k, nb_measurements, rng); + println!("{}", inbreeding); + + // recode a new random shard + let coeffs: Vec<F> = (0..nb_shards_to_recode).map(|_| F::rand(rng)).collect(); + let s: Vec<_> = shards.iter().take(nb_shards_to_recode).cloned().collect(); + let new_shard = fec::recode_with_coeffs(&s, &coeffs).unwrap(); + shards.push(new_shard); + + pb.inc(1); + } + pb.finish_with_message("done"); + + Ok(()) +} + +#[derive(ValueEnum, Clone)] +enum TestCase { + EndToEnd, + Recoding, +} + +#[derive(Parser)] +#[command(version, about, long_about = None)] +struct Cli { + #[arg()] + nb_bytes: usize, + + #[arg(short)] + k: usize, + #[arg(short)] + n: usize, + #[arg(short)] + t: usize, + #[arg(short)] + r: Option<usize>, + + #[arg(long)] + test_case: TestCase, + + /// the number of measurements to repeat each case, larger values will reduce the variance of + /// the measurements + #[arg(long)] + nb_measurements: usize, +} + +fn main() { + let cli = Cli::parse(); + + if cli.nb_measurements == 0 { + eprintln!( + "`--nb-measurements` should be a strictly positive integer, found {}", + cli.nb_measurements + ); + exit(1); + } + + let mut rng = thread_rng(); + + let bytes = random_bytes(cli.nb_bytes, &mut rng); + + match cli.test_case { + TestCase::EndToEnd => { + let _ = end_to_end::<ark_pallas::Fr>( + &bytes, + cli.k, + cli.n, + cli.t, + cli.nb_measurements, + &mut rng, + ); + } + TestCase::Recoding => { + if cli.r.is_none() { + eprintln!("recoding needs -r"); + exit(1); + } + + let _ = recoding::<ark_pallas::Fr>( + &bytes, + cli.k, + cli.n, + cli.t, + cli.r.unwrap(), + cli.nb_measurements, + &mut rng, + ); + } + } +} -- GitLab