From 61a2320e5833303cafa3626b1e8edec79ff8d38c Mon Sep 17 00:00:00 2001
From: STEVAN Antoine <antoine.stevan@isae-supaero.fr>
Date: Thu, 23 May 2024 08:24:28 +0000
Subject: [PATCH] define more complex inbreeding strategies (dragoon/komodo!103)

this MR:
- refactors the "inbreeding" example into `examples/inbreeding/`
- adds `--strategy` and `--environment`
  - `Strategy::draw` will draw the number of shards to keep for recoding
  - `Environment::update` will update the pool of shards by losing some of them
---
 Cargo.toml                                    |   4 +
 Makefile                                      |   1 +
 examples/inbreeding/environment.rs            | 208 ++++++++++++++++++
 examples/{inbreeding.rs => inbreeding/mod.rs} | 132 ++++++++++---
 examples/inbreeding/strategy.rs               | 160 ++++++++++++++
 5 files changed, 474 insertions(+), 31 deletions(-)
 create mode 100644 examples/inbreeding/environment.rs
 rename examples/{inbreeding.rs => inbreeding/mod.rs} (67%)
 create mode 100644 examples/inbreeding/strategy.rs

diff --git a/Cargo.toml b/Cargo.toml
index 84d67a8c..d6490d6b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -83,3 +83,7 @@ path = "examples/benches/recoding.rs"
 [[example]]
 name = "bench_fec"
 path = "examples/benches/fec.rs"
+
+[[example]]
+name = "inbreeding"
+path = "examples/inbreeding/mod.rs"
diff --git a/Makefile b/Makefile
index 56ef73bb..23d35c34 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,7 @@ clippy:
 
 test:
 	cargo test --workspace --verbose
+	cargo test --examples --verbose
 	nu tests/cli.nu
 	nu tests/binary.nu
 
diff --git a/examples/inbreeding/environment.rs b/examples/inbreeding/environment.rs
new file mode 100644
index 00000000..ff223dfd
--- /dev/null
+++ b/examples/inbreeding/environment.rs
@@ -0,0 +1,208 @@
+use rand::{seq::SliceRandom, Rng, RngCore};
+
+#[derive(Debug, PartialEq)]
+pub(super) enum Environment {
+    /// at each time step, remove `n` elements with probability `p`
+    RandomFixed { p: f64, n: usize },
+    /// at each time step, remove a fraction `q` of the elements with probability `p`
+    RandomDynamic { p: f64, q: f64 },
+    /// at each time step, remove `n` elements
+    Fixed { n: usize },
+}
+
+impl Environment {
+    /// `update(things, rng)` is `things` with some elements potentially removed according to the
+    /// [`Environment`] type
+    ///
+    /// the surviving elements are returned in a random order and, when more elements should be
+    /// removed than there are in `things`, the result is simply empty.
+    pub(super) fn update<T: Clone>(&self, things: &[T], rng: &mut impl RngCore) -> Vec<T> {
+        let mut things = things.to_vec();
+        things.shuffle(rng);
+
+        match self {
+            Environment::Fixed { n } => things.iter().take(things.len().saturating_sub(*n)),
+            Environment::RandomFixed { p, n } => {
+                // draw from `rng` so that runs are reproducible with a seeded generator
+                if rng.gen::<f64>() > *p {
+                    return things;
+                }
+                things.iter().take(things.len().saturating_sub(*n))
+            }
+            Environment::RandomDynamic { p, q } => {
+                if rng.gen::<f64>() > *p {
+                    return things;
+                }
+                things
+                    .iter()
+                    .take((things.len() as f64 * (1.0 - q)) as usize)
+            }
+        }
+        .cloned()
+        .collect()
+    }
+
+    /// parses an [`Environment`] from one of the following `:`-separated forms
+    /// - `fixed:<n>`
+    /// - `random-fixed:<p>:<n>`
+    /// - `random-dynamic:<p>:<q>`
+    ///
+    /// where `n` is an unsigned integer and both `p` and `q` are floats between 0 and 1.
+    ///
+    /// on failure, the error is a human-readable message describing the first problem found.
+    pub(super) fn from_str(s: &str) -> Result<Self, String> {
+        // NOTE: `split` always yields at least one token, so indexing `tokens[0]` cannot panic
+        let tokens: Vec<&str> = s.split(':').collect();
+
+        match tokens[0] {
+            "fixed" => {
+                if tokens.len() != 2 {
+                    return Err(format!(
+                        "expected 2 :-separated tokens in '{}', found {}",
+                        s,
+                        tokens.len(),
+                    ));
+                }
+
+                let n = match tokens[1].parse::<usize>() {
+                    Ok(u) => u,
+                    Err(_) => {
+                        return Err(format!(
+                            "could not parse positive integer from '{}'",
+                            tokens[1]
+                        ))
+                    }
+                };
+                Ok(Environment::Fixed { n })
+            }
+            "random-fixed" => {
+                if tokens.len() != 3 {
+                    return Err(format!(
+                        "expected 3 :-separated tokens in '{}', found {}",
+                        s,
+                        tokens.len(),
+                    ));
+                }
+
+                let p = match tokens[1].parse::<f64>() {
+                    Ok(f) => f,
+                    Err(_) => return Err(format!("could not parse float from '{}'", tokens[1])),
+                };
+                if !(0.0..=1.0).contains(&p) {
+                    return Err(format!("p should be a probability, found {}", p));
+                }
+                let n = match tokens[2].parse::<usize>() {
+                    Ok(u) => u,
+                    Err(_) => {
+                        return Err(format!(
+                            "could not parse positive integer from '{}'",
+                            tokens[2]
+                        ))
+                    }
+                };
+                Ok(Environment::RandomFixed { p, n })
+            }
+            "random-dynamic" => {
+                if tokens.len() != 3 {
+                    return Err(format!(
+                        "expected 3 :-separated tokens in '{}', found {}",
+                        s,
+                        tokens.len(),
+                    ));
+                }
+
+                let p = match tokens[1].parse::<f64>() {
+                    Ok(f) => f,
+                    Err(_) => return Err(format!("could not parse float from '{}'", tokens[1])),
+                };
+                if !(0.0..=1.0).contains(&p) {
+                    return Err(format!("p should be a probability, found {}", p));
+                }
+
+                let q = match tokens[2].parse::<f64>() {
+                    Ok(f) => f,
+                    Err(_) => return Err(format!("could not parse float from '{}'", tokens[2])),
+                };
+                if !(0.0..=1.0).contains(&q) {
+                    return Err(format!("q should be between 0 and 1, found {}", q));
+                }
+
+                Ok(Environment::RandomDynamic { p, q })
+            }
+            ty => Err(format!("unknown env type '{}'", ty)),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn environment() {
+        assert_eq!(
+            super::Environment::from_str("fixed:1"),
+            Ok(super::Environment::Fixed { n: 1 })
+        );
+        assert_eq!(
+            super::Environment::from_str("random-fixed:0.2:1"),
+            Ok(super::Environment::RandomFixed { p: 0.2, n: 1 })
+        );
+        assert_eq!(
+            super::Environment::from_str("random-dynamic:0.2:0.3"),
+            Ok(super::Environment::RandomDynamic { p: 0.2, q: 0.3 })
+        );
+
+        let cases = vec![
+            ("foo", "unknown env type 'foo'"),
+            ("foo:", "unknown env type 'foo'"),
+            ("fixed", "expected 2 :-separated tokens in 'fixed', found 1"),
+            ("fixed:", "could not parse positive integer from ''"),
+            ("fixed:foo", "could not parse positive integer from 'foo'"),
+            (
+                "random-fixed:",
+                "expected 3 :-separated tokens in 'random-fixed:', found 2",
+            ),
+            ("random-fixed:foo:", "could not parse float from 'foo'"),
+            ("random-fixed:1.2:", "p should be a probability, found 1.2"),
+            (
+                "random-fixed:0.2:",
+                "could not parse positive integer from ''",
+            ),
+            (
+                "random-fixed:0.2:foo",
+                "could not parse positive integer from 'foo'",
+            ),
+            (
+                "random-dynamic:",
+                "expected 3 :-separated tokens in 'random-dynamic:', found 2",
+            ),
+            ("random-dynamic:foo:", "could not parse float from 'foo'"),
+            (
+                "random-dynamic:1.2:",
+                "p should be a probability, found 1.2",
+            ),
+            ("random-dynamic:0.2:", "could not parse float from ''"),
+            ("random-dynamic:0.2:foo", "could not parse float from 'foo'"),
+            (
+                "random-dynamic:0.2:1.2",
+                "q should be between 0 and 1, found 1.2",
+            ),
+        ];
+
+        for (input, expected_error) in cases {
+            assert_eq!(
+                super::Environment::from_str(input),
+                Err(expected_error.to_string()),
+                "input: {}",
+                input
+            );
+        }
+    }
+
+    #[test]
+    fn update_never_underflows() {
+        let mut rng = rand::thread_rng();
+        let things = vec![1, 2, 3];
+        assert!(super::Environment::Fixed { n: 10 }.update(&things, &mut rng).is_empty());
+        assert_eq!(super::Environment::Fixed { n: 1 }.update(&things, &mut rng).len(), 2);
+    }
+}
diff --git a/examples/inbreeding.rs b/examples/inbreeding/mod.rs
similarity index 67%
rename from examples/inbreeding.rs
rename to examples/inbreeding/mod.rs
index cd683754..01bdd0a3 100644
--- a/examples/inbreeding.rs
+++ b/examples/inbreeding/mod.rs
@@ -22,8 +22,20 @@
 ///     --test-case end-to-end
 /// ] | lines | into float | save --force baseline.nuon
 ///
-/// seq 1 $K | reverse | each {|r|
-///     let inbreeding = ./target/release/examples/inbreeding ...[
+/// let strategies = seq 1 $K
+///     | each { $"single:($in)" }
+///     | append [
+///         "double:0.5:1:2",
+///         "double:0.5:2:3",
+///         "double:0.333:1:2",
+///         "double:0.666:1:2",
+///         "double:0.333:2:3",
+///         "double:0.666:2:3"
+///     ]
+/// let environment = "fixed:0"
+///
+/// $strategies | each {|s|
+///     let diversity = ./target/release/examples/inbreeding ...[
 ///         $NB_BYTES,
 ///         -k $K
 ///         -n $N
@@ -31,41 +43,76 @@
 ///         --measurement-schedule $MEASUREMENT_SCHEDULE
 ///         -t $MAX_T
 ///         --test-case recoding
-///         -r $r
+///         --strategy $s
+///         --environment $environment
 ///     ] | lines | into float
 ///
 ///     {
-///         r: $r,
-///         inbreeding: $inbreeding,
+///         strategy: $s,
+///         diversity: $diversity,
 ///     }
 /// } | save --force inbreeding.nuon
 /// ```
 /// - plot the results
 /// ```nushell
 /// let data = open inbreeding.nuon
-/// let k = $data.r | math max
 /// let w = 3
-/// let l = $data.inbreeding.0 | length
+/// let l = $data.diversity.0 | length
 ///
 /// use std repeat
 ///
-/// # let raw = $data | update inbreeding { take ($l - $w + 1)}
-/// # let smooth = $data | update inbreeding { prepend (1 | repeat $w) | window $w | each { math avg } }
+/// def "parse strategy" []: string -> record<type: string> {
+///     let s = $in
+///
+///     if ($s | str starts-with "single") {
+///         let res = $s
+///             | parse "single:{n}"
+///             | into record
+///             | into int n
+///         { type: "single", n: $res.n }
+///     } else {
+///         let res = $s
+///             | parse "double:{p}:{n}:{m}"
+///             | into record
+///             | into float p
+///             | into int n
+///             | into int m
+///         { type: "double", p: $res.p, n: $res.n, m: $res.m }
+///     }
+/// }
+///
+/// # let raw = $data | update diversity { take ($l - $w + 1)}
+/// # let smooth = $data | update diversity { prepend (1 | repeat $w) | window $w | each { math avg } }
 /// let smooth = $data
 ///
 /// $smooth
+///     | update strategy { parse strategy }
+///     | insert sort {|it|
+///         match $it.strategy.type {
+///             "single" => [$it.strategy.n, 1.0]
+///             "double" => [$it.strategy.n, $it.strategy.p]
+///         }
+///     }
+///     | sort-by sort
+///     | reverse
+///     | reject sort
 ///     | insert name {|it|
-///         let r = if $it.r == $k { "k" } else { $"k - ($k - $it.r)" }
-///         $"$\\sigma = ($r)$"
+///         match $it.strategy.type {
+///             "single" => {
+///                 let sigma = if $it.strategy.n == $K { "k" } else { $"k - ($K - $it.strategy.n)" }
+///                 $"$\\sigma = ($sigma) = ($it.strategy.n)$"
+///             }
+///             "double" => $"($it.strategy.p)? ($it.strategy.n) ; ($it.strategy.m)"
+///         }
 ///     }
 ///     # | append ($raw | insert name null | insert style { line: { alpha: 0.1 } })
-///     | update inbreeding {|it|
-///         let l = $it.inbreeding | length
-///         $it.inbreeding | wrap y | merge (seq 0 $l | wrap x) | insert e 0
+///     | update diversity {|it|
+///         let l = $it.diversity | length
+///         $it.diversity | wrap y | merge (seq 0 $l | wrap x) | insert e 0
 ///     }
-///     | rename --column { inbreeding: "points" }
-///     | insert style.color {|it|
-///         match $it.r {
+///     | rename --column { diversity: "points" }
+///     | insert style {|it|
+///         let color = match $it.strategy.n {
 ///             10 => "tab:red",
 ///             9 => "tab:orange",
 ///             8 => "tab:olive",
@@ -77,8 +124,10 @@
 ///             2 => "tab:pink",
 ///             _ => "tab:gray",
 ///         }
+///
+///         { color: $color, line: { alpha: ($it.strategy.p? | default 1.0) } }
 ///     }
-///     | reject r
+///     | reject strategy
 ///     | save --force /tmp/graphs.json
 /// ```
 /// ```
@@ -114,6 +163,11 @@ use komodo::{
 };
 use rand::{rngs::ThreadRng, seq::SliceRandom, thread_rng, Rng, RngCore};
 
+mod environment;
+mod strategy;
+
+use crate::{environment::Environment, strategy::Strategy};
+
 fn random_bytes(n: usize, rng: &mut ThreadRng) -> Vec<u8> {
     (0..n).map(|_| rng.gen::<u8>()).collect()
 }
@@ -204,7 +258,8 @@ fn recoding<F, Fun>(
     k: usize,
     n: usize,
     max_t: usize,
-    nb_shards_to_recode: usize,
+    strategy: Strategy,
+    env: Environment,
     nb_measurements: usize,
     measurement_schedule: Fun,
     rng: &mut impl RngCore,
@@ -229,11 +284,13 @@ where
         }
 
         // recode a new random shard
-        shards.shuffle(rng);
-        let s: Vec<_> = shards.iter().take(nb_shards_to_recode).cloned().collect();
-        let new_shard = fec::recode_random(&s, rng).unwrap().unwrap();
+        let new_shard = fec::recode_random(&strategy.draw(&shards, rng), rng)
+            .unwrap()
+            .unwrap();
         shards.push(new_shard);
 
+        shards = env.update(&shards, rng);
+
         pb.inc(1);
     }
     pb.finish_with_message("done");
@@ -259,8 +316,15 @@ struct Cli {
     n: usize,
     #[arg(short)]
     t: usize,
-    #[arg(short)]
-    r: Option<usize>,
+    /// either `single:<n>` or `double:<p>:<n>:<m>`
+    /// at each time step, $n$ shards will be used for recoding with probability $p$, otherwise, $m$
+    /// shards will be used with probability $1 - p$; `single:<n>` always uses $n$ shards
+    #[arg(long)]
+    strategy: Option<String>,
+    /// one of `fixed:<n>`, `random-fixed:<p>:<n>` or `random-dynamic:<p>:<q>`: at each step, $n$
+    /// shards or a proportion $q$ of the shards will be removed, with probability $p$ if given
+    #[arg(long)]
+    environment: Option<String>,
     #[arg(long)]
     test_case: TestCase,
 
@@ -309,23 +373,29 @@ fn main() {
             );
         }
         TestCase::Recoding => {
-            if cli.r.is_none() {
-                eprintln!("recoding needs -r");
+            if cli.strategy.is_none() {
+                eprintln!("recoding needs --strategy");
                 exit(1);
             }
+            if cli.environment.is_none() {
+                eprintln!("recoding needs --environment");
+                exit(1);
+            }
+
+            let environment = Environment::from_str(&cli.environment.unwrap()).unwrap();
+            let strategy = Strategy::from_str(&cli.strategy.unwrap()).unwrap();
 
             eprintln!(
-                "true: k = {}, n = {}, sigma = {}",
-                cli.k,
-                cli.n,
-                cli.r.unwrap(),
+                "true: k = {}, n = {}, strategy = {:?}, environment = {:?}",
+                cli.k, cli.n, strategy, environment,
             );
             let _ = recoding::<ark_pallas::Fr, _>(
                 &bytes,
                 cli.k,
                 cli.n,
                 cli.t,
-                cli.r.unwrap(),
+                strategy,
+                environment,
                 cli.nb_measurements,
                 measurement_schedule,
                 &mut rng,
diff --git a/examples/inbreeding/strategy.rs b/examples/inbreeding/strategy.rs
new file mode 100644
index 00000000..0bc12642
--- /dev/null
+++ b/examples/inbreeding/strategy.rs
@@ -0,0 +1,160 @@
+use rand::{seq::SliceRandom, Rng, RngCore};
+
+/// number of elements to draw at each time step
+#[derive(Debug, PartialEq)]
+pub(super) enum Strategy {
+    /// at each time step, draw `n` elements
+    Single { n: usize },
+    /// at each time step, draw `n` elements with probability `p` and `m` elements otherwise
+    Double { p: f64, n: usize, m: usize },
+}
+
+impl Strategy {
+    /// `draw(things, rng)` is a random subset of `things` whose size is given by the [`Strategy`]
+    ///
+    /// if there are less `things` than required, all of them are returned, in a random order.
+    pub(super) fn draw<T: Clone>(&self, things: &[T], rng: &mut impl RngCore) -> Vec<T> {
+        let mut things = things.to_vec();
+        things.shuffle(rng);
+
+        let nb_to_take = match self {
+            Self::Single { n } => *n,
+            Self::Double { p, n, m } => {
+                if rng.gen::<f64>() < *p {
+                    *n
+                } else {
+                    *m
+                }
+            }
+        };
+
+        things.iter().take(nb_to_take).cloned().collect()
+    }
+
+    /// parses a [`Strategy`] from either `single:<n>` or `double:<p>:<n>:<m>`
+    ///
+    /// `n` and `m` are unsigned integers and `p` is a probability, i.e. a float between 0 and 1.
+    ///
+    /// on failure, the error is a human-readable message describing the first problem found.
+    pub(super) fn from_str(s: &str) -> Result<Self, String> {
+        if !s.contains(':') {
+            return Err(format!(
+                "expected at least one :-separated token in '{}', found 0",
+                s,
+            ));
+        }
+
+        let tokens: Vec<&str> = s.split(':').collect();
+
+        match tokens[0] {
+            "single" => {
+                if tokens.len() != 2 {
+                    return Err(format!(
+                        "expected 2 :-separated tokens in '{}', found {}",
+                        s,
+                        tokens.len(),
+                    ));
+                }
+
+                let n = match tokens[1].parse::<usize>() {
+                    Ok(u) => u,
+                    Err(_) => {
+                        return Err(format!(
+                            "could not parse positive integer from '{}'",
+                            tokens[1]
+                        ))
+                    }
+                };
+                Ok(Self::Single { n })
+            }
+            "double" => {
+                if tokens.len() != 4 {
+                    return Err(format!(
+                        "expected 4 :-separated tokens in '{}', found {}",
+                        s,
+                        tokens.len(),
+                    ));
+                }
+
+                let p = match tokens[1].parse::<f64>() {
+                    Ok(f) => f,
+                    Err(_) => return Err(format!("could not parse float from '{}'", tokens[1])),
+                };
+                if !(0.0..=1.0).contains(&p) {
+                    return Err(format!("p should be a probability, found {}", p));
+                }
+
+                let n = match tokens[2].parse::<usize>() {
+                    Ok(u) => u,
+                    Err(_) => {
+                        return Err(format!(
+                            "could not parse positive integer from '{}'",
+                            tokens[2]
+                        ))
+                    }
+                };
+                let m = match tokens[3].parse::<usize>() {
+                    Ok(u) => u,
+                    Err(_) => {
+                        return Err(format!(
+                            "could not parse positive integer from '{}'",
+                            tokens[3]
+                        ))
+                    }
+                };
+
+                Ok(Self::Double { p, n, m })
+            }
+            ty => Err(format!("unknown strat type '{}'", ty)),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn strategy() {
+        assert_eq!(
+            super::Strategy::from_str("single:3"),
+            Ok(super::Strategy::Single { n: 3 })
+        );
+        assert_eq!(
+            super::Strategy::from_str("double:0.1:1:2"),
+            Ok(super::Strategy::Double { p: 0.1, n: 1, m: 2 })
+        );
+
+        let cases = vec![
+            (
+                "foo",
+                "expected at least one :-separated token in 'foo', found 0",
+            ),
+            ("foo:bar:baz", "unknown strat type 'foo'"),
+            ("single:", "could not parse positive integer from ''"),
+            (
+                "single::",
+                "expected 2 :-separated tokens in 'single::', found 3",
+            ),
+            (
+                "double:bar:baz",
+                "expected 4 :-separated tokens in 'double:bar:baz', found 3",
+            ),
+            (
+                "double:bar:baz:spam:eggs",
+                "expected 4 :-separated tokens in 'double:bar:baz:spam:eggs', found 5",
+            ),
+            ("double:bar::", "could not parse float from 'bar'"),
+            ("double:1.2::", "p should be a probability, found 1.2"),
+            ("double:0.2::", "could not parse positive integer from ''"),
+            ("double:0.2:1:", "could not parse positive integer from ''"),
+        ];
+
+        for (input, expected_error) in cases {
+            assert_eq!(
+                super::Strategy::from_str(input),
+                Err(expected_error.to_string()),
+                "input: {}",
+                input,
+            );
+        }
+    }
+}
-- 
GitLab