From 8be768bbea5e80b8fb21f99b66e0f269994454aa Mon Sep 17 00:00:00 2001 From: STEVAN Antoine <antoine.stevan@isae-supaero.fr> Date: Mon, 23 Sep 2024 08:55:28 +0000 Subject: [PATCH] add examples and complete the documentation (dragoon/komodo!166) ## changelog - _semi\_avid_, _kzg_ and _aplonk_ examples have been added - the `fs` module has been hidden behind an `fs` feature - the `conversions` module has been properly hidden behind the `test` config feature - the documentation has been completed - some error messages have been improved > **Note** > > the documentation of aPlonK has been left as-is for now --- Cargo.toml | 9 +++ Makefile | 2 +- README.md | 2 + bins/saclin/Cargo.toml | 2 +- examples/README.md | 17 +++++ examples/aplonk.rs | 84 ++++++++++++++++++++++++ examples/kzg.rs | 92 ++++++++++++++++++++++++++ examples/semi_avid.rs | 145 +++++++++++++++++++++++++++++++++++++++++ src/algebra/mod.rs | 70 +++++++++++++++++++- src/aplonk/ipa.rs | 28 +++++--- src/aplonk/mod.rs | 28 ++++++-- src/conversions.rs | 2 - src/fec.rs | 6 +- src/fs.rs | 26 +++++++- src/kzg.rs | 4 +- src/lib.rs | 46 +++++++++++++ src/semi_avid.rs | 134 +++++++++++++++++++++++++++++++++++++ src/zk.rs | 32 +++++++-- 18 files changed, 697 insertions(+), 32 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/aplonk.rs create mode 100644 examples/kzg.rs create mode 100644 examples/semi_avid.rs diff --git a/Cargo.toml b/Cargo.toml index 50e648d6..61eccf11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,3 +33,12 @@ rand = "0.8.5" [features] kzg = ["dep:ark-poly-commit"] aplonk = ["dep:ark-poly-commit"] +fs = [] + +[[example]] +name = "kzg" +required-features = ["kzg"] + +[[example]] +name = "aplonk" +required-features = ["aplonk"] diff --git a/Makefile b/Makefile index db236d71..aac6952f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: fmt fmt-check check clippy test-rs test-nu test example show build-examples +.PHONY: fmt fmt-check check clippy test-rs test-nu test example show doc build-examples DEFAULT_GOAL: fmt-check check clippy test-rs diff --git a/README.md b/README.md index ad90d70b..6fbc9694 100644 --- a/README.md +++ b/README.md @@ -28,5 +28,7 @@ A [CLI example](bins/saclin/examples/cli.nu) is also provided and can be run wit make example ``` +Other examples that showcase the Komodo API are available in [`examples/`](examples/). 
+
 ## the benchmarks
 
 see [`benchmarks/`](benchmarks/README.md)
diff --git a/bins/saclin/Cargo.toml b/bins/saclin/Cargo.toml
index 5ac7706c..d47c13a2 100644
--- a/bins/saclin/Cargo.toml
+++ b/bins/saclin/Cargo.toml
@@ -13,7 +13,7 @@ ark-ff = "0.4.2"
 ark-poly = "0.4.2"
 ark-serialize = "0.4.2"
 ark-std = "0.4.0"
-komodo = { path = "../../" }
+komodo = { path = "../../", features = ["fs"] }
 rand = "0.8.5"
 tracing = "0.1.40"
 tracing-subscriber = "0.3.17"
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 00000000..01263104
--- /dev/null
+++ b/examples/README.md
+the following examples have the same general structure:
+- [_aPlonK_](aplonk.rs)
+- [_KZG+_](kzg.rs)
+- [_Semi-AVID_](semi_avid.rs)
+
+```rust
+// some imports
+
+fn run() -> Result<(), KomodoError> {
+    // the example itself
+
+    Ok(())
+}
+
+fn main() {
+    run().unwrap();
+}
+```
diff --git a/examples/aplonk.rs b/examples/aplonk.rs
new file mode 100644
index 00000000..8d25a06f
--- /dev/null
+++ b/examples/aplonk.rs
+use ark_bls12_381::Bls12_381;
+use ark_ec::{pairing::Pairing, AffineRepr};
+use ark_ff::PrimeField;
+use ark_poly::{univariate::DensePolynomial, DenseUVPolynomial};
+use std::ops::Div;
+
+use komodo::{
+    algebra,
+    algebra::linalg::Matrix,
+    aplonk::{commit, prove, setup, verify},
+    error::KomodoError,
+    fec::encode,
+    zk::trim,
+};
+
+fn run<E, P>() -> Result<(), KomodoError>
+where
+    E: Pairing,
+    P: DenseUVPolynomial<E::ScalarField>,
+    for<'a, 'b> &'a P: Div<&'b P, Output = P>,
+{
+    // the code parameters and the data to manipulate
+    let (k, n) = (3, 6_usize);
+    // NOTE: the size of the data needs to be a power-of-2 multiple of the finite field element
+    // size
+    let nb_bytes = k * 2 * (E::ScalarField::MODULUS_BIT_SIZE as usize / 8);
+    let bytes = include_bytes!("../assets/dragoon_133x133.png")[0..nb_bytes].to_vec();
+
+    // aPlonK needs a trusted setup to craft the proofs for each shard of encoded data.
the bytes + // are arranged in an $m \times k$ matrix, possibly involving padding, where $k$ is the number + // of coefficients for each one of the $m$ polynomials + let degree = k - 1; + let vector_length_bound = + bytes.len() / (E::ScalarField::MODULUS_BIT_SIZE as usize / 8) / (degree + 1); + let params = setup::<E, P>(degree, vector_length_bound).expect("setup failed"); + let (_, vk_psi) = trim(params.kzg.clone(), degree); + + // build the $m$ polynomials from the data + let elements = algebra::split_data_into_field_elements::<E::ScalarField>(&bytes, k); + let mut polynomials = Vec::new(); + for chunk in elements.chunks(k) { + polynomials.push(P::from_coefficients_vec(chunk.to_vec())) + } + + // commit the polynomials + let commit = commit(polynomials.clone(), params.clone()).unwrap(); + + // encode the data with a Vandermonde encoding + let encoding_points = &(0..n) + .map(|i| E::ScalarField::from_le_bytes_mod_order(&i.to_le_bytes())) + .collect::<Vec<_>>(); + let encoding_mat = Matrix::vandermonde_unchecked(encoding_points, k); + let shards = encode::<E::ScalarField>(&bytes, &encoding_mat) + .unwrap_or_else(|_| panic!("could not encode")); + + // craft and attach one proof to each shard of encoded data + let blocks = prove::<E, P>( + commit, + polynomials, + shards, + encoding_points.clone(), + params.clone(), + ) + .unwrap(); + + // verify that all the shards are valid + for (i, block) in blocks.iter().enumerate() { + assert!(verify::<E, P>( + block, + E::ScalarField::from_le_bytes_mod_order(&[i as u8]), + &vk_psi, + params.ipa.tau_1, + params.kzg.powers_of_g[0].into_group(), + params.kzg.h.into_group(), + ) + .unwrap()); + } + + Ok(()) +} + +fn main() { + run::<Bls12_381, DensePolynomial<<Bls12_381 as Pairing>::ScalarField>>().unwrap(); +} diff --git a/examples/kzg.rs b/examples/kzg.rs new file mode 100644 index 00000000..571a581d --- /dev/null +++ b/examples/kzg.rs @@ -0,0 +1,92 @@ +use ark_bls12_381::Bls12_381; +use ark_ec::pairing::Pairing; +use ark_ff::PrimeField; +use ark_poly::univariate::DensePolynomial; +use ark_poly::DenseUVPolynomial; +use ark_poly_commit::kzg10::KZG10; +use ark_std::ops::Div; +use ark_std::test_rng; + +use komodo::{algebra, algebra::linalg::Matrix, error::KomodoError, fec::encode, kzg, zk::trim}; + +fn run<E, P>() -> Result<(), KomodoError> +where + E: Pairing, + P: DenseUVPolynomial<E::ScalarField>, + for<'a, 'b> &'a P: Div<&'b P, Output = P>, +{ + let rng = &mut test_rng(); + + // the code parameters and the data to manipulate + let (k, n) = (3, 6_usize); + let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec(); + + // KZG+ needs a trusted setup to craft the proofs for each shard of encoded data. 
the bytes are
+    // arranged in an $m \times k$ matrix, possibly involving padding, where $k$ is the number of
+    // coefficients for each one of the $m$ polynomials
+    let degree = bytes.len() / (E::ScalarField::MODULUS_BIT_SIZE as usize / 8);
+    let params = KZG10::<E, P>::setup(degree, false, rng).expect("setup failed");
+    let (powers, verifier_key) = trim(params, degree);
+
+    // build the $m$ polynomials from the data
+    let elements = algebra::split_data_into_field_elements::<E::ScalarField>(&bytes, k);
+    let mut polynomials = Vec::new();
+    for chunk in elements.chunks(k) {
+        polynomials.push(P::from_coefficients_vec(chunk.to_vec()))
+    }
+
+    // commit the polynomials
+    let (commits, _) = kzg::commit(&powers, &polynomials).unwrap();
+
+    // encode the data with a Vandermonde encoding
+    let encoding_points = &(0..n)
+        .map(|i| E::ScalarField::from_le_bytes_mod_order(&i.to_le_bytes()))
+        .collect::<Vec<_>>();
+    let encoding_mat = Matrix::vandermonde_unchecked(encoding_points, k);
+    let shards = encode::<E::ScalarField>(&bytes, &encoding_mat)
+        .unwrap_or_else(|_| panic!("could not encode"));
+
+    // craft and attach one proof to each shard of encoded data
+    let blocks = kzg::prove::<E, P>(
+        commits,
+        polynomials,
+        shards,
+        encoding_points.clone(),
+        powers,
+    )
+    .expect("KZG+ proof failed");
+
+    // verify that all the shards are valid
+    for (i, block) in blocks.iter().enumerate() {
+        assert!(
+            kzg::verify::<E, P>(
+                block,
+                E::ScalarField::from_le_bytes_mod_order(&[i as u8]),
+                &verifier_key,
+            ),
+            "could not verify block {}",
+            i
+        );
+    }
+
+    // verify a batch of shards at once
+    assert!(
+        kzg::batch_verify(
+            &blocks[1..4],
+            &[
+                E::ScalarField::from_le_bytes_mod_order(&[1]),
+                E::ScalarField::from_le_bytes_mod_order(&[2]),
+                E::ScalarField::from_le_bytes_mod_order(&[3]),
+            ],
+            &verifier_key
+        )
+        .unwrap(),
+        "could not batch-verify blocks 1..4"
+    );
+
+    Ok(())
+}
+
+fn main() {
+    run::<Bls12_381, DensePolynomial<<Bls12_381 as Pairing>::ScalarField>>().unwrap();
+}
diff --git a/examples/semi_avid.rs b/examples/semi_avid.rs
new file mode 100644
index 00000000..23ae489c
--- /dev/null
+++ b/examples/semi_avid.rs
+use ark_bls12_381::{Fr, G1Projective};
+use ark_ec::CurveGroup;
+use ark_ff::PrimeField;
+use ark_poly::univariate::DensePolynomial;
+use ark_poly::DenseUVPolynomial;
+use ark_serialize::{CanonicalDeserialize, CanonicalSerialize, Compress, Validate};
+use ark_std::{ops::Div, test_rng};
+
+use komodo::{
+    algebra::linalg::Matrix,
+    error::KomodoError,
+    fec::{decode, encode},
+    semi_avid::{build, prove, recode, verify, Block},
+    zk::setup,
+};
+
+fn run<F, G, P>() -> Result<(), KomodoError>
+where
+    F: PrimeField,
+    G: CurveGroup<ScalarField = F>,
+    P: DenseUVPolynomial<F>,
+    for<'a, 'b> &'a P: Div<&'b P, Output = P>,
+{
+    let mut rng = test_rng();
+
+    // the code parameters and the data to manipulate
+    let (k, n) = (3, 6_usize);
+    let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+    eprintln!("loaded {} bytes of data", bytes.len());
+
+    // Semi-AVID needs a _trusted setup_ to prove and verify blocks of encoded data
+    eprint!("creating trusted setup... ");
+    let powers = setup::<F, G>(bytes.len(), &mut rng)?;
+    eprintln!("done");
+
+    // encode and prove the data with a _random_ encoding
+    eprint!("building blocks... ");
"); + let encoding_mat = &Matrix::random(k, n, &mut rng); + let shards = encode(&bytes, encoding_mat)?; + let proof = prove(&bytes, &powers, encoding_mat.height)?; + let blocks = build::<F, G, P>(&shards, &proof); + eprintln!("done"); + + // verify that all the blocks are valid + eprint!("verifying blocks... "); + for block in &blocks { + assert!(verify(block, &powers)?); + } + + // corrupt the first block... + let mut serialized = vec![0; blocks[0].serialized_size(Compress::No)]; + blocks[0] + .serialize_with_mode(&mut serialized[..], Compress::No) + .unwrap(); + // -> attack the `data` field of the [`komodo::fec::Shard`] structure + let field_element_size = (F::MODULUS_BIT_SIZE as usize + 7) / 8; + const VEC_LEN_SIZE: usize = 8; + const HASH_SIZE: usize = 32; + const U32_SIZE: usize = 4; + let data_start_index = + U32_SIZE + VEC_LEN_SIZE + k * field_element_size + VEC_LEN_SIZE + HASH_SIZE + VEC_LEN_SIZE; + serialized[data_start_index] = 0x00; + let block: Block<F, G> = + Block::deserialize_with_mode(&serialized[..], Compress::No, Validate::No).unwrap(); + // ... and make sure it is not valid anymore + assert!(!verify(&block, &powers)?); + + eprintln!("all good"); + + // some recoding examples: + // - let's denote the original blocks by $(b_i)_{0 \leq i \lt n}$ + // - if block $b$ is the result of recoding blocks $b_i$ and $b_j$, then we write + // $b = b_i + b_j$ + eprint!("some recoding scenarii... "); + + // successfully decode the data with the following blocks + // - $b_0 + b_1$ + // - $b_2$ + // - $b_3$ + // + // > **Note** + // > + // > it works because $b_0$, $b_1$, $b_2$ and $b_3$ are all linearly independent and thus $b_0 + // + b_1$, $b_2$ and $b_3$ are as well + let b_0_1 = recode(&blocks[0..=1], &mut rng).unwrap().unwrap(); + let shards = vec![ + b_0_1.shard, + blocks[2].shard.clone(), + blocks[3].shard.clone(), + ]; + assert_eq!(bytes, decode(shards).unwrap()); + + // fail to decode the data with the following blocks + // - $b_0$ + // - $b_1$ + // - $b_0 + b_1$ + // + // > **Note** + // > + // > it fails because $b_0 + b_1$ is lineary dependent on $b_0$ and $b_1$ + let b_0_1 = recode(&[blocks[0].clone(), blocks[1].clone()], &mut rng) + .unwrap() + .unwrap(); + let shards = vec![ + blocks[0].shard.clone(), + blocks[1].shard.clone(), + b_0_1.shard, + ]; + assert!(decode(shards).is_err()); + + // successfully decode the data with the following blocks + // - $b_0 + b_1$ + // - $b_2 + b_3$ + // - $b_1 + b_4$ + let b_0_1 = recode(&blocks[0..=1], &mut rng).unwrap().unwrap(); + let b_2_3 = recode(&blocks[2..=3], &mut rng).unwrap().unwrap(); + let b_1_4 = recode(&[blocks[1].clone(), blocks[4].clone()], &mut rng) + .unwrap() + .unwrap(); + let shards = vec![b_0_1.shard, b_2_3.shard, b_1_4.shard]; + assert_eq!(bytes, decode(shards).unwrap()); + + // successfully decode the data with the following blocks + // - $b_0 + b_1 + b_2$ + // - $b_0 + b_1 + b_2$ + // - $b_0 + b_1 + b_2$ + // + // > **Note** + // > + // > it works, even though all three recoded shards come from the same original ones, because + // > the linear combinations that generate the recoded shards are random and different each + // > time. 
+    // > time. because the finite field used is so large, we end up with linearly independent shards
+    let fully_recoded_shards = (0..3)
+        .map(|_| recode(&blocks[0..=2], &mut rng).unwrap().unwrap().shard)
+        .collect();
+    assert_eq!(bytes, decode(fully_recoded_shards).unwrap());
+
+    eprintln!("all good");
+
+    Ok(())
+}
+
+fn main() {
+    run::<Fr, G1Projective, DensePolynomial<Fr>>().unwrap();
+}
diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs
index a34ab0b9..22c6d619 100644
--- a/src/algebra/mod.rs
+++ b/src/algebra/mod.rs
@@ -1,5 +1,4 @@
-//! manipulate finite field elements
-//!
+//! Manipulate finite field elements
 #[cfg(any(feature = "kzg", feature = "aplonk"))]
 use ark_ec::pairing::Pairing;
 #[cfg(feature = "aplonk")]
@@ -18,6 +17,55 @@ pub mod linalg;
 ///
 /// [`split_data_into_field_elements`] supports padding the output vector of
 /// elements by giving a number that needs to divide the length of the vector.
+///
+/// # Example
+/// In the following example `Fp` is a small finite field with prime order $65537$, which requires
+/// only two bytes to represent elements.
+///
+/// 1. splitting `0x02000300`, which contains 4 bytes, will result in two elements of `Fp`, i.e. 2
+/// and 3
+/// ```
+/// # #[derive(ark_ff::MontConfig)]
+/// # #[modulus = "65537"]
+/// # #[generator = "3"]
+/// # struct FpConfig_;
+/// # type Fp = ark_ff::Fp64<ark_ff::MontBackend<FpConfig_, 1>>;
+/// #
+/// # use komodo::algebra::split_data_into_field_elements;
+/// # use ark_ff::PrimeField;
+/// # fn main() {
+/// assert_eq!(
+///     split_data_into_field_elements::<Fp>(&[2, 0, 3, 0], 1),
+///     vec![Fp::from(2), Fp::from(3)],
+/// );
+/// # }
+/// ```
+/// 2. splitting `0x0200030004000500`, which contains 8 bytes, and asking for a multiple of 3
+/// elements, will result in 6 elements of `Fp`, i.e. 2, 3, 4 and 5, which come from the data, and
+/// two padding elements set to 1.
+/// ``` +/// # #[derive(ark_ff::MontConfig)] +/// # #[modulus = "65537"] +/// # #[generator = "3"] +/// # struct FpConfig_; +/// # type Fp = ark_ff::Fp64<ark_ff::MontBackend<FpConfig_, 1>>; +/// # +/// # use komodo::algebra::split_data_into_field_elements; +/// # use ark_ff::PrimeField; +/// # fn main() { +/// assert_eq!( +/// split_data_into_field_elements::<Fp>(&[2, 0, 3, 0, 4, 0, 5, 0], 3), +/// vec![ +/// Fp::from(2), +/// Fp::from(3), +/// Fp::from(4), +/// Fp::from(5), +/// Fp::from(1), +/// Fp::from(1), +/// ], +/// ); +/// # } +/// ``` pub fn split_data_into_field_elements<F: PrimeField>(bytes: &[u8], modulus: usize) -> Vec<F> { let bytes_per_element = (F::MODULUS_BIT_SIZE as usize) / 8; @@ -48,6 +96,11 @@ pub(crate) fn merge_elements_into_bytes<F: PrimeField>(elements: &[F]) -> Vec<u8 } #[cfg(any(feature = "kzg", feature = "aplonk"))] +/// compute the linear combination of polynomials +/// +/// if the _lhs_ are the coefficients, $(c_i)$ in a field $\mathbb{F}$, and the _rhs_ are the +/// polynomials, $(p_i)$ with coefficients in $\mathbb{F}$, then the result of this is +/// $$P(X) = \sum\limits_{i = 0}^{n - 1} c_i p_i(X)$$ pub(crate) fn scalar_product_polynomial<E, P>(lhs: &[E::ScalarField], rhs: &[P]) -> P where E: Pairing, @@ -68,6 +121,12 @@ where } #[cfg(feature = "aplonk")] +/// compute the scalar product between vectors of elements in $G_1$ and in $G_2$ respectively +/// +/// if the _lhs_ are the elements of $G_1$, $(a_i)$, and the _rhs_ are the ones from $G_2$, $(b_i)$, +/// then the result of this is +/// $$c = \sum\limits_{i = 0}^{n - 1} E(a_i, b_i)$$ +/// where $E$ is a bilinear mapping from $G_1 \times G_2 \rightarrow G_T$ pub(super) fn scalar_product_pairing<E: Pairing>(lhs: &[E::G1], rhs: &[E::G2]) -> PairingOutput<E> { lhs.iter() .zip(rhs.iter()) @@ -76,6 +135,11 @@ pub(super) fn scalar_product_pairing<E: Pairing>(lhs: &[E::G1], rhs: &[E::G2]) - } #[cfg(feature = "aplonk")] +/// compute the scalar product between vectors of elements of a finite field $\mathbb{F}$ +/// +/// if _lhs_ is the first vector, $(a_i)$, and _rhs_ is the second, $(b_i)$, then the result of this +/// is +/// $$c = \sum\limits_{i = 0}^{n - 1} a_i b_i$$ pub(super) fn scalar_product<E: Pairing>( lhs: &[E::ScalarField], rhs: &[E::ScalarField], @@ -84,11 +148,13 @@ pub(super) fn scalar_product<E: Pairing>( } #[cfg(feature = "aplonk")] +/// see [`scalar_product`], but with _lhs_ a vector from $G_1$ pub(super) fn scalar_product_g1<E: Pairing>(lhs: &[E::G1], rhs: &[E::ScalarField]) -> E::G1 { lhs.iter().zip(rhs.iter()).map(|(l, r)| l.mul(r)).sum() } #[cfg(feature = "aplonk")] +/// see [`scalar_product`], but with _lhs_ a vector from $G_2$ pub(super) fn scalar_product_g2<E: Pairing>(lhs: &[E::G2], rhs: &[E::ScalarField]) -> E::G2 { lhs.iter().zip(rhs.iter()).map(|(l, r)| l.mul(r)).sum() } diff --git a/src/aplonk/ipa.rs b/src/aplonk/ipa.rs index e5e2d98c..5ea33833 100644 --- a/src/aplonk/ipa.rs +++ b/src/aplonk/ipa.rs @@ -57,7 +57,7 @@ fn is_power_of_two(n: usize) -> bool { /// prove a sequence of commits with a modified IPA /// /// > **Note** -/// > when we say *page xx* or *<name of algorithm>*, we refer to the following +/// > when we say *page xx* or *\<name of algorithm\>*, we refer to the following /// > paper: [aPlonk from [Ambrona et al.]][aPlonK] /// /// the following algorithm @@ -86,9 +86,10 @@ pub(super) fn prove<E: Pairing>( mu: &[E::G1], ) -> Result<(Proof<E>, Vec<E::ScalarField>), KomodoError> { if !is_power_of_two(k) { - return Err(KomodoError::Other( - "PolynomialCountIpaError: not a 
power of 2".to_string(), - )); + return Err(KomodoError::Other(format!( + "PolynomialCountIpaError: expected $k$ to be a power of 2, found {}", + k + ))); } let kappa = f64::log2(k as f64) as usize; let mut l_g = vector::zero::<PairingOutput<E>>(kappa); @@ -133,7 +134,10 @@ pub(super) fn prove<E: Pairing>( let u_j_inv = if let Some(inverse) = u[j].inverse() { inverse } else { - return Err(KomodoError::Other("EllipticInverseError".to_string())); + return Err(KomodoError::Other(format!( + "EllipticInverseError: could not inverse {:?}", + u[j], + ))); }; // 6. @@ -173,7 +177,7 @@ pub(super) fn prove<E: Pairing>( /// verify the integrity of a proven sequence of commits with a modified IPA /// /// > **Note** -/// > when we say *page xx* or *<name of algorithm>*, we refer to the following +/// > when we say *page xx* or *\<name of algorithm\>*, we refer to the following /// > paper: [aPlonk from [Ambrona et al.]][aPlonK] /// /// the following algorithm @@ -210,9 +214,10 @@ where for<'a, 'b> &'a P: Div<&'b P, Output = P>, { if !is_power_of_two(k) { - return Err(KomodoError::Other( - "PolynomialCountIpaError: not a power of 2".to_string(), - )); + return Err(KomodoError::Other(format!( + "PolynomialCountIpaError: expected $k$ to be a power of 2, found {}", + k, + ))); } let kappa = f64::log2(k as f64) as usize; let mut ts = match transcript::initialize(c_g, r, p) { @@ -247,7 +252,10 @@ where if let Some(inverse) = u_i.inverse() { u_inv.push(inverse) } else { - return Err(KomodoError::Other("EllipticInverseError".to_string())); + return Err(KomodoError::Other(format!( + "EllipticInverseError: could not inverse {:?}", + u_i, + ))); } } diff --git a/src/aplonk/mod.rs b/src/aplonk/mod.rs index 5b488347..9abb3517 100644 --- a/src/aplonk/mod.rs +++ b/src/aplonk/mod.rs @@ -117,19 +117,27 @@ where let supported_degree = polynomials.iter().map(|p| p.degree()).max().unwrap_or(0); if setup.ipa.ck_tau.len() < polynomials.len() { - return Err(KomodoError::Other("setup error".to_string())); + return Err(KomodoError::Other(format!( + "setup error: expected at least {} powers of ck_tau for IPA, found {}", + polynomials.len(), + setup.ipa.ck_tau.len(), + ))); } let (powers, _) = trim(setup.kzg, supported_degree); if powers.powers_of_g.len() <= supported_degree { - return Err(KomodoError::Other("setup error".to_string())); + return Err(KomodoError::Other(format!( + "setup error: expected at least {} powers of g for KZG, found {}", + supported_degree, + powers.powers_of_g.len(), + ))); } // commit.1. let mu = match ark_commit(&powers, &polynomials) { Ok((mu, _)) => mu, - Err(error) => return Err(KomodoError::Other(error.to_string())), + Err(error) => return Err(KomodoError::Other(format!("commit error: {}", error))), }; let mu: Vec<E::G1> = mu.iter().map(|c| c.0.into_group()).collect(); @@ -205,7 +213,7 @@ where &Randomness::<E::ScalarField, P>::empty(), ) { Ok(proof) => proof, - Err(error) => return Err(KomodoError::Other(format!("ark error: {}", error))), + Err(error) => return Err(KomodoError::Other(format!("kzg open error: {}", error))), }; // open.5. 
@@ -222,7 +230,10 @@ where
         if let Some(inverse) = u_i.inverse() {
             u_inv.push(inverse)
         } else {
-            return Err(KomodoError::Other("EllipticInverseError".to_string()));
+            return Err(KomodoError::Other(format!(
+                "EllipticInverseError: could not invert {:?}",
+                u_i
+            )));
         }
     }
@@ -245,7 +256,7 @@ where
         &Randomness::<E::ScalarField, P>::empty(),
     ) {
         Ok((h, _)) => h,
-        Err(error) => return Err(KomodoError::Other(format!("ArkError: {}", error))),
+        Err(error) => return Err(KomodoError::Other(format!("kzg witness error: {}", error))),
     };
     // open.8.2.
     let aplonk_proof = h
@@ -360,7 +371,10 @@ where
         if let Some(inverse) = u_i.inverse() {
             u_inv.push(inverse)
         } else {
-            return Err(KomodoError::Other("EllipticInverseError".to_string()));
+            return Err(KomodoError::Other(format!(
+                "EllipticInverseError: could not invert {:?}",
+                u_i
+            )));
         }
     }
diff --git a/src/conversions.rs b/src/conversions.rs
index e6ae5b93..6cc35f0e 100644
--- a/src/conversions.rs
+++ b/src/conversions.rs
@@ -1,4 +1,3 @@
-#[cfg(test)]
 pub(crate) fn u32_to_u8_vec(num: u32) -> Vec<u8> {
     vec![
         (num & 0xFF) as u8,
@@ -8,7 +7,6 @@
     ]
 }
 
-#[cfg(test)]
 mod tests {
     #[test]
     fn u32_to_u8_convertion() {
diff --git a/src/fec.rs b/src/fec.rs
index 7a53afbe..0d6383b8 100644
--- a/src/fec.rs
+++ b/src/fec.rs
@@ -141,7 +141,9 @@ pub fn recode_random<F: PrimeField>(
 /// > otherwise, an error might be thrown to the caller.
 ///
 /// Padding might be applied depending on the size of the data compared to the size of the encoding
-/// matrix.
+/// matrix (see [`algebra::split_data_into_field_elements`]).
+///
+/// This is the inverse of [`decode`].
 pub fn encode<F: PrimeField>(
     data: &[u8],
     encoding_mat: &Matrix<F>,
@@ -180,6 +182,8 @@
 /// > this function might fail in a variety of cases
 /// > - if there are too few shards
 /// > - if there are linear dependencies between shards
+///
+/// This is the inverse of [`encode`].
 pub fn decode<F: PrimeField>(shards: Vec<Shard<F>>) -> Result<Vec<u8>, KomodoError> {
     if shards.is_empty() {
         return Err(KomodoError::TooFewShards(0, 0));
diff --git a/src/fs.rs b/src/fs.rs
index 3b59d588..360a698c 100644
--- a/src/fs.rs
+++ b/src/fs.rs
@@ -52,8 +52,19 @@
     Ok(filename)
 }
 
-/// dump a bunch of blocks to the disk and return a JSON / NUON compatible table
+/// dump a bunch of blocks to the disk and return a JSON / NUON compatible list
 /// of all the hashes that have been dumped
+///
+/// > **Note**
+/// >
+/// > this is a wrapper around [`dump`]
+///
+/// # Example
+/// let's say we give three blocks to [`dump_blocks`] and their hashes are `aaaa`, `bbbb` and
+/// `cccc` respectively, then this function will return
+/// ```json
+/// '["aaaa", "bbbb", "cccc"]'
+/// ```
 pub fn dump_blocks<F: PrimeField, G: CurveGroup<ScalarField = F>>(
     blocks: &[Block<F, G>],
     block_dir: &PathBuf,
@@ -77,6 +88,19 @@
 }
 
 /// read blocks from a list of block hashes
+///
+/// > **Note**
+/// >
+/// > this is basically the inverse of [`dump_blocks`]
+///
+/// # Example
+/// let's say we have three blocks `A`, `B` and `C` whose hashes are `aaaa`, `bbbb` and `cccc`
+/// respectively.
+/// if one calls [`read_blocks`] with `aaaa` and `cccc` as the queried block hashes, the output of
+/// this function will be
+/// ```ignore
+/// Ok(vec![("aaaa", A), ("cccc", C)])
+/// ```
 pub fn read_blocks<F: PrimeField, G: CurveGroup<ScalarField = F>>(
     block_hashes: &[String],
     block_dir: &Path,
diff --git a/src/kzg.rs b/src/kzg.rs
index 72baa083..32da9d3d 100644
--- a/src/kzg.rs
+++ b/src/kzg.rs
@@ -63,7 +63,7 @@ where
     for p in &polynomials {
         let elt = p.evaluate(pt);
         if let Err(error) = elt.serialize_with_mode(&mut eval_bytes, Compress::Yes) {
-            return Err(KomodoError::Other(error.to_string()));
+            return Err(KomodoError::Other(format!("Serialization: {}", error)));
         }
     }
@@ -88,7 +88,7 @@
             commit: commits.clone(),
             proof,
         }),
-        Err(error) => return Err(KomodoError::Other(error.to_string())),
+        Err(error) => return Err(KomodoError::Other(format!("kzg open error: {}", error))),
     };
 }
diff --git a/src/lib.rs b/src/lib.rs
index 78afa610..8b69d6a2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,57 @@
 //! Komodo: Cryptographically-proven Erasure Coding
+//!
+//! Komodo provides an easy-to-use Rust library and ecosystem that is composed of two main parts:
+//! - support for FEC encoding and decoding with the [`fec`] submodule
+//! - support for proving and verifying shards of encoded data with the [`semi_avid`], [`kzg`]* and
+//! [`aplonk`]* submodules
+//!
+//! > **Note**
+//! >
+//! > modules marked with an `*`, e.g. [`kzg`]*, are hidden behind a _Cargo_ feature with the same
+//! > name
+//!
+//! Other submodules define several fundamental building blocks of Komodo which are not mandatory
+//! to explore in order to understand the protocols.
+//!
+//! # Example
+//! Let's explain with a very simple example how things operate with Komodo.
+//!
+//! > **Note**
+//! >
+//! > the following example uses some syntax of Rust but is NOT valid Rust code and omits a lot of
+//! > details for both Rust and Komodo
+//!
+//! 1. choose an _encoding matrix_ to encode the _input data_
+//! ```ignore
+//! let encoding_mat = Matrix::random(k, n, rng);
+//! ```
+//! 2. encode the data and build encoded _shards_
+//! ```ignore
+//! let shards = fec::encode(bytes, encoding_mat);
+//! ```
+//! 3. attach a _cryptographic proof_ to all the shards and get a proven _block_
+//! ```ignore
+//! let blocks = prove(bytes, k);
+//! ```
+//! 4. verify each _block_ individually
+//! ```ignore
+//! for block in blocks {
+//!     assert!(verify(block));
+//! }
+//! ```
+//! 5. decode the original data with any subset of _k_ blocks
+//! ```ignore
+//! assert_eq!(bytes, fec::decode(blocks[0..k]));
+//! ```
 pub mod algebra;
 #[cfg(feature = "aplonk")]
 pub mod aplonk;
+#[cfg(test)]
 #[cfg(any(feature = "kzg", feature = "aplonk"))]
 mod conversions;
 pub mod error;
 pub mod fec;
+#[cfg(feature = "fs")]
 pub mod fs;
 #[cfg(feature = "kzg")]
 pub mod kzg;
diff --git a/src/semi_avid.rs b/src/semi_avid.rs
index 2371b570..b21a9e73 100644
--- a/src/semi_avid.rs
+++ b/src/semi_avid.rs
@@ -1,3 +1,136 @@
+//! Semi-AVID: a proving scheme suited for an _information dispersal_ context
+//!
+//! In their paper, [Nazirkhanova et al.](https://arxiv.org/abs/2111.12323) introduce a new proving
+//! scheme.
+//!
+//! In contrast to how it is commonly done in protocols such as
+//! [KZG](https://link.springer.com/chapter/10.1007/978-3-642-17373-8_11), the data is interpreted
+//! as column-oriented polynomials.
+//!
+//! Using FEC notations, there are $k$ such column-oriented polynomials, i.e. the $k$ source shards.
+//! They are all committed using a common trusted setup and these $k$ commitments are used to prove
+//! the integrity of encoded shards.
+//!
+//! In order to verify this property, i.e. that a given shard has been computed as a linear
+//! combination of the $k$ source shards, the _homomorphic_ property of the commit operation is
+//! used: _the commitment of a linear combination of polynomials is equal to the same linear
+//! combination of the commitments of the same polynomials_.
+//!
+//! This gives us a simple, lightweight and fast commitment scheme.
+//!
+//! # Example
+//! > **Note**
+//! >
+//! > below, `F`, `G` and `DP<F>` are explicitly specified everywhere but, in _real_ code, i.e.
+//! > using generic types as it's commonly done in Arkworks, it should be possible to specify them
+//! > once and Rust will take care of _carrying_ the types in the rest of the code. Also, `DP<F>`
+//! > will likely be its own generic type, usually written `P` in this code base.
+//!
+//! - first, let's import some types...
+//! ```
+//! use ark_bls12_381::{Fr as F, G1Projective as G};
+//! use ark_poly::univariate::DensePolynomial as DP;
+//! ```
+//! - and set up the input data
+//! ```
+//! # fn main() {
+//! let mut rng = ark_std::test_rng();
+//!
+//! let (k, n) = (3, 6_usize);
+//! let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+//! # }
+//! ```
+//! - then, Semi-AVID requires a trusted setup to prove and verify
+//! ```
+//! # use ark_bls12_381::{Fr as F, G1Projective as G};
+//! # fn main() {
+//! # let mut rng = ark_std::test_rng();
+//! #
+//! # let (k, n) = (3, 6_usize);
+//! # let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+//! #
+//! let powers = komodo::zk::setup::<F, G>(bytes.len(), &mut rng).unwrap();
+//! # }
+//! ```
+//! - we can now build an encoding matrix, encode the data, prove the shards and build [`Block`]s
+//! ```
+//! # use ark_bls12_381::{Fr as F, G1Projective as G};
+//! # use ark_poly::univariate::DensePolynomial as DP;
+//! #
+//! # use komodo::semi_avid::{build, prove, verify};
+//! #
+//! # fn main() {
+//! # let mut rng = ark_std::test_rng();
+//! #
+//! # let (k, n) = (3, 6_usize);
+//! # let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+//! #
+//! # let powers = komodo::zk::setup::<F, G>(bytes.len(), &mut rng).unwrap();
+//! #
+//! let encoding_mat = &komodo::algebra::linalg::Matrix::random(k, n, &mut rng);
+//! let shards = komodo::fec::encode(&bytes, encoding_mat).unwrap();
+//! let proof = prove::<F, G, DP<F>>(&bytes, &powers, encoding_mat.height).unwrap();
+//! let blocks = build::<F, G, DP<F>>(&shards, &proof);
+//! # }
+//! ```
+//! - finally, each [`Block`] can be verified individually
+//! ```
+//! # use ark_bls12_381::{Fr as F, G1Projective as G};
+//! # use ark_poly::univariate::DensePolynomial as DP;
+//! #
+//! # use komodo::semi_avid::{build, prove, verify};
+//! #
+//! # fn main() {
+//! # let mut rng = ark_std::test_rng();
+//! #
+//! # let (k, n) = (3, 6_usize);
+//! # let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+//! #
+//! # let powers = komodo::zk::setup::<F, G>(bytes.len(), &mut rng).unwrap();
+//! #
+//! # let encoding_mat = &komodo::algebra::linalg::Matrix::random(k, n, &mut rng);
+//! # let shards = komodo::fec::encode(&bytes, encoding_mat).unwrap();
+//! # let proof = prove::<F, G, DP<F>>(&bytes, &powers, encoding_mat.height).unwrap();
+//! # let blocks = build::<F, G, DP<F>>(&shards, &proof);
+//! #
+//! for block in &blocks {
+//!     assert!(verify::<F, G, DP<F>>(block, &powers).unwrap());
+//! }
+//! # }
+//! ```
+//! - and decoded using any $k$ of the shards
+//! ```
+//! # use ark_bls12_381::{Fr as F, G1Projective as G};
+//! # use ark_poly::univariate::DensePolynomial as DP;
+//! #
+//! # use komodo::semi_avid::{build, prove};
+//! #
+//! # fn main() {
+//! # let mut rng = ark_std::test_rng();
+//! #
+//! # let (k, n) = (3, 6_usize);
+//! # let bytes = include_bytes!("../assets/dragoon_133x133.png").to_vec();
+//! #
+//! # let powers = komodo::zk::setup::<F, G>(bytes.len(), &mut rng).unwrap();
+//! #
+//! # let encoding_mat = &komodo::algebra::linalg::Matrix::random(k, n, &mut rng);
+//! # let shards = komodo::fec::encode(&bytes, encoding_mat).unwrap();
+//! # let proof = prove::<F, G, DP<F>>(&bytes, &powers, encoding_mat.height).unwrap();
+//! # let blocks = build::<F, G, DP<F>>(&shards, &proof);
+//! #
+//! let shards = blocks[0..k].iter().cloned().map(|b| b.shard).collect();
+//! assert_eq!(bytes, komodo::fec::decode(shards).unwrap());
+//! # }
+//! ```
+//!
+//! # Recoding
+//! By construction, Semi-AVID supports an operation on shards known as _recoding_. This allows
+//! combining an arbitrary number of shards together on the fly, without decoding the data and then
+//! re-encoding brand new shards.
+//!
+//! This is great because any node in the system can locally augment its pool of shards.
+//! However, this operation will introduce linear dependencies between recoded shards and their
+//! _parents_, which might decrease the diversity of shards and harm the decoding process.
 use ark_ec::CurveGroup;
 use ark_ff::PrimeField;
 use ark_poly::DenseUVPolynomial;
@@ -81,6 +214,7 @@ impl<F: PrimeField, G: CurveGroup<ScalarField = F>> std::fmt::Display for Block<
 /// different, an error will be returned.
 ///
 /// > **Note**
+/// >
 /// > this is a wrapper around [`fec::recode_random`].
 pub fn recode<F: PrimeField, G: CurveGroup<ScalarField = F>>(
     blocks: &[Block<F, G>],
diff --git a/src/zk.rs b/src/zk.rs
index 4f86b3cd..09640c6d 100644
--- a/src/zk.rs
+++ b/src/zk.rs
@@ -1,4 +1,9 @@
 //! a replacement of Arkworks' KZG10 module
+//!
+//! this module mostly redefines [`ark_poly_commit::kzg10::KZG10::setup`] and
+//! [`ark_poly_commit::kzg10::KZG10::commit`] to be used with [`crate::semi_avid`].
+//!
+//! it also defines some tool functions such as [`trim`] or [`nb_elements_in_setup`].
 use ark_ec::{scalar_mul::fixed_base::FixedBase, CurveGroup, VariableBaseMSM};
 use ark_ff::PrimeField;
 use ark_poly::DenseUVPolynomial;
@@ -12,9 +17,13 @@
 use ark_poly_commit::kzg10;
 
 use crate::error::KomodoError;
 
-/// the representation of a ZK trusted setup
+/// a ZK trusted setup
 ///
 /// this is a simple wrapper around a sequence of elements of the curve.
+///
+/// > **Note**
+/// >
+/// > this is a simpler version of [`ark_poly_commit::kzg10::UniversalParams`]
 #[derive(Debug, Clone, Default, CanonicalSerialize, CanonicalDeserialize, PartialEq)]
 pub struct Powers<F: PrimeField, G: CurveGroup<ScalarField = F>>(Vec<G::Affine>);
 
@@ -33,13 +42,21 @@
     }
 }
 
-/// a ZK commitment, i.e. an evaluatio of a given polynomial on a secret
+/// a ZK commitment, i.e. an evaluation of a given polynomial on a secret element
 ///
-/// this is a simpler wrapper around a single elemenf of the curve.
+/// this is a simple wrapper around a single element of the curve.
+/// +/// > **Note** +/// > +/// > this is a simpler version of [`ark_poly_commit::kzg10::Commitment`] #[derive(Debug, Clone, Copy, Default, CanonicalSerialize, CanonicalDeserialize, PartialEq)] pub struct Commitment<F: PrimeField, G: CurveGroup<ScalarField = F>>(pub G::Affine); /// create a trusted setup of a given size, the expected maximum degree of the data +/// +/// > **Note** +/// > +/// > this is a simpler version of [`ark_poly_commit::kzg10::KZG10::setup`] pub fn setup<F: PrimeField, G: CurveGroup<ScalarField = F>>( max_degree: usize, rng: &mut impl RngCore, @@ -106,6 +123,10 @@ fn convert_to_bigints<F: PrimeField>(p: &[F]) -> Vec<F::BigInt> { } /// compute a commitment of a polynomial on a trusted setup +/// +/// > **Note** +/// > +/// > this is a simpler version of [`ark_poly_commit::kzg10::KZG10::commit`] pub fn commit<F, G, P>( powers: &Powers<F, G>, polynomial: &P, @@ -140,8 +161,8 @@ where /// /// > **Note** /// > - `powers` can be generated with functions like [`setup`] -/// > - if `polynomials` has length `n`, then [`commit`] will generate `n` -/// > commits. +/// > - if `polynomials` has length `m`, then [`batch_commit`] will generate `m` commits +/// > - see [`commit`] for the individual _commit_ operations #[allow(clippy::type_complexity)] #[inline(always)] pub fn batch_commit<F, G, P>( @@ -202,6 +223,7 @@ pub fn trim<E: Pairing>( #[cfg(any(feature = "kzg", feature = "aplonk"))] #[allow(clippy::type_complexity)] +/// same as [`batch_commit`] but uses [`ark_poly_commit::kzg10::KZG10::commit`] instead of [`commit`] pub fn ark_commit<E, P>( powers: &kzg10::Powers<E>, polynomials: &[P], -- GitLab