diff --git a/libiop/algebra/utils.hpp b/libiop/algebra/utils.hpp index f25ed7ab..9f41b023 100644 --- a/libiop/algebra/utils.hpp +++ b/libiop/algebra/utils.hpp @@ -19,6 +19,9 @@ void bitreverse_vector(std::vector &a); template std::vector random_vector(const std::size_t count); +template +bool compare_first(const std::pair &a, const std::pair &b); + template std::vector all_subset_sums(const std::vector &basis, const T& shift = 0) #if defined(__clang__) diff --git a/libiop/algebra/utils.tcc b/libiop/algebra/utils.tcc index e1adde2c..8ed6778b 100644 --- a/libiop/algebra/utils.tcc +++ b/libiop/algebra/utils.tcc @@ -168,6 +168,12 @@ std::vector random_vector(const std::size_t count) return result; } +template +bool compare_first(const std::pair &a, const std::pair &b) +{ + return a.first < b.first; +} + template std::vector random_FieldT_vector(const std::size_t count) { diff --git a/libiop/bcs/bcs_common.hpp b/libiop/bcs/bcs_common.hpp index 3ad86dd9..8f4a7f59 100644 --- a/libiop/bcs/bcs_common.hpp +++ b/libiop/bcs/bcs_common.hpp @@ -25,12 +25,14 @@ template struct bcs_transformation_parameters { std::size_t security_parameter; /* TODO: possibly revisit in the future */ bcs_hash_type hash_enum; + std::size_t cap_size; pow_parameters pow_params_; std::shared_ptr> hashchain_; std::shared_ptr> leafhasher_; two_to_one_hash_function compression_hasher; + cap_hash_function cap_hasher; }; template diff --git a/libiop/bcs/bcs_common.tcc b/libiop/bcs/bcs_common.tcc index a17a51d5..b4409a0b 100644 --- a/libiop/bcs/bcs_common.tcc +++ b/libiop/bcs/bcs_common.tcc @@ -471,9 +471,11 @@ void bcs_protocol::seal_interaction_registrations() size, this->parameters_.leafhasher_, this->parameters_.compression_hasher, + this->parameters_.cap_hasher, this->digest_len_bytes_, make_zk, - this->parameters_.security_parameter); + this->parameters_.security_parameter, + this->parameters_.cap_size); this->Merkle_trees_.emplace_back(MT); } } @@ -710,7 +712,7 @@ void print_detailed_transcript_data( const size_t digest_len_bytes = 2 * (params.security_parameter / 8); const size_t field_size = (libff::log_of_field_size_helper(FieldT::zero()) + 7) / 8; - std::vector two_to_one_hashes_by_round; + std::vector internal_hash_complexity_by_round; std::vector leaf_hashes_by_round; std::vector zk_hashes_by_round; std::vector IOP_size_by_round; @@ -724,6 +726,7 @@ void print_detailed_transcript_data( MT_size, params.leafhasher_, params.compression_hasher, + params.cap_hasher, digest_len_bytes, false, params.security_parameter); @@ -741,10 +744,9 @@ void print_detailed_transcript_data( query_positions.emplace_back(MT_position); } } - size_t num_two_to_one_hashes_in_round = - MT.count_hashes_to_verify_set_membership_proof( - query_positions); - two_to_one_hashes_by_round.emplace_back(num_two_to_one_hashes_in_round); + size_t internal_hash_complexity_in_round = + MT.count_internal_hash_complexity_to_verify_set_membership(query_positions); + internal_hash_complexity_by_round.emplace_back(internal_hash_complexity_in_round); const size_t num_values_per_leaf = transcript.query_responses_[round][0].size(); const size_t num_leaves = transcript.query_responses_[round].size(); leaf_hashes_by_round.emplace_back(num_values_per_leaf * num_leaves); @@ -788,14 +790,17 @@ void print_detailed_transcript_data( printf("\n"); printf("total prover messages size: %lu\n", total_prover_message_size); - const size_t total_two_to_one_hashes = std::accumulate( - two_to_one_hashes_by_round.begin(), two_to_one_hashes_by_round.end(), 0); + const size_t 
total_internal_hash_complexity = std::accumulate( + internal_hash_complexity_by_round.begin(), internal_hash_complexity_by_round.end(), 0); const size_t total_leaves_hashed = std::accumulate( leaf_hashes_by_round.begin(), leaf_hashes_by_round.end(), 0); const size_t total_zk_hashes = std::accumulate( zk_hashes_by_round.begin(), zk_hashes_by_round.end(), 0); - const size_t total_hashes = total_two_to_one_hashes + total_leaves_hashed + total_zk_hashes; - printf("total two to one hashes: %lu\n", total_two_to_one_hashes); + /* Since each two-to-one hash is counted as two units, we divide by 2 here to make it consistent. + It would be nice to take into account how many leaves are hashed, but we are unfortunately + not provided this information. */ + const size_t total_hashes = total_internal_hash_complexity / 2 + total_leaves_hashed + total_zk_hashes; + printf("total internal hash complexity: %lu\n", total_internal_hash_complexity); printf("total leaves hashed: %lu\n", total_leaves_hashed); printf("total hashes: %lu\n", total_hashes); printf("\n"); @@ -808,7 +813,7 @@ void print_detailed_transcript_data( printf("MT_depth %lu\n", MT_depths[round]); printf("IOP size: %lu bytes\n", IOP_size_by_round[round]); printf("BCS size: %lu bytes\n", BCS_size_by_round[round]); - printf("number of two to one hashes: %lu\n", two_to_one_hashes_by_round[round]); + printf("internal hash complexity: %lu\n", internal_hash_complexity_by_round[round]); printf("number of leaves hashed: %lu\n", leaf_hashes_by_round[round]); if (make_zk[round]) { diff --git a/libiop/bcs/common_bcs_parameters.tcc b/libiop/bcs/common_bcs_parameters.tcc index 077219cb..bef04c76 100644 --- a/libiop/bcs/common_bcs_parameters.tcc +++ b/libiop/bcs/common_bcs_parameters.tcc @@ -15,10 +15,11 @@ bcs_transformation_parameters default_bcs_params( params.hash_enum = hash_type; /* TODO: Push setting leaf hash into internal BCS code. Currently 2 is fine, as leaf size is internally unused. */ const size_t leaf_size = 2; - params.leafhasher_ = get_leafhash(hash_type, security_parameter, leaf_size); + params.cap_size = 2; + params.leafhasher_ = get_leafhash(hash_type, security_parameter, leaf_size); params.compression_hasher = get_two_to_one_hash(hash_type, security_parameter); - params.hashchain_ = - get_hashchain(hash_type, security_parameter); + params.cap_hasher = get_cap_hash(hash_type, security_parameter); + params.hashchain_ = get_hashchain(hash_type, security_parameter); // Work per hash. Todo generalize this w/ proper explanations of work amounts const size_t work_per_hash = (hash_type == 1) ? 1 : 128; diff --git a/libiop/bcs/hashing/algebraic_sponge.hpp b/libiop/bcs/hashing/algebraic_sponge.hpp index 5f63408c..89ebc322 100644 --- a/libiop/bcs/hashing/algebraic_sponge.hpp +++ b/libiop/bcs/hashing/algebraic_sponge.hpp @@ -99,14 +99,15 @@ class algebraic_hashchain : public hashchain void absorb_internal(const typename libff::enable_if::value, MT_root_type>::type new_input); }; +/** The algebraic_vector_hash is used for both the algebraic leaf hash and cap hash. 
*/
 template<typename FieldT>
-class algebraic_leafhash : public leafhash<FieldT, FieldT>
+class algebraic_vector_hash : public leafhash<FieldT, FieldT>
 {
     protected:
     std::shared_ptr<algebraic_sponge<FieldT>> sponge_;
     public:
-    algebraic_leafhash(
+    algebraic_vector_hash(
         std::shared_ptr<algebraic_sponge<FieldT>> sponge,
         size_t security_parameter);
     FieldT hash(const std::vector<FieldT> &leaf);
@@ -115,6 +116,9 @@ class algebraic_leafhash : public leafhash<FieldT, FieldT>
         const zk_salt_type &zk_salt);
 };
 
+template<typename FieldT>
+using algebraic_leafhash = algebraic_vector_hash<FieldT>;
+
 template<typename FieldT>
 class algebraic_two_to_one_hash
 {
diff --git a/libiop/bcs/hashing/algebraic_sponge.tcc b/libiop/bcs/hashing/algebraic_sponge.tcc
index 0733f42b..9d2bb4dd 100644
--- a/libiop/bcs/hashing/algebraic_sponge.tcc
+++ b/libiop/bcs/hashing/algebraic_sponge.tcc
@@ -206,7 +206,7 @@ MT_root_type algebraic_hashchain<FieldT, MT_root_type>::squeeze_root_type()
 }
 
 template<typename FieldT>
-algebraic_leafhash<FieldT>::algebraic_leafhash(
+algebraic_vector_hash<FieldT>::algebraic_vector_hash(
     std::shared_ptr<algebraic_sponge<FieldT>> sponge,
     size_t security_parameter) :
     sponge_(sponge->new_sponge())
@@ -218,7 +218,7 @@ algebraic_leafhash<FieldT>::algebraic_leafhash(
 }
 
 template<typename FieldT>
-FieldT algebraic_leafhash<FieldT>::hash(
+FieldT algebraic_vector_hash<FieldT>::hash(
     const std::vector<FieldT> &leaf)
 {
     this->sponge_->absorb(leaf);
@@ -228,7 +228,7 @@ FieldT algebraic_leafhash<FieldT>::hash(
 }
 
 template<typename FieldT>
-FieldT algebraic_leafhash<FieldT>::zk_hash(
+FieldT algebraic_vector_hash<FieldT>::zk_hash(
     const std::vector<FieldT> &leaf,
     const zk_salt_type &zk_salt)
 {
diff --git a/libiop/bcs/hashing/blake2b.cpp b/libiop/bcs/hashing/blake2b.cpp
index 87eb6caa..5e4d61cd 100644
--- a/libiop/bcs/hashing/blake2b.cpp
+++ b/libiop/bcs/hashing/blake2b.cpp
@@ -29,6 +29,8 @@ binary_hash_digest blake2b_two_to_one_hash(const binary_hash_digest &first,
                                            const binary_hash_digest &second,
                                            const std::size_t digest_len_bytes)
 {
+    /* binary_hash_digest is a C++ std::string, and the two inputs are not contiguous in
+       memory, so we concatenate them into a single buffer before hashing. */
     const binary_hash_digest first_plus_second = first + second;
 
     binary_hash_digest result(digest_len_bytes, 'X');
@@ -47,6 +49,33 @@ binary_hash_digest blake2b_two_to_one_hash(const binary_hash_digest &first,
     return result;
 }
 
+binary_hash_digest blake2b_many_to_one_hash(const std::vector<binary_hash_digest> &data,
+                                            const std::size_t digest_len_bytes)
+{
+    /* binary_hash_digest is a C++ std::string, and the inputs are not contiguous in
+       memory, so we concatenate them into a single buffer before hashing. */
+    binary_hash_digest input = data[0];
+    for (std::size_t i = 1; i < data.size(); i++)
+    {
+        input += data[i];
+    }
+
+    binary_hash_digest result(digest_len_bytes, 'X');
+
+    /* see https://download.libsodium.org/doc/hashing/generic_hashing.html */
+    const int status = crypto_generichash_blake2b((unsigned char*)&result[0],
+                                                  digest_len_bytes,
+                                                  (input.empty() ? NULL : (unsigned char*)&input[0]),
+                                                  input.size(),
+                                                  NULL, 0);
+    if (status != 0)
+    {
+        throw std::runtime_error("Got non-zero status from crypto_generichash_blake2b. (Is digest_len_bytes correct?)");
+    }
+
+    return result;
+}
+
 std::size_t blake2b_integer_randomness_extractor(const binary_hash_digest &root,
                                                  const std::size_t index,
                                                  const std::size_t upper_bound)
@@ -73,4 +102,4 @@ std::size_t blake2b_integer_randomness_extractor(const binary_hash_digest &root,
     return result % upper_bound;
 }
 
-}
+} // namespace libiop
diff --git a/libiop/bcs/hashing/blake2b.hpp b/libiop/bcs/hashing/blake2b.hpp
index bdf14bad..78a63c44 100644
--- a/libiop/bcs/hashing/blake2b.hpp
+++ b/libiop/bcs/hashing/blake2b.hpp
@@ -58,6 +58,8 @@ class blake2b_leafhash : public leafhash<FieldT, binary_hash_digest>
         const zk_salt_type &zk_salt);
 };
 
+/* Many-to-one hash which takes in a vector of field elements.
+   Behavior undefined when data is empty.
*/ template binary_hash_digest blake2b_field_element_hash(const std::vector &data, const std::size_t digest_len_bytes); @@ -73,11 +75,16 @@ std::size_t blake2b_integer_randomness_extractor(const binary_hash_digest &root, const std::size_t upper_bound); binary_hash_digest blake2b_zk_element_hash(const std::vector &first, - const std::size_t digest_len_bytes); + const std::size_t digest_len_bytes); binary_hash_digest blake2b_two_to_one_hash(const binary_hash_digest &first, - const binary_hash_digest &second, - const std::size_t digest_len_bytes); + const binary_hash_digest &second, + const std::size_t digest_len_bytes); + +/* Many-to-one hash which takes in a vector of binary_hash_digest. + Behavior undefined when data is empty. */ +binary_hash_digest blake2b_many_to_one_hash(const std::vector &data, + const std::size_t digest_len_bytes); } // namespace libiop diff --git a/libiop/bcs/hashing/blake2b.tcc b/libiop/bcs/hashing/blake2b.tcc index 4643fc89..0375aab1 100644 --- a/libiop/bcs/hashing/blake2b.tcc +++ b/libiop/bcs/hashing/blake2b.tcc @@ -139,9 +139,8 @@ binary_hash_digest blake2b_leafhash::zk_hash( // don't we need to make them in canonical form first? template binary_hash_digest blake2b_field_element_hash(const std::vector &data, - const std::size_t digest_len_bytes) + const std::size_t digest_len_bytes) { - binary_hash_digest result(digest_len_bytes, 'X'); /* see https://download.libsodium.org/doc/hashing/generic_hashing.html */ @@ -155,7 +154,6 @@ binary_hash_digest blake2b_field_element_hash(const std::vector &data, throw std::runtime_error("Got non-zero status from crypto_generichash_blake2b. (Is digest_len_bytes correct?)"); } - return result; } @@ -256,4 +254,4 @@ std::vector blake2b_FieldT_randomness_extractor(const binary_hash_digest return result; } -} +} // namespace libiop diff --git a/libiop/bcs/hashing/dummy_algebraic_hash.hpp b/libiop/bcs/hashing/dummy_algebraic_hash.hpp index f765ccdf..652ae220 100644 --- a/libiop/bcs/hashing/dummy_algebraic_hash.hpp +++ b/libiop/bcs/hashing/dummy_algebraic_hash.hpp @@ -63,6 +63,9 @@ FieldT dummy_algebraic_two_to_one_hash( const FieldT &second, const std::size_t digest_len_bytes); +template +FieldT dummy_algebraic_cap_hash(const std::vector &data, const std::size_t digest_len_bytes); + } // namespace libiop #include "libiop/bcs/hashing/dummy_algebraic_hash.tcc" diff --git a/libiop/bcs/hashing/dummy_algebraic_hash.tcc b/libiop/bcs/hashing/dummy_algebraic_hash.tcc index e8c0b10b..dc6c9337 100644 --- a/libiop/bcs/hashing/dummy_algebraic_hash.tcc +++ b/libiop/bcs/hashing/dummy_algebraic_hash.tcc @@ -117,7 +117,7 @@ FieldT dummy_algebraic_leafhash::hash(const std::vector &leaf) FieldT sum = FieldT::zero(); for (size_t i = 0; i < leaf.size(); i++) { - sum += FieldT(i) * leaf[i]; + sum += FieldT(i + 1) * leaf[i]; // Add one, otherwise the 0th index is unused. } return sum; } @@ -147,4 +147,16 @@ FieldT dummy_algebraic_two_to_one_hash( return FieldT(2) * first + second; } +template +FieldT dummy_algebraic_cap_hash(const std::vector &data, const std::size_t digest_len_bytes) +{ + FieldT sum = FieldT::zero(); + for (size_t i = 0; i < data.size(); i++) + { + sum += FieldT(i + 1) * data[i]; // Add one, otherwise the 0th index is unused. 
+ } + + return sum; +} + } diff --git a/libiop/bcs/hashing/hash_enum.hpp b/libiop/bcs/hashing/hash_enum.hpp index f7902991..e42c7eab 100644 --- a/libiop/bcs/hashing/hash_enum.hpp +++ b/libiop/bcs/hashing/hash_enum.hpp @@ -30,15 +30,18 @@ static const char* bcs_hash_type_names[] = {"", "blake2b", "poseidon with Starkw template std::shared_ptr> get_hashchain(bcs_hash_type hash_type, size_t security_parameter); -template +template std::shared_ptr> get_leafhash( - const bcs_hash_type hash_type, - const size_t security_parameter, + const bcs_hash_type hash_type, + const size_t security_parameter, const size_t leaf_size); template two_to_one_hash_function get_two_to_one_hash(const bcs_hash_type hash_enum, const size_t security_parameter); +template +cap_hash_function get_cap_hash(const bcs_hash_type hash_enum, const size_t security_parameter); + } #include "libiop/bcs/hashing/hash_enum.tcc" diff --git a/libiop/bcs/hashing/hash_enum.tcc b/libiop/bcs/hashing/hash_enum.tcc index 8942c756..490adaef 100644 --- a/libiop/bcs/hashing/hash_enum.tcc +++ b/libiop/bcs/hashing/hash_enum.tcc @@ -58,7 +58,7 @@ std::shared_ptr> get_hashchain_internal( { return std::make_shared>(security_parameter); } - throw std::invalid_argument("bcs_hash_type unknown"); + throw std::invalid_argument("bcs_hash_type unknown (blake2b hashchain)"); } @@ -68,8 +68,23 @@ std::shared_ptr> get_hashchain(bcs_hash_type has return get_hashchain_internal(FieldT::zero(), hash_enum, security_parameter); } +/* Binary_hash_digest leafhash */ +template +std::shared_ptr> get_leafhash_internal( + const typename libff::enable_if::value, FieldT>::type _, + const bcs_hash_type hash_enum, + const size_t security_parameter, + const size_t leaf_size) +{ + if (hash_enum == blake2b_type) + { + return std::make_shared>(security_parameter); + } + throw std::invalid_argument("bcs_hash_type unknown (blake2b leaf hash)"); +} + /* Algebraic leafhash case */ -template +template std::shared_ptr> get_leafhash_internal( const typename libff::enable_if::value, FieldT>::type _, const bcs_hash_type hash_enum, @@ -85,55 +100,39 @@ std::shared_ptr> get_leafhash_internal( poseidon_params params = get_poseidon_parameters(hash_enum); /* security parameter is -1 b/c */ std::shared_ptr> permutation = std::make_shared>(params); - std::shared_ptr> leafhasher = std::make_shared>( - permutation, - security_parameter - 1); + std::shared_ptr> leafhasher = + std::make_shared>(permutation, security_parameter - 1); return leafhasher; } throw std::invalid_argument("bcs_hash_type unknown (algebraic leaf hash)"); } -/* Binary_hash_digest leafhash */ -template -std::shared_ptr> get_leafhash_internal( - const typename libff::enable_if::value, FieldT>::type _, - const bcs_hash_type hash_enum, - const size_t security_parameter, - const size_t leaf_size) -{ - if (hash_enum == blake2b_type) - { - return std::make_shared>(security_parameter); - } - throw std::invalid_argument("bcs_hash_type unknown"); -} - -template +template std::shared_ptr> get_leafhash( const bcs_hash_type hash_enum, const size_t security_parameter, const size_t leaf_size) { - return get_leafhash_internal(FieldT::zero(), hash_enum, security_parameter, leaf_size); + return get_leafhash_internal(FieldT::zero(), hash_enum, security_parameter, leaf_size); } /* binary hash digest 2->1 hash */ template two_to_one_hash_function get_two_to_one_hash_internal( - const typename libff::enable_if::value, FieldT>::type _, - const bcs_hash_type hash_enum, + const typename libff::enable_if::value, FieldT>::type _, + const bcs_hash_type 
hash_enum, const size_t security_parameter) { if (hash_enum == blake2b_type) { return blake2b_two_to_one_hash; } - throw std::invalid_argument("bcs_hash_type unknown"); + throw std::invalid_argument("bcs_hash_type unknown (blake2b two to one hash)"); } /* algebraic 2->1 hash */ template two_to_one_hash_function get_two_to_one_hash_internal( - const typename libff::enable_if::value, FieldT>::type _, - const bcs_hash_type hash_enum, + const typename libff::enable_if::value, FieldT>::type _, + const bcs_hash_type hash_enum, const size_t security_parameter) { if (hash_enum == starkware_poseidon_type || hash_enum == high_alpha_poseidon_type) @@ -149,7 +148,7 @@ two_to_one_hash_function get_two_to_one_hash_internal( as this reference has to live after the function terminates */ std::shared_ptr> hash_class = std::make_shared>(permutation, security_parameter - 1); - std::function f = [permutation, hash_class](const FieldT& left, const FieldT& right, const std::size_t unused) -> FieldT + std::function f = [permutation, hash_class](const FieldT& left, const FieldT& right, const std::size_t unused) -> FieldT { return hash_class->hash(left, right); }; @@ -164,4 +163,53 @@ two_to_one_hash_function get_two_to_one_hash(const bcs_hash_type hash return get_two_to_one_hash_internal(FieldT::zero(), hash_enum, security_parameter); } +/* Hash digest 2^n->1 hash. */ +template +cap_hash_function get_cap_hash_internal( + const typename libff::enable_if::value, FieldT>::type _, + const bcs_hash_type hash_enum, + const size_t security_parameter) +{ + if (hash_enum == blake2b_type) + { + return blake2b_many_to_one_hash; + } + throw std::invalid_argument("bcs_hash_type unknown (blake2b cap hash)"); } + +/* Algebraic 2^n->1 hash. */ +template +cap_hash_function get_cap_hash_internal( + const typename libff::enable_if::value, FieldT>::type _, + const bcs_hash_type hash_enum, + const size_t security_parameter) +{ + if (hash_enum == starkware_poseidon_type || hash_enum == high_alpha_poseidon_type) + { + if (security_parameter != 128) + { + throw std::invalid_argument("Poseidon only supported for 128 bit soundness."); + } + poseidon_params params = get_poseidon_parameters(hash_enum); + /* security parameter is -1 b/c */ + std::shared_ptr> permutation = std::make_shared>(params); + /* We explicitly place this on heap with no destructor, + as this reference has to live after the function terminates */ + std::shared_ptr> hash_class = + std::make_shared>(permutation, security_parameter - 1); + std::function &leaf, const std::size_t)> f = [permutation, hash_class](const std::vector &leaf, const std::size_t unused) -> FieldT + { + return hash_class->hash(leaf); + }; + return f; + } + throw std::invalid_argument("bcs_hash_type unknown (algebraic cap hash)"); +} + +template +cap_hash_function get_cap_hash(const bcs_hash_type hash_enum, const size_t security_parameter) +{ + return get_cap_hash_internal(FieldT::zero(), hash_enum, security_parameter); +} + +} // namespace libiop diff --git a/libiop/bcs/hashing/hashing.hpp b/libiop/bcs/hashing/hashing.hpp index 29473a1c..a4f7d977 100644 --- a/libiop/bcs/hashing/hashing.hpp +++ b/libiop/bcs/hashing/hashing.hpp @@ -52,6 +52,10 @@ class leafhash template using two_to_one_hash_function = std::function; +/* Function used for cap hash of merkle tree which takes in a vector of size 2^n. 
*/
+template<typename hash_type>
+using cap_hash_function = std::function<hash_type(const std::vector<hash_type>&, const std::size_t)>;
+
 /* Sizeof algebraic hash */
 template<typename hash_type>
 size_t get_hash_size(const typename libff::enable_if<!std::is_same<hash_type, binary_hash_digest>::value, hash_type>::type h)
diff --git a/libiop/bcs/merkle_tree.hpp b/libiop/bcs/merkle_tree.hpp
index 522328eb..df5eef1d 100644
--- a/libiop/bcs/merkle_tree.hpp
+++ b/libiop/bcs/merkle_tree.hpp
@@ -20,7 +20,7 @@
 namespace libiop {
 
-/* Authentication paths for a set of positions */
+/** Authentication paths for a set of positions. */
 template<typename hash_digest_type>
 struct merkle_tree_set_membership_proof
 {
     std::vector<hash_digest_type> auxiliary_hashes;
@@ -46,6 +46,12 @@ template<typename FieldT, typename hash_digest_type>
 class merkle_tree {
 protected:
     bool constructed_;
+    /**
+     * inner_nodes_ is a vector of the `2 * num_leaves_ - 1` nodes in the tree, with the root
+     * at index 0, left child at 1, right child at 2, etc. If cap_size_ is greater than 2, the
+     * first `log_2(cap_size_) - 1` layers under (and not including) the root are empty to make
+     * the math easier.
+     */
     std::vector<hash_digest_type> inner_nodes_;
 
     std::size_t num_leaves_;
@@ -54,48 +60,78 @@ class merkle_tree {
     std::size_t digest_len_bytes_;
     bool make_zk_;
     std::size_t num_zk_bytes_;
+    /**
+     * The top `log_2(cap_size_)` layers are hashed with a single computation to improve efficiency.
+     * The root along with its cap_size_ direct children are referred to as the "cap," and the
+     * operation that transforms these children to the root is the cap hash.
+     * See https://github.com/scipr-lab/libiop/issues/41.
+     */
+    cap_hash_function<hash_digest_type> cap_hasher_;
+    /**
+     * cap_size_ is the number of direct children the root has. It must be a power of 2 and at
+     * least 2. For example if cap_size == 4, the root has 4 children, and in inner_nodes_ the
+     * indices 1 and 2 are unused.
+     */
+    std::size_t cap_size_;
 
-    /* Each element will be hashed (individually) to produce a random hash digest. */
+    /** Each element will be hashed (individually) to produce a random hash digest. */
     std::vector<zk_salt_type> zk_leaf_randomness_elements_;
 
     void sample_leaf_randomness();
     void compute_inner_nodes();
+
+    /* Helper functions for dealing with the tree structure. Correctness not guaranteed
+       when out of bounds. */
+    std::size_t parent_of(const std::size_t node_index) const;
+    std::size_t left_child_of(const std::size_t node_index) const;
+    std::size_t right_child_of(const std::size_t node_index) const;
+    bool is_in_cap(const std::size_t node_index) const;
+    std::size_t cap_children_start() const; // Inclusive.
+    std::size_t cap_children_end() const; // Exclusive.
 public:
-    /* Create a merkle tree with the given configuration.
-       If make_zk is true, 2 * security parameter random bytes will be appended to each leaf
-       before hashing, to prevent a low entropy leaf value from being inferred
-       from its hash. */
+    /**
+     * Create a merkle tree with the given configuration.
+     * If make_zk is true, 2 * security parameter random bytes will be appended to each leaf
+     * before hashing, to prevent a low entropy leaf value from being inferred from its hash.
+     * cap_size is the number of children of the root and must be a power of 2.
+     */
     merkle_tree(const std::size_t num_leaves,
                 const std::shared_ptr<leafhash<FieldT, hash_digest_type>> &leaf_hasher,
                 const two_to_one_hash_function<hash_digest_type> &node_hasher,
+                const cap_hash_function<hash_digest_type> &cap_hasher,
                 const std::size_t digest_len_bytes,
                 const bool make_zk,
-                const std::size_t security_parameter);
+                const std::size_t security_parameter,
+                const std::size_t cap_size=2);
 
-    /** This treats each leaf as a column.
-     * e.g. 
The ith leaf is the vector formed by leaf_contents[j][i] for all j */ + /** + * This treats each leaf as a column. + * e.g. The ith leaf is the vector formed by leaf_contents[j][i] for all j. + */ void construct(const std::vector>> &leaf_contents); // TODO: Remove this overload in favor of only using the former void construct(const std::vector > &leaf_contents); - /** Leaf contents is a table with `r` rows - * (`r` typically being the number of oracles) - * and (MT_num_leaves * coset_serialization_size) columns. - * Each MT leaf is the serialization of a table with `r` rows, - * and coset_serialization_size columns. + /** + * Leaf contents is a table with `r` rows + * (`r` typically being the number of oracles) + * and (MT_num_leaves * coset_serialization_size) columns. + * Each MT leaf is the serialization of a table with `r` rows, + * and coset_serialization_size columns. * - * This is done here rather than the BCS layer to avoid needing to copy the data, - * as this will take a significant amount of memory. + * This is done here rather than the BCS layer to avoid needing to copy the data, + * as this will take a significant amount of memory. */ void construct_with_leaves_serialized_by_cosets( const std::vector>> &leaf_contents, size_t coset_serialization_size); - /** Takes in a set of query positions to input oracles to a domain of size: - * `num_leaves * coset_serialization_size`, - * and the associated evaluations for each query position. + /** + * Takes in a set of query positions to input oracles to a domain of size: + * `num_leaves * coset_serialization_size`, + * and the associated evaluations for each query position. * - * This function then serializes these evaluations into leaf entries. - * The rows of a leaf entry are the same as in the eva - */ + * This function then serializes these evaluations into leaf entries. + * The rows of a leaf entry are the same as in the eva + */ std::vector> serialize_leaf_values_by_coset( const std::vector &query_positions, const std::vector > &query_responses, @@ -103,6 +139,8 @@ class merkle_tree { hash_digest_type get_root() const; + /* These two functions do not currently work if the given positions aren't sorted or + have duplicates, AND the tree is set to be zero knowledge. */ merkle_tree_set_membership_proof get_set_membership_proof( const std::vector &positions) const; bool validate_set_membership_proof( @@ -111,8 +149,12 @@ class merkle_tree { const std::vector> &leaf_contents, const merkle_tree_set_membership_proof &proof); - /* Returns number of two to one hashes */ - size_t count_hashes_to_verify_set_membership_proof( + /** + * Returns a number that is proportional to the hashing runtime of verifying a set membership + * proof. Each two-to-one hash is counted as 2 units, and each input of the cap hash is 1 unit. + * Leaf hashes are not counted. 
+ */ + size_t count_internal_hash_complexity_to_verify_set_membership( const std::vector &positions) const; std::size_t num_leaves() const; diff --git a/libiop/bcs/merkle_tree.tcc b/libiop/bcs/merkle_tree.tcc index a86cf68d..711068ca 100644 --- a/libiop/bcs/merkle_tree.tcc +++ b/libiop/bcs/merkle_tree.tcc @@ -9,20 +9,26 @@ namespace libiop { +using std::size_t; + template merkle_tree::merkle_tree( - const std::size_t num_leaves, + const size_t num_leaves, const std::shared_ptr> &leaf_hasher, const two_to_one_hash_function &node_hasher, - const std::size_t digest_len_bytes, + const cap_hash_function &cap_hasher, + const size_t digest_len_bytes, const bool make_zk, - const std::size_t security_parameter) : + const size_t security_parameter, + const size_t cap_size) : num_leaves_(num_leaves), leaf_hasher_(leaf_hasher), node_hasher_(node_hasher), + cap_hasher_(cap_hasher), digest_len_bytes_(digest_len_bytes), make_zk_(make_zk), - num_zk_bytes_((security_parameter * 2 + 7) / 8) /* = ceil((2 * security_parameter_bits) / 8) */ + num_zk_bytes_((security_parameter * 2 + 7) / 8), /* = ceil((2 * security_parameter_bits) / 8) */ + cap_size_(cap_size) { if (num_leaves < 2 || !libff::is_power_of_2(num_leaves)) { @@ -30,6 +36,11 @@ merkle_tree::merkle_tree( throw std::invalid_argument("Merkle tree size must be a power of two, and at least 2."); } + if (cap_size < 2 || !libff::is_power_of_2(cap_size)) + { + throw std::invalid_argument("Merkle tree cap size must be a power of two, and at least 2."); + } + this->constructed_ = false; } @@ -120,7 +131,7 @@ void merkle_tree::construct_with_leaves_serialized_by_ * our slice is of size num_input_oracles * coset_size */ std::vector slice(leaf_contents.size() * coset_serialization_size, FieldT::zero()); - for (std::size_t i = 0; i < this->num_leaves_; ++i) + for (size_t i = 0; i < this->num_leaves_; ++i) { std::vector positions_in_this_slice = leaf_domain.all_positions_in_coset_i(i, coset_serialization_size); @@ -200,32 +211,40 @@ std::vector> merkle_tree::serializ template void merkle_tree::compute_inner_nodes() { - // TODO: Better document this function, its hashing layer by layer. - std::size_t n = (this->num_leaves_ - 1) / 2; + /* n is the first index of the layer we're about to compute. It starts at the bottom-left most + inner node. This hack works because num_leaves is the index of the right child of the + bottom-left inner node. */ + size_t n = this->parent_of(this->num_leaves_); while (true) { // TODO: Evaluate how much time is spent in hashing vs memory access. // For better memory efficiency, we could hash sub-tree by sub-tree // in an unrolled recursive fashion. - for (std::size_t j = n; j <= 2*n; ++j) + for (size_t j = n; j <= 2*n; ++j) { // TODO: Can we rely on left and right to be placed sequentially in memory, // for better performance in node hasher? - const hash_digest_type& left = this->inner_nodes_[2*j + 1]; - const hash_digest_type& right = this->inner_nodes_[2*j + 2]; + const hash_digest_type& left = this->inner_nodes_[this->left_child_of(j)]; + const hash_digest_type& right = this->inner_nodes_[this->right_child_of(j)]; const hash_digest_type digest = this->node_hasher_(left, right, this->digest_len_bytes_); this->inner_nodes_[j] = digest; } - if (n > 0) - { - n /= 2; - } - else + if (this->is_in_cap(n)) { + /* We are done with the main portion after the cap layer is filled out. + There is one edge case where the entire tree is the cap, and in that case we + will do some extra work, but it will still correctly compute everything. 
*/
             break;
         }
+        n /= 2; // Go to the layer above this one.
     }
+
+    // Now compute the cap hash.
+    auto cap_children_start = this->inner_nodes_.begin() + this->cap_children_start();
+    auto cap_children_end = this->inner_nodes_.begin() + this->cap_children_end();
+    std::vector<hash_digest_type> cap_children(cap_children_start, cap_children_end);
+    this->inner_nodes_[0] = this->cap_hasher_(cap_children, this->digest_len_bytes_);
 }
 
 template<typename FieldT, typename hash_digest_type>
@@ -242,7 +261,7 @@ hash_digest_type merkle_tree<FieldT, hash_digest_type>::get_root() const
 
 template<typename FieldT, typename hash_digest_type>
 merkle_tree_set_membership_proof<hash_digest_type>
 merkle_tree<FieldT, hash_digest_type>::get_set_membership_proof(
-    const std::vector<std::size_t> &positions) const
+    const std::vector<size_t> &positions) const
 {
     if (!this->constructed_)
     {
@@ -255,12 +274,12 @@ merkle_tree_set_membership_proof<hash_digest_type>
         return result;
     }
 
-    std::vector<std::size_t> S = positions; /* sorted set of positions */
+    std::vector<size_t> S = positions; /* sorted set of positions */
     std::sort(S.begin(), S.end());
     S.erase(std::unique(S.begin(), S.end()), S.end()); /* remove possible duplicates */
 
     if (std::any_of(S.begin(), S.end(),
-                    [this](const std::size_t pos) { return pos >= this->num_leaves_; }))
+                    [this](const size_t pos) { return pos >= this->num_leaves_; }))
     {
         throw std::invalid_argument("All positions must be between 0 and num_leaves-1.");
     }
@@ -280,37 +299,37 @@ merkle_tree_set_membership_proof<hash_digest_type>
     /* transform leaf positions to indices in this->inner_nodes_ */
     for (auto &pos : S)
     {
-        pos += (this->num_leaves_ - 1);
+        pos += this->num_leaves_ - 1;
     }
 
-    while (true) /* for every layer */
+    // Each iteration adds the hashes for one layer, up until the layer below the cap.
+    while (true)
     {
         auto it = S.begin();
-        if (*it == 0 && it == --S.end())
-        {
-            /* we have arrived at the root */
+        if (is_in_cap(*it))
+        { // We have arrived at the cap, which will be handled differently.
             break;
         }
 
-        std::vector<std::size_t> new_S;
+        // new_S contains the hash indices we need in the layer above this one.
+        std::vector<size_t> new_S;
         while (it != S.end())
         {
-            const std::size_t it_pos = *it;
+            const size_t it_pos = *it;
             auto next_it = ++it;
 
             /* Always process parent. */
-            new_S.emplace_back((it_pos - 1)/2);
+            new_S.emplace_back(this->parent_of(it_pos));
 
-            if ((it_pos & 1) == 0)
+            if (it_pos % 2 == 0)
             {
-                /* We are the right node, so there was no left node
-                   (o.w. would have been processed in b)
-                   below). Insert it as auxiliary */
+                /* it_pos is a right node, so there was no left node (otherwise it would have been
+                   processed already). Insert left node as auxiliary */
                 result.auxiliary_hashes.emplace_back(this->inner_nodes_[it_pos - 1]);
             }
             else
             {
-                /* We are the left node. Two cases: */
+                /* it_pos is a left node. Two cases: */
                 if (next_it == S.end() || *next_it != it_pos + 1)
                 {
                     /* a) Our right sibling is not in S, so we must
                        insert it as auxiliary */
                     result.auxiliary_hashes.emplace_back(this->inner_nodes_[it_pos + 1]);
                 }
                 else
                 {
                     /* b) Our right sibling is in S. So don't need
                        auxiliary and skip over the right sibling.
-                       (Note that only one parent will be processed.)
-                    */
+                       (Note that only one parent will be processed.) */
                     ++next_it;
                 }
             }
 
         std::swap(S, new_S);
     }
 
+    // Add the cap, i.e. the root's direct children.
+    // The only elements should be the cap.
+    assert(S.size() <= this->cap_size_);
+    auto it = S.begin();
+    // Iterate over every direct child of the root, and add the ones not obtainable from positions.
+    for (size_t j = this->cap_children_start(); j < this->cap_children_end(); j++)
+    {
+        // Since S is sorted, we can just compare to the next element of S.
+        if (it != S.end() && j == *it)
+        {
+            it++;
+        }
+        else
+        {
+            result.auxiliary_hashes.emplace_back(this->inner_nodes_[j]);
+        }
+    }
+
     return result;
 }
 
+/* Large portions of this code are duplicated from get_set_membership_proof, but it's just
+   different enough that it can't easily be extracted into a single function. */
 template<typename FieldT, typename hash_digest_type>
 bool merkle_tree<FieldT, hash_digest_type>::validate_set_membership_proof(
     const hash_digest_type &root,
-    const std::vector<std::size_t> &positions,
+    const std::vector<size_t> &positions,
     const std::vector<std::vector<FieldT>> &leaf_contents,
     const merkle_tree_set_membership_proof<hash_digest_type> &proof)
 {
@@ -362,7 +400,7 @@
     auto rand_it = proof.randomness_hashes.begin();
     auto aux_it = proof.auxiliary_hashes.begin();
 
-    typedef std::pair<std::size_t, hash_digest_type> pos_and_digest_t;
+    typedef std::pair<size_t, hash_digest_type> pos_and_digest_t;
     std::vector<pos_and_digest_t> S;
     S.reserve(positions.size());
 
     if (this->make_zk_)
     {
         for (auto &leaf : leaf_contents)
         {
+            /* FIXME: This code is currently incorrect if the given list of positions is not
+               sorted or has duplicates. This could be fixed if both positions and leaf_contents
+               are sorted before the leaf hashes are calculated, which would require refactoring. */
             const zk_salt_type zk_salt = *rand_it++;
             leaf_hashes.emplace_back(this->leaf_hasher_->zk_hash(leaf, zk_salt));
         }
-    } else {
+    }
+    else
+    {
         for (auto &leaf : leaf_contents)
         {
             leaf_hashes.emplace_back(this->leaf_hasher_->hash(leaf));
         }
@@ -384,10 +427,11 @@
     // with a single transform at the bottom.
     std::transform(positions.begin(), positions.end(), leaf_hashes.begin(),
                    std::back_inserter(S),
-                   [](const std::size_t pos, const hash_digest_type &hash) {
+                   [](const size_t pos, const hash_digest_type &hash) {
                        return std::make_pair(pos, hash);
                    });
 
+    std::sort(S.begin(), S.end(), compare_first<size_t, hash_digest_type>);
     S.erase(std::unique(S.begin(), S.end()), S.end()); /* remove possible duplicates */
 
     if (std::adjacent_find(S.begin(), S.end(),
@@ -410,22 +454,23 @@
     /* transform to sorted set of indices */
     for (auto &pos : S)
     {
-        pos.first += (this->num_leaves_ - 1);
+        pos.first += this->num_leaves_ - 1;
     }
 
-    while (true) /* for every layer */
+    // Each iteration calculates the hashes for one layer, up until the layer below the cap.
+    while (true)
     {
         auto it = S.begin();
-        if (it->first == 0 && it == --S.end())
-        {
-            /* we have arrived at the root */
+        if (is_in_cap(it->first))
+        { // We have arrived at the cap. The cap is hashed differently, so we stop here.
             break;
         }
 
-        std::vector<std::pair<std::size_t, hash_digest_type> > new_S;
+        // new_S contains the indices and hashes we calculate in the layer above this one.
+        std::vector<std::pair<size_t, hash_digest_type> > new_S;
         while (it != S.end())
         {
-            const std::size_t it_pos = it->first;
+            const size_t it_pos = it->first;
             const hash_digest_type it_hash = it->second;
             auto next_it = ++it;
 
             hash_digest_type left_hash;
             hash_digest_type right_hash;
 
-            if ((it_pos & 1) == 0)
+            if (it_pos % 2 == 0)
             {
-                /* We are the right node, so there was no left node
-                   (o.w. would have been processed in b)
-                   below). Take it from the auxiliary. */
+                /* it_pos is a right node, so there was no left node (otherwise it would have been
+                   processed already). Take left node from the auxiliary. */
                 left_hash = *aux_it++;
                 right_hash = it_hash;
             }
             else
             {
-                /* We are the left node. Two cases: */
+                /* it_pos is a left node. 
Two cases: */ left_hash = it_hash; if (next_it == S.end() || next_it->first != it_pos + 1) @@ -463,7 +507,7 @@ bool merkle_tree::validate_set_membership_proof( } } - const std::size_t parent_pos = (it_pos - 1)/2; + const size_t parent_pos = this->parent_of(it_pos); const hash_digest_type parent_hash = this->node_hasher_(left_hash, right_hash, this->digest_len_bytes_); new_S.emplace_back(std::make_pair(parent_pos, parent_hash)); @@ -474,27 +518,53 @@ bool merkle_tree::validate_set_membership_proof( std::swap(S, new_S); } + // Add the cap, including the root's direct children and the root. + // The only elements should be the cap (not including the root). + assert(S.size() <= this->cap_size_); + + auto it = S.begin(); + std::vector cap_children; + cap_children.reserve(this->cap_size_); + /* Iterate over every direct child of the root, choosing either the calculated hash or + auxiliary hash. */ + for (size_t j = this->cap_children_start(); j < this->cap_children_end(); j++) + { + // Since S is sorted, we can just compare to the next element of S. + if (it != S.end() && j == it->first) + { + cap_children.emplace_back(it->second); + it++; + } + else + { + cap_children.emplace_back(*aux_it); + aux_it++; + } + } + if (aux_it != proof.auxiliary_hashes.end()) { throw std::logic_error("Validation did not consume the entire proof."); } - return (S.begin()->second == root); + return this->cap_hasher_(cap_children, this->digest_len_bytes_) == root; } template -size_t merkle_tree::count_hashes_to_verify_set_membership_proof( - const std::vector &positions) const +size_t merkle_tree::count_internal_hash_complexity_to_verify_set_membership( + const std::vector &positions) const { /** This goes layer by layer, * and counts the number of hashes needed to be computed. * Essentially when moving up a layer in the verifier, * every unique parent is one hash that has to be computed. 
*/ - size_t num_two_to_one_hashes = 0; + size_t num_two_to_one_hashes = 0; std::vector cur_pos_set = positions; sort(cur_pos_set.begin(), cur_pos_set.end()); assert(cur_pos_set[cur_pos_set.size() - 1] < this->num_leaves()); - for (size_t cur_depth = this->depth(); cur_depth > 0; cur_depth--) + + const size_t cap_depth = libff::log2(this->cap_size_); + for (size_t cur_depth = this->depth(); cur_depth > cap_depth; cur_depth--) { // contains positions in range [0, 2^{cur_depth - 1}) std::vector next_pos_set; @@ -502,8 +572,7 @@ size_t merkle_tree::count_hashes_to_verify_set_members { size_t parent_pos = cur_pos_set[i] / 2; // Check that parent pos isn't already in next pos set - if (next_pos_set.size() == 0 - || next_pos_set[next_pos_set.size() - 1] != parent_pos) + if (next_pos_set.size() == 0 || next_pos_set[next_pos_set.size() - 1] != parent_pos) { next_pos_set.emplace_back(parent_pos); } @@ -511,17 +580,17 @@ size_t merkle_tree::count_hashes_to_verify_set_members num_two_to_one_hashes += next_pos_set.size(); cur_pos_set = next_pos_set; } - return num_two_to_one_hashes; + return 2 * num_two_to_one_hashes + this->cap_size_; } template -std::size_t merkle_tree::num_leaves() const +size_t merkle_tree::num_leaves() const { return (this->num_leaves_); } template -std::size_t merkle_tree::depth() const +size_t merkle_tree::depth() const { return libff::log2(this->num_leaves_); } @@ -533,10 +602,45 @@ bool merkle_tree::zk() const } template -std::size_t merkle_tree::num_total_bytes() const +size_t merkle_tree::num_total_bytes() const { return (this->digest_len_bytes_ * (2 * this->num_leaves() - 1)); } +template +size_t merkle_tree::parent_of(const std::size_t node_index) const +{ + return (node_index - 1) / 2; +} + +template +size_t merkle_tree::left_child_of(const std::size_t node_index) const +{ + return 2 * node_index + 1; +} + +template +size_t merkle_tree::right_child_of(const std::size_t node_index) const +{ + return 2 * node_index + 2; +} + +template +bool merkle_tree::is_in_cap(const std::size_t node_index) const +{ + return node_index < this->cap_children_end(); +} + +template +size_t merkle_tree::cap_children_start() const +{ + return this->cap_size_ - 1; +} + +template +size_t merkle_tree::cap_children_end() const +{ + return this->cap_size_ * 2 - 1; +} } // libiop diff --git a/libiop/bcs/pow.tcc b/libiop/bcs/pow.tcc index 184eb873..83a0fa78 100644 --- a/libiop/bcs/pow.tcc +++ b/libiop/bcs/pow.tcc @@ -76,7 +76,7 @@ hash_digest_type pow::solve_pow_internal( const typename libff::enable_if::value, hash_digest_type>::type challenge) const { FieldT pow = FieldT::zero(); - while (this->verify_pow(node_hasher, challenge, pow) == false) + while (!this->verify_pow(node_hasher, challenge, pow)) { pow += FieldT::one(); } @@ -93,7 +93,7 @@ hash_digest_type pow::solve_pow_internal( size_t num_words = pow.length() / sizeof(size_t); size_t pow_int = 0; - while (this->verify_pow(node_hasher, challenge, pow) == false) + while (!this->verify_pow(node_hasher, challenge, pow)) { std::memcpy(&pow[(num_words - 1)*sizeof(size_t)], &pow_int, sizeof(size_t)); pow_int += 1; @@ -147,15 +147,7 @@ bool pow::verify_pow_internal( size_t least_significant_word; std::memcpy(&least_significant_word, &hash[(num_words - 1)*sizeof(size_t)], sizeof(size_t)); size_t relevant_bits = least_significant_word & ((1 << this->parameters_.pow_bitlen()) - 1); - if (relevant_bits <= this->parameters_.pow_upperbound()) - { - // printf("%d\n", (1 << this->parameters_.pow_bitlen())); - // printf("%zu\n", least_significant_word); - // 
printf("%\n", relevant_bits); - // print_string_in_hex(hash); - return true; - } - return false; + return relevant_bits <= this->parameters_.pow_upperbound(); } } // libiop diff --git a/libiop/tests/bcs/test_bcs_transformation.cpp b/libiop/tests/bcs/test_bcs_transformation.cpp index 3e07fa70..f4320b87 100644 --- a/libiop/tests/bcs/test_bcs_transformation.cpp +++ b/libiop/tests/bcs/test_bcs_transformation.cpp @@ -44,6 +44,7 @@ void set_bcs_parameters_leafhash(bcs_transformation_parameters>(security_parameter); params.compression_hasher = blake2b_two_to_one_hash; + params.cap_hasher = blake2b_many_to_one_hash; } // Algebraic case @@ -52,7 +53,8 @@ template ¶ms) { params.leafhasher_ = std::make_shared>(); - params.compression_hasher = dummy_algebraic_two_to_one_hash; + params.compression_hasher = dummy_algebraic_two_to_one_hash; + params.cap_hasher = dummy_algebraic_cap_hash; } template @@ -70,6 +72,7 @@ bcs_transformation_parameters get_bcs_parameters(bool alge bcs_parameters.hash_enum = bcs_hash_type::blake2b_type; } set_bcs_parameters_leafhash(bcs_parameters); + bcs_parameters.cap_size = 2; return bcs_parameters; } diff --git a/libiop/tests/bcs/test_merkle_tree.cpp b/libiop/tests/bcs/test_merkle_tree.cpp index 8bafcf92..b0fdd75b 100644 --- a/libiop/tests/bcs/test_merkle_tree.cpp +++ b/libiop/tests/bcs/test_merkle_tree.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "libiop/algebra/utils.hpp" @@ -12,43 +13,54 @@ namespace libiop { +using std::size_t; + template< bool B, class T = void > using enable_if_t = typename libff::enable_if::type; + // Binary hash type template::value, int> = 42> merkle_tree new_MT( - const std::size_t size, const std::size_t digest_len_bytes, const bool make_zk, - const std::size_t security_parameter) + const size_t size, const size_t digest_len_bytes, const bool make_zk, + const size_t security_parameter, const size_t cap_size=2) { return merkle_tree( size, std::make_shared>(security_parameter), blake2b_two_to_one_hash, + blake2b_many_to_one_hash, digest_len_bytes, make_zk, - security_parameter); + security_parameter, + cap_size); } + template::value, int> = 42> merkle_tree new_MT( - const std::size_t size, const std::size_t digest_len_bytes, const bool make_zk, - const std::size_t security_parameter) + const size_t size, const size_t digest_len_bytes, const bool make_zk, + const size_t security_parameter, const size_t cap_size=2) { return merkle_tree( size, std::make_shared>(), dummy_algebraic_two_to_one_hash, + dummy_algebraic_cap_hash, digest_len_bytes, make_zk, - security_parameter); + security_parameter, + cap_size); } +/** Constructs a merkle tree with leaf size 2. Generates and verifies membership proofs for + * each leaf individually, and makes sure reversing the contents of each leaf causes the + * verification to fail (unless the leaf contents are symmetric). 
*/
 template<typename FieldT, typename hash_type>
-void run_simple_MT_test(const std::size_t size, const std::size_t digest_len_bytes, const bool make_zk,
-                        const std::size_t security_parameter) {
+void run_simple_MT_test(const size_t size, const size_t digest_len_bytes, const bool make_zk,
+                        const size_t security_parameter, const size_t cap_size) {
     merkle_tree<FieldT, hash_type> tree =
-        new_MT<FieldT, hash_type>(size, digest_len_bytes, make_zk, security_parameter);
+        new_MT<FieldT, hash_type>(size, digest_len_bytes, make_zk, security_parameter, cap_size);
 
     const std::vector<FieldT> vec1 = random_vector<FieldT>(size);
     const std::vector<FieldT> vec2 = random_vector<FieldT>(size);
     tree.construct({ vec1, vec2 });
 
     const hash_type root = tree.get_root();
 
-    for (std::size_t i = 0; i < size; ++i)
+    for (size_t i = 0; i < size; ++i)
     {
         /* membership proof for the set {i} */
         const std::vector<std::size_t> set = {i};
@@ -96,37 +108,52 @@ void run_simple_MT_test(const std::size_t size, const std::size_t digest_len_bytes, const bool make_zk,
 TEST(MerkleTreeTest, SimpleTest) {
     typedef libff::gf64 FieldT;
 
-    const std::size_t size = 16;
-    const std::size_t digest_len_bytes = 256/8;
-    const std::size_t security_parameter = 128;
-    run_simple_MT_test<FieldT, binary_hash_digest>(size, digest_len_bytes, false, security_parameter);
-    run_simple_MT_test<FieldT, FieldT>(size, digest_len_bytes, false, security_parameter);
+    const size_t size = 16;
+    const std::vector<size_t> cap_sizes = {2, 4, 8, 16}; // Test all possible cap sizes.
+    const size_t digest_len_bytes = 256/8;
+    const size_t security_parameter = 128;
+
+    for (size_t cap_size : cap_sizes)
+    {
+        run_simple_MT_test<FieldT, binary_hash_digest>(size, digest_len_bytes, false,
+                                                       security_parameter, cap_size);
+        run_simple_MT_test<FieldT, FieldT>(size, digest_len_bytes, false,
                                            security_parameter, cap_size);
+    }
 }
 
 TEST(MerkleTreeZKTest, SimpleTest) {
     typedef libff::gf64 FieldT;
 
-    const std::size_t size_small = 16;
-    const std::size_t size_large = 1ull << 18; /* The goal is to test batch randomness logic */
-    const std::size_t digest_len_bytes = 256/8;
-    const std::size_t security_parameter = 128;
-    run_simple_MT_test<FieldT, binary_hash_digest>(size_small, digest_len_bytes, true, security_parameter);
-    run_simple_MT_test<FieldT, binary_hash_digest>(size_large, digest_len_bytes, true, security_parameter);
+    const size_t size_small = 16;
+    const size_t size_large = 1ull << 18; /* The goal is to test batch randomness logic */
+    const size_t cap_size = 4;
+    const size_t digest_len_bytes = 256/8;
+    const size_t security_parameter = 128;
+    run_simple_MT_test<FieldT, binary_hash_digest>(size_small, digest_len_bytes, true,
+                                                   security_parameter, cap_size);
+    run_simple_MT_test<FieldT, binary_hash_digest>(size_large, digest_len_bytes, true,
+                                                   security_parameter, cap_size);
 }
 
-void run_multi_test(const bool make_zk) {
+/** Constructs a merkle tree with 8 leaves each of size 2, and cap size 4. Generates and verifies
+ * membership proofs for every possible subset of leaves. */
+void run_fixed_multi_test(const bool make_zk) {
     typedef libff::gf64 FieldT;
-    const std::size_t size = 8;
-    const std::size_t security_parameter = 128;
-    const std::size_t digest_len_bytes = 256/8;
+    // The size is fixed because large values would quickly cause the program to run out of memory.
+ const size_t size = 8; + const size_t cap_size = 4; + const size_t security_parameter = 128; + const size_t digest_len_bytes = 256/8; const bool algebraic_hash = false; merkle_tree tree = new_MT( size, digest_len_bytes, make_zk, - security_parameter); + security_parameter, + cap_size); const std::vector vec1 = random_vector(size); const std::vector vec2 = random_vector(size); @@ -135,47 +162,142 @@ void run_multi_test(const bool make_zk) { const binary_hash_digest root = tree.get_root(); - std::vector> leafs; - for (std::size_t i = 0; i < size; ++i) + std::vector> leaves; + for (size_t i = 0; i < size; ++i) { std::vector leaf({ vec1[i], vec2[i] }); - leafs.emplace_back(leaf); + leaves.emplace_back(leaf); } - for (std::size_t subset = 0; subset < (1ull< subset_elements; - std::vector> subset_leafs; - for (std::size_t k = 0; k < size; ++k) + std::vector subset_elements; + std::vector> subset_leaves; + for (size_t k = 0; k < size; ++k) { if (subset & (1ull< mp = tree.get_set_membership_proof(subset_elements); + const merkle_tree_set_membership_proof mp = + tree.get_set_membership_proof(subset_elements); + + const bool is_valid = tree.validate_set_membership_proof(root, + subset_elements, + subset_leaves, + mp); + EXPECT_TRUE(is_valid); + } +} + +TEST(MerkleTreeTest, FixedMultiTest) { + const bool make_zk = false; + run_fixed_multi_test(make_zk); +} + +TEST(MerkleTreeZKTest, FixedMultiTest) { + const bool make_zk = true; + run_fixed_multi_test(make_zk); +} + +/** Constructs a merkle tree with leaf size 2. Generates and verifies membership proofs for some + * randomly generated sorted subset of leaves of specified size, with no duplicates. Queries with + * unsorted, duplicated lists of leaves currently only work when it is not zero knowledge. */ +void run_random_multi_test(const size_t size, const size_t digest_len_bytes, const bool make_zk, + const size_t security_parameter, const size_t cap_size, + const size_t subset_size) { + typedef libff::gf64 FieldT; + + const bool algebraic_hash = false; + const size_t num_iterations = 1; // The number of randomly generated subsets to test. + + merkle_tree tree = new_MT( + size, + digest_len_bytes, + make_zk, + security_parameter, + cap_size); + + const std::vector vec1 = random_vector(size); + const std::vector vec2 = random_vector(size); + + tree.construct({ vec1, vec2 }); + + const binary_hash_digest root = tree.get_root(); + + std::vector> leaves; + leaves.reserve(size); + std::vector shuffled_leaf_indices; + shuffled_leaf_indices.reserve(size); + for (size_t i = 0; i < size; ++i) + { + std::vector leaf({ vec1[i], vec2[i] }); + leaves.emplace_back(leaf); + shuffled_leaf_indices.emplace_back(i); + } + + for (size_t i = 0; i < num_iterations; i++) + { + std::vector subset_elements; + std::vector> subset_leaves; + /* TODO: The commented-out code generates subsets that are unsorted and may be repeats. + They are not used because the code currently cannot handle these cases if it is + zero knowledge. */ + // for (size_t j = 0; j < subset_size; j++) + // { + // size_t k = randombytes_uniform(size); + // subset_elements.emplace_back(k); + // subset_leaves.emplace_back(leaves[k]); + // } + + // Generate a random sorted subset of indices at the beginning of shuffled_leaf_indices. 
+ std::shuffle(shuffled_leaf_indices.begin(), shuffled_leaf_indices.end(), + std::default_random_engine(i)); + std::sort(shuffled_leaf_indices.begin(), shuffled_leaf_indices.begin() + subset_size); + for (size_t j = 0; j < subset_size; j++) + { + size_t k = shuffled_leaf_indices[j]; + subset_elements.emplace_back(k); + subset_leaves.emplace_back(leaves[k]); + } + + const merkle_tree_set_membership_proof mp = + tree.get_set_membership_proof(subset_elements); const bool is_valid = tree.validate_set_membership_proof(root, subset_elements, - subset_leafs, + subset_leaves, mp); EXPECT_TRUE(is_valid); } } -TEST(MerkleTreeTest, MultiTest) { +TEST(MerkleTreeTest, RandomMultiTest) { + const size_t security_parameter = 128; + const size_t digest_len_bytes = 256/8; const bool make_zk = false; - run_multi_test(make_zk); + // Test a small and a large tree. + run_random_multi_test(16, digest_len_bytes, make_zk, security_parameter, 4, 5); + run_random_multi_test(1ull << 16, digest_len_bytes, make_zk, security_parameter, 256, 100); } -TEST(MerkleTreeZKTest, MultiTest) { +TEST(MerkleTreeZKTest, RandomMultiTest) { + const size_t security_parameter = 128; + const size_t digest_len_bytes = 256/8; const bool make_zk = true; - run_multi_test(make_zk); + // Test a small and a large tree. + run_random_multi_test(16, digest_len_bytes, make_zk, security_parameter, 4, 5); + run_random_multi_test(1ull << 16, digest_len_bytes, make_zk, security_parameter, 256, 100); } -TEST(MerkleTreeTwoToOneHashTest, SimpleTest) +/** Verify that count_internal_hash_complexity_to_verify_set_membership is correct for a fixed tree + * size and query set, for various cap sizes. */ +TEST(MerkleTreeHashCountTest, SimpleTest) { typedef libff::gf64 FieldT; bool make_zk = false; @@ -184,16 +306,24 @@ TEST(MerkleTreeTwoToOneHashTest, SimpleTest) const size_t hash_length = 32; const bool algebraic_hash = false; - merkle_tree tree = new_MT( - num_leaves, - hash_length, - make_zk, - security_parameter); + const std::vector cap_sizes = {2, 4, 8}; + const std::vector expected_num_hashes = {12, 10, 8}; + + const std::vector positions = {1, 3, 6, 7}; - std::vector positions = {1, 3, 6, 7}; - size_t expected_num_hashes = 6; - size_t actual_num_hashes = tree.count_hashes_to_verify_set_membership_proof(positions); - ASSERT_EQ(expected_num_hashes, actual_num_hashes); + for (size_t i = 0; i < cap_sizes.size(); i++) + { + merkle_tree tree = new_MT( + num_leaves, + hash_length, + make_zk, + security_parameter, + cap_sizes[i]); + + size_t actual_num_hashes = tree.count_internal_hash_complexity_to_verify_set_membership( + positions); + ASSERT_EQ(expected_num_hashes[i], actual_num_hashes); + } } }
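Usage note (appended commentary, not part of the patch): the sketch below exercises the new cap-hash API end to end, mirroring the constructor arguments and the hash-counting rule introduced in this diff. The headers, class names, and function signatures are taken from the patch itself; the `main` harness, the chosen sizes, and the expected unit count of 12 are illustrative assumptions only.

// Minimal usage sketch of a capped Merkle tree, assuming the headers in this diff.
#include <libff/algebra/fields/binary/gf64.hpp>
#include "libiop/algebra/utils.hpp"
#include "libiop/bcs/hashing/blake2b.hpp"
#include "libiop/bcs/merkle_tree.hpp"

int main()
{
    typedef libff::gf64 FieldT;
    const std::size_t num_leaves = 16;
    const std::size_t security_parameter = 128;
    const std::size_t digest_len_bytes = 256 / 8;
    /* cap_size = 4: the root's 4 direct children are hashed with one cap-hash call,
       replacing the top two layers of two-to-one hashes. */
    const std::size_t cap_size = 4;

    libiop::merkle_tree<FieldT, libiop::binary_hash_digest> tree(
        num_leaves,
        std::make_shared<libiop::blake2b_leafhash<FieldT>>(security_parameter),
        libiop::blake2b_two_to_one_hash,
        libiop::blake2b_many_to_one_hash,
        digest_len_bytes,
        /* make_zk = */ false,
        security_parameter,
        cap_size);

    /* Leaf i is the column { row1[i], row2[i] }, as in the tests above. */
    const std::vector<FieldT> row1 = libiop::random_vector<FieldT>(num_leaves);
    const std::vector<FieldT> row2 = libiop::random_vector<FieldT>(num_leaves);
    tree.construct({ row1, row2 });

    /* Query two leaves; the positions are sorted and duplicate-free, as the
       zero-knowledge code path currently requires. */
    const std::vector<std::size_t> positions = {1, 6};
    const libiop::merkle_tree_set_membership_proof<libiop::binary_hash_digest> proof =
        tree.get_set_membership_proof(positions);
    const bool ok = tree.validate_set_membership_proof(
        tree.get_root(),
        positions,
        { {row1[1], row2[1]}, {row1[6], row2[6]} },
        proof);

    /* Counting rule from merkle_tree.hpp: each two-to-one hash is 2 units, each cap-hash
       input is 1 unit, and leaf hashes are not counted. Here there are 2 two-to-one hashes
       on each of the 2 layers below the cap (2 * 2 * 2 = 8 units) plus cap_size = 4 cap
       inputs, i.e. 12 units in total. */
    const std::size_t units =
        tree.count_internal_hash_complexity_to_verify_set_membership(positions);

    return (ok && units == 12) ? 0 : 1;
}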