From a1b6ba1eebcb4c7110ba8a2b4b8740f3f02fb30f Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Fri, 5 Sep 2025 15:30:28 +0200 Subject: [PATCH] grouping_map: Allow to create `GroupingMap` with a custom `BuildHasher` Currently, `GroupingMap` can create a `HashMap` with the default `RandomState`. For some kind of keys, using different `BuildHasher` implementation can result in better performance. Add a possibility to create a `GroupingMap` with a custom hasher builder, which then produces a `HashMap` with it. --- src/grouping_map.rs | 65 ++++++++++++++++++++++++++++----------------- src/lib.rs | 53 +++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/src/grouping_map.rs b/src/grouping_map.rs index 725e2592a..18004a7d0 100644 --- a/src/grouping_map.rs +++ b/src/grouping_map.rs @@ -2,11 +2,12 @@ use crate::{ adaptors::map::{MapSpecialCase, MapSpecialCaseFn}, MinMaxResult, }; -use std::cmp::Ordering; +use core::hash::BuildHasher; use std::collections::HashMap; use std::hash::Hash; use std::iter::Iterator; use std::ops::{Add, Mul}; +use std::{cmp::Ordering, hash::RandomState}; /// A wrapper to allow for an easy [`into_grouping_map_by`](crate::Itertools::into_grouping_map_by) pub type MapForGrouping = MapSpecialCase>; @@ -36,18 +37,30 @@ pub(crate) fn new_map_for_grouping K>( } /// Creates a new `GroupingMap` from `iter` -pub fn new(iter: I) -> GroupingMap +pub fn new(iter: I) -> GroupingMap where I: Iterator, K: Hash + Eq, { - GroupingMap { iter } + let hash_builder = RandomState::new(); + GroupingMap { iter, hash_builder } +} + +/// Creates a new `GroupingMap` from `iter` which will use the given hash +/// builder to hash keys. +pub fn with_hasher(iter: I, hash_builder: S) -> GroupingMap +where + I: Iterator, + K: Hash + Eq, + S: BuildHasher, +{ + GroupingMap { iter, hash_builder } } /// `GroupingMapBy` is an intermediate struct for efficient group-and-fold operations. /// /// See [`GroupingMap`] for more informations. 
-pub type GroupingMapBy = GroupingMap>; +pub type GroupingMapBy = GroupingMap, S>; /// `GroupingMap` is an intermediate struct for efficient group-and-fold operations. /// It groups elements by their key and at the same time fold each group @@ -56,14 +69,16 @@ pub type GroupingMapBy = GroupingMap>; /// No method on this struct performs temporary allocations. #[derive(Clone, Debug)] #[must_use = "GroupingMap is lazy and do nothing unless consumed"] -pub struct GroupingMap { +pub struct GroupingMap { iter: I, + hash_builder: S, } -impl GroupingMap +impl GroupingMap where I: Iterator, K: Hash + Eq, + S: BuildHasher, { /// This is the generic way to perform any operation on a `GroupingMap`. /// It's suggested to use this method only to implement custom operations @@ -105,11 +120,11 @@ where /// assert_eq!(lookup[&3], 7); /// assert_eq!(lookup.len(), 3); // The final keys are only 0, 1 and 2 /// ``` - pub fn aggregate(self, mut operation: FO) -> HashMap + pub fn aggregate(self, mut operation: FO) -> HashMap where FO: FnMut(Option, &K, V) -> Option, { - let mut destination_map = HashMap::new(); + let mut destination_map = HashMap::with_hasher(self.hash_builder); self.iter.for_each(|(key, val)| { let acc = destination_map.remove(&key); @@ -154,7 +169,7 @@ where /// assert_eq!(lookup[&2].acc, 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn fold_with(self, mut init: FI, mut operation: FO) -> HashMap + pub fn fold_with(self, mut init: FI, mut operation: FO) -> HashMap where FI: FnMut(&K, &V) -> R, FO: FnMut(R, &K, V) -> R, @@ -190,7 +205,7 @@ where /// assert_eq!(lookup[&2], 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn fold(self, init: R, operation: FO) -> HashMap + pub fn fold(self, init: R, operation: FO) -> HashMap where R: Clone, FO: FnMut(R, &K, V) -> R, @@ -225,7 +240,7 @@ where /// assert_eq!(lookup[&2], 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn reduce(self, mut operation: FO) -> HashMap + pub fn reduce(self, mut operation: FO) 
-> HashMap where FO: FnMut(V, &K, V) -> V, { @@ -239,7 +254,7 @@ where /// See [`.reduce()`](GroupingMap::reduce). #[deprecated(note = "Use .reduce() instead", since = "0.13.0")] - pub fn fold_first(self, operation: FO) -> HashMap + pub fn fold_first(self, operation: FO) -> HashMap where FO: FnMut(V, &K, V) -> V, { @@ -264,11 +279,11 @@ where /// assert_eq!(lookup[&2], vec![2, 5].into_iter().collect::>()); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn collect(self) -> HashMap + pub fn collect(self) -> HashMap where C: Default + Extend, { - let mut destination_map = HashMap::new(); + let mut destination_map = HashMap::with_hasher(self.hash_builder); self.iter.for_each(|(key, val)| { destination_map @@ -298,7 +313,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max(self) -> HashMap + pub fn max(self) -> HashMap where V: Ord, { @@ -324,7 +339,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max_by(self, mut compare: F) -> HashMap + pub fn max_by(self, mut compare: F) -> HashMap where F: FnMut(&K, &V, &V) -> Ordering, { @@ -353,7 +368,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max_by_key(self, mut f: F) -> HashMap + pub fn max_by_key(self, mut f: F) -> HashMap where F: FnMut(&K, &V) -> CK, CK: Ord, @@ -379,7 +394,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min(self) -> HashMap + pub fn min(self) -> HashMap where V: Ord, { @@ -405,7 +420,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min_by(self, mut compare: F) -> HashMap + pub fn min_by(self, mut compare: F) -> HashMap where F: FnMut(&K, &V, &V) -> Ordering, { @@ -434,7 +449,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min_by_key(self, mut f: F) -> HashMap + pub fn min_by_key(self, mut f: F) -> HashMap where F: FnMut(&K, &V) -> CK, CK: Ord, @@ 
-469,7 +484,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn minmax(self) -> HashMap> + pub fn minmax(self) -> HashMap, S> where V: Ord, { @@ -499,7 +514,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn minmax_by(self, mut compare: F) -> HashMap> + pub fn minmax_by(self, mut compare: F) -> HashMap, S> where F: FnMut(&K, &V, &V) -> Ordering, { @@ -550,7 +565,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn minmax_by_key(self, mut f: F) -> HashMap> + pub fn minmax_by_key(self, mut f: F) -> HashMap, S> where F: FnMut(&K, &V) -> CK, CK: Ord, @@ -577,7 +592,7 @@ where /// assert_eq!(lookup[&2], 5 + 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn sum(self) -> HashMap + pub fn sum(self) -> HashMap where V: Add, { @@ -603,7 +618,7 @@ where /// assert_eq!(lookup[&2], 5 * 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn product(self) -> HashMap + pub fn product(self) -> HashMap where V: Mul, { diff --git a/src/lib.rs b/src/lib.rs index 745e82048..9c7940406 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,7 @@ use std::fmt; #[cfg(feature = "use_alloc")] use std::fmt::Write; #[cfg(feature = "use_std")] -use std::hash::Hash; +use std::hash::{BuildHasher, Hash, RandomState}; use std::iter::{once, IntoIterator}; #[cfg(feature = "use_alloc")] type VecDequeIntoIter = alloc::collections::vec_deque::IntoIter; @@ -3800,7 +3800,7 @@ pub trait Itertools: Iterator { /// See [`GroupingMap`] for more informations /// on what operations are available. 
#[cfg(feature = "use_std")] - fn into_grouping_map(self) -> GroupingMap + fn into_grouping_map(self) -> GroupingMap where Self: Iterator + Sized, K: Hash + Eq, @@ -3808,6 +3808,26 @@ pub trait Itertools: Iterator { grouping_map::new(self) } + /// Constructs a `GroupingMap`, which will use the given hash builder to + /// hash keys, to be used later with one of the efficient group-and-fold + /// operations it allows to perform. + /// + /// The input iterator must yield items in the form of `(K, V)` where the + /// value of type `K` will be used as key to identify the groups and the + /// value of type `V` as value for the folding operation. + /// + /// See [`GroupingMap`] for more information + /// on what operations are available. + #[cfg(feature = "use_std")] + fn into_grouping_map_with_hasher(self, hash_builder: S) -> GroupingMap + where + Self: Iterator + Sized, + K: Hash + Eq, + S: BuildHasher, + { + grouping_map::with_hasher(self, hash_builder) + } + /// Constructs a `GroupingMap` to be used later with one of the efficient /// group-and-fold operations it allows to perform. /// @@ -3817,7 +3837,7 @@ pub trait Itertools: Iterator { /// See [`GroupingMap`] for more informations /// on what operations are available. #[cfg(feature = "use_std")] - fn into_grouping_map_by(self, key_mapper: F) -> GroupingMapBy + fn into_grouping_map_by(self, key_mapper: F) -> GroupingMapBy where Self: Iterator + Sized, K: Hash + Eq, @@ -3826,6 +3846,33 @@ pub trait Itertools: Iterator { grouping_map::new(grouping_map::new_map_for_grouping(self, key_mapper)) } + /// Constructs a `GroupingMap`, which will use the given hash builder to + /// hash keys, to be used later with one of the efficient group-and-fold + /// operations it allows to perform. + /// + /// The values from this iterator will be used as values for the folding operation + /// while the keys will be obtained from the values by calling `key_mapper`.
+ /// + /// See [`GroupingMap`] for more information + /// on what operations are available. + #[cfg(feature = "use_std")] + fn into_grouping_map_by_with_hasher( + self, + key_mapper: F, + hash_builder: S, + ) -> GroupingMapBy + where + Self: Iterator + Sized, + K: Hash + Eq, + F: FnMut(&V) -> K, + S: BuildHasher, + { + grouping_map::with_hasher( + grouping_map::new_map_for_grouping(self, key_mapper), + hash_builder, + ) + } + /// Return all minimum elements of an iterator. /// /// # Examples