ixa/
hashing.rs

1//! This module provides a deterministic hasher and `HashMap` and `HashSet` variants that use
2//! it. The hashing data structures in the standard library are not deterministic:
3//!
4//! > By default, HashMap uses a hashing algorithm selected to provide
5//! > resistance against HashDoS attacks. The algorithm is randomly seeded, and a
6//! > reasonable best-effort is made to generate this seed from a high quality,
7//! > secure source of randomness provided by the host without blocking the program.
8//!
9//! The standard library `HashMap` has a `new` method, but `HashMap<K, V, S>` does not have a `new`
10//! method by default. Use `HashMap::default()` instead to create a new hashmap with the default
11//! hasher. If you really need to keep the API the same across implementations, we provide the
12//! `HashMapExt` trait extension. Similarly, for `HashSet` and `HashSetExt`.The traits need only be
13//! in scope.
14//!
15
16use std::hash::{BuildHasherDefault, Hash, Hasher};
17
18pub use indexmap::set::Iter as IndexSetIter;
19use indexmap::IndexSet as RawIndexSet;
20pub use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet, FxHasher};
21use xxhash_rust::xxh3::Xxh3Default;
22
23type FxBuildHasher = BuildHasherDefault<FxHasher>;
24
25pub type IndexSet<T> = RawIndexSet<T, FxBuildHasher>;
26
27pub type HashValueType = u128;
28
29pub(crate) type DeterministicHasher = Xxh3Default;
30
31/// A `rkyv` writer that streams serialized bytes directly into a `Hasher`.
32pub struct HasherWriter<'a, H> {
33    hasher: &'a mut H,
34    pos: usize,
35}
36
37impl<'a, H> HasherWriter<'a, H> {
38    #[must_use]
39    pub fn new(hasher: &'a mut H) -> Self {
40        Self { hasher, pos: 0 }
41    }
42}
43
44impl<H: Hasher> rkyv::ser::Positional for HasherWriter<'_, H> {
45    fn pos(&self) -> usize {
46        self.pos
47    }
48}
49
50impl<H: Hasher> rkyv::ser::Writer<rkyv::rancor::Error> for HasherWriter<'_, H> {
51    fn write(&mut self, bytes: &[u8]) -> Result<(), rkyv::rancor::Error> {
52        self.hasher.write(bytes);
53        self.pos += bytes.len();
54        Ok(())
55    }
56}
57
58/// A fixed-size `rkyv` writer used by macro-generated equality implementations.
59#[derive(Debug, Clone, Copy)]
60pub struct EqualityBufferWriter<const N: usize> {
61    buf: [u8; N],
62    pos: usize,
63}
64
65impl<const N: usize> EqualityBufferWriter<N> {
66    #[must_use]
67    pub fn new() -> Self {
68        Self {
69            buf: [0; N],
70            pos: 0,
71        }
72    }
73
74    #[must_use]
75    pub fn as_written(&self) -> &[u8] {
76        &self.buf[..self.pos]
77    }
78}
79
80impl<const N: usize> Default for EqualityBufferWriter<N> {
81    fn default() -> Self {
82        Self::new()
83    }
84}
85
86impl<const N: usize> rkyv::ser::Positional for EqualityBufferWriter<N> {
87    fn pos(&self) -> usize {
88        self.pos
89    }
90}
91
92impl<const N: usize, E: rkyv::rancor::Source> rkyv::ser::Writer<E> for EqualityBufferWriter<N> {
93    fn write(&mut self, bytes: &[u8]) -> Result<(), E> {
94        let end = self.pos + bytes.len();
95        assert!(
96            end <= N,
97            "serialized form exceeded fixed buffer size: {} > {}",
98            end,
99            N
100        );
101        self.buf[self.pos..end].copy_from_slice(bytes);
102        self.pos = end;
103        Ok(())
104    }
105}
106
107/// Provides API parity with `std::collections::HashMap`.
108pub trait HashMapExt {
109    fn new() -> Self;
110}
111
112impl<K, V> HashMapExt for HashMap<K, V> {
113    fn new() -> Self {
114        HashMap::default()
115    }
116}
117
118// Note that trait aliases are not yet stabilized in rustc.
119// See https://github.com/rust-lang/rust/issues/41517
120/// Provides API parity with `std::collections::HashSet`.
121pub trait HashSetExt {
122    type Item;
123
124    fn new() -> Self;
125
126    /// Equivalent to `self.iter().cloned().collect::<Vec<_>>()`.
127    fn to_owned_vec(&self) -> Vec<Self::Item>;
128}
129
130impl<T: Clone> HashSetExt for HashSet<T> {
131    type Item = T;
132
133    fn new() -> Self {
134        HashSet::default()
135    }
136
137    fn to_owned_vec(&self) -> Vec<Self::Item> {
138        self.iter().cloned().collect()
139    }
140}
141
142impl<T: Clone> HashSetExt for IndexSet<T> {
143    type Item = T;
144
145    fn new() -> Self {
146        IndexSet::default()
147    }
148
149    fn to_owned_vec(&self) -> Vec<Self::Item> {
150        self.iter().cloned().collect()
151    }
152}
153
154/// A convenience method to compute the hash of a `&str`.
155pub fn hash_str(data: &str) -> u64 {
156    let mut hasher = rustc_hash::FxHasher::default();
157    hasher.write(data.as_bytes());
158    hasher.finish()
159}
160
161/// This if factored out for cases of implementations using the `Hash` trait to compute a 128-bit
162/// hash. This provides a unified interface to the implementation-specific 128-bit "finish" method.
163#[must_use]
164pub(crate) fn finish_deterministic_hash_128(hasher: DeterministicHasher) -> HashValueType {
165    hasher.digest128()
166}
167
168/// Helper for any `T: Hash` using the crate's deterministic hasher.
169pub fn one_shot_128<T: Hash>(value: &T) -> u128 {
170    let mut hasher = DeterministicHasher::default();
171    value.hash(&mut hasher);
172    finish_deterministic_hash_128(hasher)
173}
174
175#[cfg(test)]
176mod tests {
177    use std::hash::Hash;
178
179    use super::*;
180
181    #[test]
182    fn hashes_strings() {
183        let a = one_shot_128(&"hello");
184        let b = one_shot_128(&"hello");
185        let c = one_shot_128(&"world");
186        assert_eq!(a, b);
187        assert_ne!(a, c);
188    }
189
190    #[test]
191    fn hashes_structs() {
192        #[derive(Hash)]
193        struct S {
194            x: u32,
195            y: String,
196        }
197        let h1 = one_shot_128(&S {
198            x: 1,
199            y: "a".into(),
200        });
201        let h2 = one_shot_128(&S {
202            x: 1,
203            y: "a".into(),
204        });
205        assert_eq!(h1, h2);
206    }
207
208    #[test]
209    fn hashing_tuple_matches_hashing_components_in_order() {
210        let tuple = ("John", 25_u32, true);
211
212        let mut hasher = DeterministicHasher::default();
213        tuple.0.hash(&mut hasher);
214        tuple.1.hash(&mut hasher);
215        tuple.2.hash(&mut hasher);
216
217        assert_eq!(finish_deterministic_hash_128(hasher), one_shot_128(&tuple));
218    }
219}