ixa/
hashing.rs

1//! This module provides a deterministic hasher and `HashMap` and `HashSet` variants that use
2//! it. The hashing data structures in the standard library are not deterministic:
3//!
4//! > By default, HashMap uses a hashing algorithm selected to provide
5//! > resistance against HashDoS attacks. The algorithm is randomly seeded, and a
6//! > reasonable best-effort is made to generate this seed from a high quality,
7//! > secure source of randomness provided by the host without blocking the program.
8//!
9//! The standard library `HashMap` has a `new` method, but `HashMap<K, V, S>` does not have a `new`
10//! method by default. Use `HashMap::default()` instead to create a new hashmap with the default
11//! hasher. If you really need to keep the API the same across implementations, we provide the
12//! `HashMapExt` trait extension. Similarly, for `HashSet` and `HashSetExt`.The traits need only be
13//! in scope.
14//!
15
16use std::hash::{BuildHasherDefault, Hash, Hasher};
17
18use bincode::serde::encode_to_vec as serialize_to_vec;
19pub use indexmap::set::Iter as IndexSetIter;
20use indexmap::IndexSet as RawIndexSet;
21pub use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet, FxHasher};
22use serde::Serialize;
23use xxhash_rust::xxh3::Xxh3Default;
24
25type FxBuildHasher = BuildHasherDefault<FxHasher>;
26
27pub type IndexSet<T> = RawIndexSet<T, FxBuildHasher>;
28
29pub type HashValueType = u128;
30
31/// Provides API parity with `std::collections::HashMap`.
32pub trait HashMapExt {
33    fn new() -> Self;
34}
35
36impl<K, V> HashMapExt for HashMap<K, V> {
37    fn new() -> Self {
38        HashMap::default()
39    }
40}
41
42// Note that trait aliases are not yet stabilized in rustc.
43// See https://github.com/rust-lang/rust/issues/41517
44/// Provides API parity with `std::collections::HashSet`.
45pub trait HashSetExt {
46    type Item;
47
48    fn new() -> Self;
49
50    /// Equivalent to `self.iter().cloned().collect::<Vec<_>>()`.
51    fn to_owned_vec(&self) -> Vec<Self::Item>;
52}
53
54impl<T: Clone> HashSetExt for HashSet<T> {
55    type Item = T;
56
57    fn new() -> Self {
58        HashSet::default()
59    }
60
61    fn to_owned_vec(&self) -> Vec<Self::Item> {
62        self.iter().cloned().collect()
63    }
64}
65
66impl<T: Clone> HashSetExt for IndexSet<T> {
67    type Item = T;
68
69    fn new() -> Self {
70        IndexSet::default()
71    }
72
73    fn to_owned_vec(&self) -> Vec<Self::Item> {
74        self.iter().cloned().collect()
75    }
76}
77
78/// A convenience method to compute the hash of a `&str`.
79pub fn hash_str(data: &str) -> u64 {
80    let mut hasher = rustc_hash::FxHasher::default();
81    hasher.write(data.as_bytes());
82    hasher.finish()
83}
84
85// Helper for any T: Hash
86pub fn one_shot_128<T: Hash>(value: &T) -> u128 {
87    let mut h = Xxh3Default::default();
88    value.hash(&mut h);
89    h.digest128()
90}
91
92pub fn hash_serialized_128<T: Serialize>(value: T) -> u128 {
93    let serialized = serialize_to_vec(&value, bincode::config::standard()).unwrap();
94    // The `xxh3_128` should be a little faster, but it is not guaranteed to produce the same hash.
95    // xxh3_128(serialized.as_slice())
96    one_shot_128(&serialized.as_slice())
97}
98
99#[cfg(test)]
100mod tests {
101    use bincode::serde::encode_to_vec as serialize_to_vec;
102    use serde::Serialize;
103
104    use super::*;
105
106    #[test]
107    fn hash_serialized_equals_one_shot() {
108        let value = "hello";
109        let a = hash_serialized_128(value);
110        let serialized = serialize_to_vec(value, bincode::config::standard()).unwrap();
111        let b = one_shot_128(&serialized.as_slice());
112
113        assert_eq!(a, b);
114    }
115
116    #[test]
117    fn hashes_strings() {
118        let a = one_shot_128(&"hello");
119        let b = one_shot_128(&"hello");
120        let c = one_shot_128(&"world");
121        assert_eq!(a, b);
122        assert_ne!(a, c);
123    }
124
125    #[test]
126    fn hashes_structs() {
127        #[derive(Hash)]
128        struct S {
129            x: u32,
130            y: String,
131        }
132        let h1 = one_shot_128(&S {
133            x: 1,
134            y: "a".into(),
135        });
136        let h2 = one_shot_128(&S {
137            x: 1,
138            y: "a".into(),
139        });
140        assert_eq!(h1, h2);
141    }
142
143    #[test]
144    fn serialization_is_concatenation() {
145        // We rely on the fact that the serialization of a tuple is the concatenation of the
146        // component types, and likewise for structs. This tests that invariant.
147
148        #[derive(Debug, Serialize)]
149        struct MyStruct {
150            name: &'static str,
151            age: i32,
152            height: f64,
153        }
154
155        let my_struct = MyStruct {
156            name: "John",
157            age: 25,
158            height: 1.80,
159        };
160        let my_tuple = ("John", 25, 1.80);
161
162        let encoded_struct = serialize_to_vec(my_struct, bincode::config::standard()).unwrap();
163        let encoded_tuple = serialize_to_vec(my_tuple, bincode::config::standard()).unwrap();
164
165        assert_eq!(encoded_struct, encoded_tuple);
166
167        let encoded_str = bincode::encode_to_vec("John", bincode::config::standard()).unwrap();
168        let encoded_int = bincode::encode_to_vec(25, bincode::config::standard()).unwrap();
169        let encoded_float = bincode::encode_to_vec(1.80, bincode::config::standard()).unwrap();
170        let flattened = encoded_str
171            .iter()
172            .copied()
173            .chain(encoded_int.iter().copied())
174            .chain(encoded_float.iter().copied())
175            .collect::<Vec<u8>>();
176
177        assert_eq!(flattened, encoded_tuple);
178    }
179}