ixa/
hashing.rs

1//! This module provides a deterministic hasher and `HashMap` and `HashSet` variants that use
2//! it. The hashing data structures in the standard library are not deterministic:
3//!
4//! > By default, HashMap uses a hashing algorithm selected to provide
5//! > resistance against HashDoS attacks. The algorithm is randomly seeded, and a
6//! > reasonable best-effort is made to generate this seed from a high quality,
7//! > secure source of randomness provided by the host without blocking the program.
8//!
9//! The standard library `HashMap` has a `new` method, but `HashMap<K, V, S>` does not have a `new`
10//! method by default. Use `HashMap::default()` instead to create a new hashmap with the default
11//! hasher. If you really need to keep the API the same across implementations, we provide the
12//! `HashMapExt` trait extension. Similarly, for `HashSet` and `HashSetExt`.The traits need only be
13//! in scope.
14//!
15
16use std::hash::{Hash, Hasher};
17
18use bincode::serde::encode_to_vec as serialize_to_vec;
19pub use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
20use serde::Serialize;
21use xxhash_rust::xxh3::Xxh3Default;
22
23/// Provides API parity with `std::collections::HashMap`.
24pub trait HashMapExt {
25    fn new() -> Self;
26}
27
28impl<K, V> HashMapExt for HashMap<K, V> {
29    fn new() -> Self {
30        HashMap::default()
31    }
32}
33
34// Note that trait aliases are not yet stabilized in rustc.
35// See https://github.com/rust-lang/rust/issues/41517
36/// Provides API parity with `std::collections::HashSet`.
37pub trait HashSetExt {
38    type Item;
39
40    fn new() -> Self;
41
42    /// Equivalent to `self.iter().cloned().collect::<Vec<_>>()`.
43    fn to_owned_vec(&self) -> Vec<Self::Item>;
44}
45
46impl<T: Clone> HashSetExt for HashSet<T> {
47    type Item = T;
48
49    fn new() -> Self {
50        HashSet::default()
51    }
52
53    fn to_owned_vec(&self) -> Vec<Self::Item> {
54        self.iter().cloned().collect()
55    }
56}
57
58/// A convenience method to compute the hash of a `&str`.
59pub fn hash_str(data: &str) -> u64 {
60    let mut hasher = rustc_hash::FxHasher::default();
61    hasher.write(data.as_bytes());
62    hasher.finish()
63}
64
65// Helper for any T: Hash
66pub fn one_shot_128<T: Hash>(value: &T) -> u128 {
67    let mut h = Xxh3Default::default();
68    value.hash(&mut h);
69    h.digest128()
70}
71
72pub fn hash_serialized_128<T: Serialize>(value: T) -> u128 {
73    let serialized = serialize_to_vec(&value, bincode::config::standard()).unwrap();
74    // The `xxh3_128` should be a little faster, but it is not guaranteed to produce the same hash.
75    // xxh3_128(serialized.as_slice())
76    one_shot_128(&serialized.as_slice())
77}
78
79#[cfg(test)]
80mod tests {
81    use bincode::serde::encode_to_vec as serialize_to_vec;
82    use serde::Serialize;
83
84    use super::*;
85
86    #[test]
87    fn hash_serialized_equals_one_shot() {
88        let value = "hello";
89        let a = hash_serialized_128(value);
90        let serialized = serialize_to_vec(value, bincode::config::standard()).unwrap();
91        let b = one_shot_128(&serialized.as_slice());
92
93        assert_eq!(a, b);
94    }
95
96    #[test]
97    fn hashes_strings() {
98        let a = one_shot_128(&"hello");
99        let b = one_shot_128(&"hello");
100        let c = one_shot_128(&"world");
101        assert_eq!(a, b);
102        assert_ne!(a, c);
103    }
104
105    #[test]
106    fn hashes_structs() {
107        #[derive(Hash)]
108        struct S {
109            x: u32,
110            y: String,
111        }
112        let h1 = one_shot_128(&S {
113            x: 1,
114            y: "a".into(),
115        });
116        let h2 = one_shot_128(&S {
117            x: 1,
118            y: "a".into(),
119        });
120        assert_eq!(h1, h2);
121    }
122
123    #[test]
124    fn serialization_is_concatenation() {
125        // We rely on the fact that the serialization of a tuple is the concatenation of the
126        // component types, and likewise for structs. This tests that invariant.
127
128        #[derive(Debug, Serialize)]
129        struct MyStruct {
130            name: &'static str,
131            age: i32,
132            height: f64,
133        }
134
135        let my_struct = MyStruct {
136            name: "John",
137            age: 25,
138            height: 1.80,
139        };
140        let my_tuple = ("John", 25, 1.80);
141
142        let encoded_struct = serialize_to_vec(my_struct, bincode::config::standard()).unwrap();
143        let encoded_tuple = serialize_to_vec(my_tuple, bincode::config::standard()).unwrap();
144
145        assert_eq!(encoded_struct, encoded_tuple);
146
147        let encoded_str = bincode::encode_to_vec("John", bincode::config::standard()).unwrap();
148        let encoded_int = bincode::encode_to_vec(25, bincode::config::standard()).unwrap();
149        let encoded_float = bincode::encode_to_vec(1.80, bincode::config::standard()).unwrap();
150        let flattened = encoded_str
151            .iter()
152            .copied()
153            .chain(encoded_int.iter().copied())
154            .chain(encoded_float.iter().copied())
155            .collect::<Vec<u8>>();
156
157        assert_eq!(flattened, encoded_tuple);
158    }
159}