ixa/
hashing.rs

//! This module provides a deterministic hasher, along with `HashMap` and `HashSet` variants that
//! use it. The hashing data structures in the standard library are not deterministic:
//!
//! > By default, HashMap uses a hashing algorithm selected to provide
//! > resistance against HashDoS attacks. The algorithm is randomly seeded, and a
//! > reasonable best-effort is made to generate this seed from a high quality,
//! > secure source of randomness provided by the host without blocking the program.
//!
//! The standard library `HashMap` has a `new` method, but only for its default hasher:
//! `HashMap<K, V, S>` with a custom hasher `S` does not have a `new` method. Use
//! `HashMap::default()` instead to create a new hashmap with the default hasher. If you really
//! need to keep the API the same across implementations, we provide the `HashMapExt` extension
//! trait. Similarly, `HashSetExt` is provided for `HashSet`. The traits need only be in scope.
//!
15
16use bincode::serde::encode_to_vec as serialize_to_vec;
17pub use rustc_hash::FxHashMap as HashMap;
18pub use rustc_hash::FxHashSet as HashSet;
19use serde::Serialize;
20use std::hash::{Hash, Hasher};
21use xxhash_rust::xxh3::Xxh3Default;
22
23/// Provides API parity with `std::collections::HashMap`.
24pub trait HashMapExt {
25    fn new() -> Self;
26}
27
28impl<K, V> HashMapExt for HashMap<K, V> {
29    fn new() -> Self {
30        HashMap::default()
31    }
32}
33
34// Note that trait aliases are not yet stabilized in rustc.
35// See https://github.com/rust-lang/rust/issues/41517
36/// Provides API parity with `std::collections::HashSet`.
37pub trait HashSetExt {
38    fn new() -> Self;
39}
40
41impl<T> HashSetExt for HashSet<T> {
42    fn new() -> Self {
43        HashSet::default()
44    }
45}
46
47/// A convenience method to compute the hash of a `&str`.
48pub fn hash_str(data: &str) -> u64 {
49    let mut hasher = rustc_hash::FxHasher::default();
50    hasher.write(data.as_bytes());
51    hasher.finish()
52}
53
54// Helper for any T: Hash
55pub fn one_shot_128<T: Hash>(value: &T) -> u128 {
56    let mut h = Xxh3Default::default();
57    value.hash(&mut h);
58    h.digest128()
59}
60
61pub fn hash_serialized_128<T: Serialize>(value: T) -> u128 {
62    let serialized = serialize_to_vec(&value, bincode::config::standard()).unwrap();
63    // The `xxh3_128` should be a little faster, but it is not guaranteed to produce the same hash.
64    // xxh3_128(serialized.as_slice())
65    one_shot_128(&serialized.as_slice())
66}
67
#[cfg(test)]
mod tests {
    use super::*;
    use bincode::serde::encode_to_vec as serialize_to_vec;
    use serde::Serialize;

    /// `hash_serialized_128` must agree with serializing by hand and passing the
    /// resulting byte slice to `one_shot_128`.
    #[test]
    fn hash_serialized_equals_one_shot() {
        let input = "hello";
        let via_helper = hash_serialized_128(input);

        let bytes = serialize_to_vec(&input, bincode::config::standard()).unwrap();
        let by_hand = one_shot_128(&bytes.as_slice());

        assert_eq!(via_helper, by_hand);
    }

    /// Equal strings hash equally; different strings hash differently.
    #[test]
    fn hashes_strings() {
        let hello_first = one_shot_128(&"hello");
        let hello_second = one_shot_128(&"hello");
        let world = one_shot_128(&"world");

        assert_eq!(hello_first, hello_second);
        assert_ne!(hello_first, world);
    }

    /// Two structurally identical values of a `#[derive(Hash)]` struct hash equally.
    #[test]
    fn hashes_structs() {
        #[derive(Hash)]
        struct Sample {
            x: u32,
            y: String,
        }

        // Builds a new, independent instance on every call.
        let make_sample = || Sample {
            x: 1,
            y: String::from("a"),
        };

        let first = one_shot_128(&make_sample());
        let second = one_shot_128(&make_sample());
        assert_eq!(first, second);
    }

    /// Guards the invariant that a struct or tuple serializes to the plain
    /// concatenation of its components' encodings.
    #[test]
    fn serialization_is_concatenation() {
        #[derive(Debug, Serialize)]
        struct Person {
            name: &'static str,
            age: i32,
            height: f64,
        }

        let struct_bytes = serialize_to_vec(
            Person {
                name: "John",
                age: 25,
                height: 1.80,
            },
            bincode::config::standard(),
        )
        .unwrap();
        let tuple_bytes =
            serialize_to_vec(("John", 25, 1.80), bincode::config::standard()).unwrap();

        // A struct and a tuple with the same components encode identically.
        assert_eq!(struct_bytes, tuple_bytes);

        // Encode each component on its own, then glue the pieces together in order.
        let name_bytes = bincode::encode_to_vec("John", bincode::config::standard()).unwrap();
        let age_bytes = bincode::encode_to_vec(25, bincode::config::standard()).unwrap();
        let height_bytes = bincode::encode_to_vec(1.80, bincode::config::standard()).unwrap();
        let glued: Vec<u8> = [name_bytes, age_bytes, height_bytes].concat();

        assert_eq!(glued, tuple_bytes);
    }
}