ixa/people/
index.rs

1//! Each property type `P: PersonProperty` has a corresponding `Index<P>` type a single
2//! instance of which is stored in the `PeopleData::property_indexes: RefCell<HashMap<TypeId,
3//! BxIndex>>` map. The map `PeopleData::property_indexes` is keyed by the `TypeId` of the
4//! property tag type `P`, _not_ the value type of the property, `PersonProperty::Value`.
5//!
6//! `Index<P>::lookup: HashTable<(T, std::collections::HashSet<PersonId>)>` is a hash table,
7//! _not_ a hash map. The difference is, a hash table is keyed by a `u64`, in our case,
8//! the 64-bit hash of the property value, and allows for a custom equality check. For
9//! equality, we compare the 128-bit hash of the property value. This allows us to keep the
10//! lookup operation completely type-erased. The value associated with the key is a tuple
11//! of the property value and a set of `PersonId`s. The property value is stored so that
12//!
13//! 1. we can compute its 128-bit hash to check equality during lookup,
14//! 2. we can iterate over (property value, set of `PersonId`s) pairs in the typed API, and
15//! 3. we can iterate over (serialized property value, set of `PersonId`s) pairs in the
16//!    type-erased API.
17
18use crate::{people::HashValueType, Context, ContextPeopleExt, HashSet, PersonId, PersonProperty};
19use hashbrown::HashTable;
20use log::{error, trace};
21
22pub type BxIndex = Box<dyn TypeErasedIndex>;
23
24/// The typed index.
25#[derive(Default)]
26pub struct Index<T: PersonProperty> {
27    // Primarily for debugging purposes
28    #[allow(dead_code)]
29    pub(super) name: &'static str,
30
31    // We store a copy of the value here so that we can iterate over
32    // it in the typed API, and so that the type-erased API can
33    // access some serialization of it.
34    lookup: HashTable<(T::CanonicalValue, HashSet<PersonId>)>,
35
36    // The largest person ID that has been indexed. Used so that we
37    // can lazily index when a person is added.
38    pub(super) max_indexed: usize,
39
40    pub(super) is_indexed: bool,
41}
42
43/// Implements the typed API
44impl<T: PersonProperty> Index<T> {
45    #[must_use]
46    pub fn new() -> Box<Self> {
47        Box::new(Self {
48            name: T::name(),
49            lookup: HashTable::default(),
50            max_indexed: 0,
51            is_indexed: false,
52        })
53    }
54
55    /// Inserts an entity into the set associated with `key`, creating a new set if one does not yet
56    /// exist. Returns a `bool` according to whether the `entity_id` already existed in the set.
57    pub fn add_person(&mut self, key: &T::CanonicalValue, entity_id: PersonId) -> bool {
58        trace!("adding person {} to index {}", entity_id, T::name());
59        let hash = T::hash_property_value(key);
60
61        // > `hasher` is called if entries need to be moved or copied to a new table.
62        // > This must return the same hash value that each entry was inserted with.
63        #[allow(clippy::cast_possible_truncation)]
64        let hasher = |(stored_value, _stored_set): &_| T::hash_property_value(stored_value) as u64;
65        // Equality is determined by comparing the full 128-bit hashes. We do not expect any
66        // collisions before the heat death of the universe.
67        let hash128_equality = |(stored_value, _): &_| T::hash_property_value(stored_value) == hash;
68        #[allow(clippy::cast_possible_truncation)]
69        self.lookup
70            .entry(hash as u64, hash128_equality, hasher)
71            .or_insert_with(|| (*key, HashSet::default()))
72            .get_mut()
73            .1
74            .insert(entity_id)
75    }
76
77    pub fn remove_person(&mut self, key: &T::CanonicalValue, entity_id: PersonId) {
78        let hash = T::hash_property_value(key);
79        self.remove_person_with_hash(hash, entity_id);
80    }
81}
82
83/// This trait Encapsulates the type-erased API.
84pub trait TypeErasedIndex {
85    fn as_any_mut(&mut self) -> &mut dyn std::any::Any;
86
87    /// Adding a person only requires the hash if the value is already in the index.
88    #[allow(dead_code)]
89    fn add_person_with_hash(&mut self, hash: HashValueType, entity_id: PersonId);
90
91    /// Removing a person only requires the hash.
92    fn remove_person_with_hash(&mut self, hash: HashValueType, entity_id: PersonId);
93
94    /// Fetching a set only requires the hash.
95    fn get_with_hash(&self, hash: HashValueType) -> Option<&HashSet<PersonId>>;
96
97    fn is_indexed(&self) -> bool;
98    fn set_indexed(&mut self, is_indexed: bool);
99    fn index_unindexed_people(&mut self, context: &Context);
100
101    /// Produces an iterator over pairs (serialized property value, set of `PersonId`s).
102    fn iter_serialized_values_people(
103        &self,
104    ) -> Box<dyn Iterator<Item = (String, &HashSet<PersonId>)> + '_>;
105}
106
107// Implements the type-erased API for Index<T>.
108impl<T: PersonProperty> TypeErasedIndex for Index<T> {
109    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
110        self
111    }
112
113    fn add_person_with_hash(&mut self, hash: HashValueType, entity_id: PersonId) {
114        // Equality is determined by comparing the full 128-bit hashes. We do not expect any
115        // collisions before the heat death of the universe.
116        let hash128_equality = |(stored_value, _): &_| T::hash_property_value(stored_value) == hash;
117
118        #[allow(clippy::cast_possible_truncation)]
119        if let Ok(mut entry) = self.lookup.find_entry(hash as u64, hash128_equality) {
120            let (_, set) = entry.get_mut();
121            set.insert(entity_id);
122        } else {
123            error!(
124                "could not find entry for hash {} when adding person {} to index",
125                hash, entity_id
126            );
127        }
128    }
129
130    /// Removing a person only requires the hash.
131    fn remove_person_with_hash(&mut self, hash: HashValueType, entity_id: PersonId) {
132        // Equality is determined by comparing the full 128-bit hashes. We do not expect any
133        // collisions before the heat death of the universe.
134        let hash128_equality = |(stored_value, _): &_| T::hash_property_value(stored_value) == hash;
135
136        #[allow(clippy::cast_possible_truncation)]
137        if let Ok(mut entry) = self.lookup.find_entry(hash as u64, hash128_equality) {
138            let (_, set) = entry.get_mut();
139            set.remove(&entity_id);
140            // Clean up the entry if there are no people
141            if set.is_empty() {
142                entry.remove();
143            }
144        } else {
145            error!(
146                "could not find entry for hash {} when removing person {} from index",
147                hash, entity_id
148            );
149        }
150    }
151
152    /// Fetching a set only requires the hash.
153    fn get_with_hash(&self, hash: HashValueType) -> Option<&HashSet<PersonId>> {
154        // Equality is determined by comparing the full 128-bit hashes. We do not expect
155        // any collisions before the heat death of the universe.
156        let hash128_equality = |(stored_value, _): &_| T::hash_property_value(stored_value) == hash;
157        #[allow(clippy::cast_possible_truncation)]
158        self.lookup
159            .find(hash as u64, hash128_equality)
160            .map(|(_, set)| set)
161    }
162
163    fn is_indexed(&self) -> bool {
164        self.is_indexed
165    }
166
167    fn set_indexed(&mut self, is_indexed: bool) {
168        self.is_indexed = is_indexed;
169    }
170
171    fn index_unindexed_people(&mut self, context: &Context) {
172        if !self.is_indexed {
173            return;
174        }
175        let current_pop = context.get_current_population();
176        trace!(
177            "{}: indexing unindexed people {}..<{}",
178            T::name(),
179            self.max_indexed,
180            current_pop
181        );
182
183        for id in self.max_indexed..current_pop {
184            let person_id = PersonId(id);
185            let value = context.get_person_property(person_id, T::get_instance());
186            self.add_person(&T::make_canonical(value), person_id);
187        }
188        self.max_indexed = current_pop;
189    }
190
191    fn iter_serialized_values_people(
192        &self,
193    ) -> Box<dyn Iterator<Item = (String, &HashSet<PersonId>)> + '_> {
194        Box::new(self.lookup.iter().map(|(k, v)| (T::get_display(k), v)))
195    }
196}
197
198pub fn process_indices(
199    context: &Context,
200    remaining_indices: &[&BxIndex],
201    property_names: &mut Vec<String>,
202    current_matches: &HashSet<PersonId>,
203    print_fn: &dyn Fn(&Context, &[String], usize),
204) {
205    if remaining_indices.is_empty() {
206        print_fn(context, property_names, current_matches.len());
207        return;
208    }
209
210    let (&next_index, rest_indices) = remaining_indices.split_first().unwrap();
211    // If there is nothing in the index, we don't need to process it
212    if !next_index.is_indexed() {
213        return;
214    }
215
216    for (display, people) in next_index.iter_serialized_values_people() {
217        let intersect = !property_names.is_empty();
218        property_names.push(display);
219
220        let matches = if intersect {
221            &current_matches.intersection(people).copied().collect()
222        } else {
223            people
224        };
225
226        process_indices(context, rest_indices, property_names, matches, print_fn);
227        property_names.pop();
228    }
229}
230
231#[cfg(test)]
232mod test {
233    // Tests in `src/people/query.rs` also exercise indexing code.
234
235    use crate::hashing::{hash_serialized_128, one_shot_128};
236    use crate::people::index::Index;
237    use crate::prelude::*;
238    use crate::{define_multi_property, set_log_level, set_module_filter, PersonProperty};
239    use log::LevelFilter;
240
241    define_person_property!(Age, u8);
242    define_person_property!(Weight, u8);
243    define_person_property!(Height, u8);
244
245    define_multi_property!(AWH, (Age, Weight, Height));
246    define_multi_property!(WHA, (Weight, Height, Age));
247
248    #[test]
249    fn test_multi_property_index_typed_api() {
250        let mut context = Context::new();
251        set_log_level(LevelFilter::Trace);
252        set_module_filter("ixa", LevelFilter::Trace);
253
254        context.index_property(WHA);
255        context.index_property(AWH);
256
257        context
258            .add_person(((Age, 1u8), (Weight, 2u8), (Height, 3u8)))
259            .unwrap();
260
261        let mut results_a = Default::default();
262        context.with_query_results((AWH, (1u8, 2u8, 3u8)), &mut |results| {
263            results_a = results.clone()
264        });
265        assert_eq!(results_a.len(), 1);
266
267        let mut results_b = Default::default();
268        context.with_query_results((WHA, (2u8, 3u8, 1u8)), &mut |results| {
269            results_b = results.clone()
270        });
271        assert_eq!(results_b.len(), 1);
272
273        assert_eq!(results_a, results_b);
274        println!("Results: {:?}", results_a);
275
276        context
277            .add_person(((Weight, 1u8), (Height, 2u8), (Age, 3u8)))
278            .unwrap();
279
280        let mut results_a = Default::default();
281        context.with_query_results((WHA, (1u8, 2u8, 3u8)), &mut |results| {
282            results_a = results.clone()
283        });
284        assert_eq!(results_a.len(), 1);
285
286        let mut results_b = Default::default();
287        context.with_query_results((AWH, (3u8, 1u8, 2u8)), &mut |results| {
288            results_b = results.clone()
289        });
290        assert_eq!(results_b.len(), 1);
291
292        assert_eq!(results_a, results_b);
293
294        println!("Results: {:?}", results_a);
295
296        set_module_filter("ixa", LevelFilter::Info);
297        set_log_level(LevelFilter::Off);
298    }
299
300    #[test]
301    fn index_name() {
302        let index = Index::<Age>::new();
303        assert!(index.name.contains("Age"));
304    }
305
306    #[test]
307    fn test_index_value_compute_same_values() {
308        let value = hash_serialized_128("test value");
309        let value2 = hash_serialized_128("test value");
310        assert_eq!(one_shot_128(&value), one_shot_128(&value2));
311    }
312
313    #[test]
314    fn test_index_value_compute_different_values() {
315        let value1 = 42;
316        let value2 = 43;
317        assert_ne!(
318            <Age as PersonProperty>::hash_property_value(&value1),
319            <Age as PersonProperty>::hash_property_value(&value2)
320        );
321    }
322}