ixa/entity/entity_set/
entity_set.rs

1//! A lazy, composable set type built from set-algebraic expressions.
2//!
3//! An [`EntitySet`] represents a set of [`EntityId`] values as a tree of union,
4//! intersection, and difference operations over leaf [`SourceSet`] nodes. The tree
5//! is constructed eagerly but evaluated lazily: membership tests ([`contains`]) walk
6//! the tree on demand, and iteration is deferred to [`EntitySetIterator`].
7//!
8//! Construction methods reorder operands by estimated size to improve
9//! short-circuit performance and apply only minimal structural simplification.
10//!
11//! [`contains`]: EntitySet::contains
12
13use std::borrow::Borrow;
14use std::ops::Range;
15
16use log::warn;
17use rand::Rng;
18
19use super::{EntitySetIterator, SourceSet};
20use crate::entity::{Entity, EntityId};
21use crate::random::{
22    count_and_sample_single_l_reservoir, sample_multiple_from_known_length,
23    sample_multiple_l_reservoir, sample_single_excluding_l_reservoir, sample_single_l_reservoir,
24};
25
26/// Opaque public wrapper around the internal set-expression tree.
27pub struct EntitySet<'a, E: Entity>(EntitySetInner<'a, E>);
28
29/// Internal set-expression tree used to represent composed query sources.
30pub(super) enum EntitySetInner<'a, E: Entity> {
31    Source(SourceSet<'a, E>),
32    Union(Box<EntitySet<'a, E>>, Box<EntitySet<'a, E>>),
33    Intersection(Vec<EntitySet<'a, E>>),
34    Difference(Box<EntitySet<'a, E>>, Box<EntitySet<'a, E>>),
35}
36
37impl<'a, E: Entity> Clone for EntitySet<'a, E> {
38    fn clone(&self) -> Self {
39        Self(self.0.clone())
40    }
41}
42
43impl<'a, E: Entity> Clone for EntitySetInner<'a, E> {
44    fn clone(&self) -> Self {
45        match self {
46            Self::Source(source) => Self::Source(source.clone()),
47            Self::Union(left, right) => Self::Union(left.clone(), right.clone()),
48            Self::Intersection(sets) => Self::Intersection(sets.clone()),
49            Self::Difference(left, right) => Self::Difference(left.clone(), right.clone()),
50        }
51    }
52}
53
54impl<'a, E: Entity> Default for EntitySet<'a, E> {
55    fn default() -> Self {
56        Self::empty()
57    }
58}
59
60impl<'a, E: Entity> EntitySet<'a, E> {
61    pub(super) fn into_inner(self) -> EntitySetInner<'a, E> {
62        self.0
63    }
64
65    pub(super) fn is_source_leaf(&self) -> bool {
66        matches!(self.0, EntitySetInner::Source(_))
67    }
68
69    pub(super) fn into_source_leaf(self) -> Option<SourceSet<'a, E>> {
70        match self.0 {
71            EntitySetInner::Source(source) => Some(source),
72            _ => None,
73        }
74    }
75
76    /// Create an empty entity set.
77    pub fn empty() -> Self {
78        EntitySet(EntitySetInner::Source(SourceSet::empty()))
79    }
80
81    /// Create an entity set from a single source set.
82    pub(crate) fn from_source(source: SourceSet<'a, E>) -> Self {
83        EntitySet(EntitySetInner::Source(source))
84    }
85
86    pub(crate) fn from_intersection_sources(mut sources: Vec<SourceSet<'a, E>>) -> Self {
87        match sources.len() {
88            0 => return Self::empty(),
89            1 => return Self::from_source(sources.pop().unwrap()),
90            _ => {}
91        }
92
93        // Keep intersections sorted smallest-to-largest so iterators can take the
94        // first source as the driver and membership checks short-circuit quickly.
95        sources.sort_unstable_by_key(SourceSet::sort_key);
96
97        let sets = sources.into_iter().map(Self::from_source).collect();
98
99        EntitySet(EntitySetInner::Intersection(sets))
100    }
101
102    pub fn union(self, other: Self) -> Self {
103        // Idempotence: A ∪ A = A  (same structure over same sources)
104        if self.structurally_eq(&other) {
105            return self;
106        }
107
108        // Adjacent or overlapping intervals
109        if let (Some(a), Some(b)) = (self.as_range(), other.as_range()) {
110            if a.start <= b.end && b.start <= a.end {
111                return Self::from_source(SourceSet::population_range(
112                    a.start.min(b.start)..a.end.max(b.end),
113                ));
114            }
115        }
116
117        // Union with empty set is identity: A ∪ ∅ = ∅ ∪ A = A
118        match (self.is_empty(), other.is_empty()) {
119            (true, _) => return other,
120            (_, true) => return self,
121            _ => { /* pass */ }
122        }
123
124        // Larger set on LHS: more likely to short-circuit `||`.
125        let (left, right) = if self.sort_key() >= other.sort_key() {
126            (self, other)
127        } else {
128            (other, self)
129        };
130        EntitySet(EntitySetInner::Union(Box::new(left), Box::new(right)))
131    }
132
133    pub fn intersection(self, other: Self) -> Self {
134        // Idempotence: A ∩ A = A
135        if self.structurally_eq(&other) {
136            return self;
137        }
138
139        // Intersection of overlapping intervals
140        if let (Some(a), Some(b)) = (self.as_range(), other.as_range()) {
141            let overlap = a.start.max(b.start)..a.end.min(b.end);
142            return if overlap.is_empty() {
143                Self::empty()
144            } else {
145                Self::from_source(SourceSet::population_range(overlap))
146            };
147        }
148
149        // Intersection an empty set is empty: A ∩ ∅ = ∅ ∩ A = ∅
150        match (self.is_empty(), other.is_empty()) {
151            (true, _) => return self,
152            (_, true) => return other,
153            _ => { /* pass */ }
154        }
155
156        let mut sets = match self {
157            EntitySet(EntitySetInner::Intersection(sets)) => sets,
158            _ => vec![self],
159        };
160
161        sets.push(other);
162        // Keep intersections sorted smallest-to-largest so iterators can take the
163        // first source as the driver and membership checks short-circuit quickly.
164        sets.sort_unstable_by_key(EntitySet::sort_key);
165        EntitySet(EntitySetInner::Intersection(sets))
166    }
167
168    pub fn difference(self, other: Self) -> Self {
169        // Self-subtraction: A \ A = ∅
170        if self.structurally_eq(&other) {
171            return Self::empty();
172        }
173
174        if let (Some(a), Some(b)) = (self.as_range(), other.as_range()) {
175            let overlap = a.start.max(b.start)..a.end.min(b.end);
176            // Disjoint ranges leave the left operand unchanged.
177            if overlap.is_empty() {
178                return Self::from_source(SourceSet::population_range(a));
179            }
180            // A covering subtraction removes the entire left range.
181            if overlap.start == a.start && overlap.end == a.end {
182                return Self::empty();
183            }
184            // Trimming the left edge still leaves one contiguous suffix.
185            if overlap.start == a.start {
186                return Self::from_source(SourceSet::population_range(overlap.end..a.end));
187            }
188            // Trimming the right edge still leaves one contiguous prefix.
189            if overlap.end == a.end {
190                return Self::from_source(SourceSet::population_range(a.start..overlap.start));
191            }
192            // An interior subtraction would split the range, so keep the generic difference node.
193        }
194
195        // Subtraction involving an empty set is identity: A \ ∅ = A, ∅ \ A = ∅
196        if self.is_empty() || other.is_empty() {
197            return self;
198        }
199
200        EntitySet(EntitySetInner::Difference(Box::new(self), Box::new(other)))
201    }
202
203    /// Test whether `entity_id` is a member of this set.
204    pub fn contains(&self, entity_id: EntityId<E>) -> bool {
205        match self {
206            EntitySet(EntitySetInner::Source(source)) => source.contains(entity_id),
207            EntitySet(EntitySetInner::Union(a, b)) => {
208                a.contains(entity_id) || b.contains(entity_id)
209            }
210            EntitySet(EntitySetInner::Intersection(sets)) => {
211                sets.iter().all(|set| set.contains(entity_id))
212            }
213            EntitySet(EntitySetInner::Difference(a, b)) => {
214                a.contains(entity_id) && !b.contains(entity_id)
215            }
216        }
217    }
218
219    /// Collect this set's contents into an owned vector of `EntityId<E>`.
220    pub fn to_owned_vec(self) -> Vec<EntityId<E>> {
221        self.into_iter().collect()
222    }
223
224    /// Sample a single entity uniformly from this set, excluding any entity
225    /// equal to `excluded`. Returns `None` if the set is empty or contains
226    /// only the excluded entity.
227    ///
228    /// For source-leaf sets with random-access backing (`PopulationRange`,
229    /// `IndexSet`), runs in O(1) with at most two index lookups and no
230    /// iterator construction. Falls back to O(n) reservoir sampling for
231    /// composite sets and `PropertySet` sources.
232    pub fn sample_entity_excluding<R, X>(&self, rng: &mut R, excluded: X) -> Option<EntityId<E>>
233    where
234        R: Rng,
235        X: Borrow<EntityId<E>>,
236    {
237        let excluded = *excluded.borrow();
238        if let Some(n) = self.try_len() {
239            if n == 0 {
240                return None;
241            }
242            let p = rng.random_range(0..n as u32) as usize;
243            let candidate = self.try_nth(p)?;
244            if candidate != excluded {
245                return Some(candidate);
246            }
247            // `excluded` is at position `p`. Resample from the n-1 remaining
248            // positions: pick `k` uniform in `[0, n-1)`, then map it around
249            // the hole at `p`.
250            if n == 1 {
251                return None;
252            }
253            let k = rng.random_range(0..(n - 1) as u32) as usize;
254            let target = if k < p { k } else { k + 1 };
255            return self.try_nth(target);
256        }
257        sample_single_excluding_l_reservoir(rng, self.clone(), excluded)
258    }
259
260    /// Sample a single entity uniformly from this set. Returns `None` if the
261    /// set is empty.
262    pub fn sample_entity<R>(&self, rng: &mut R) -> Option<EntityId<E>>
263    where
264        R: Rng,
265    {
266        if let Some(n) = self.try_len() {
267            if n == 0 {
268                warn!("Requested a sample entity from an empty population");
269                return None;
270            }
271            // The `u32` cast makes this function 30% faster than `usize`.
272            let index = rng.random_range(0..n as u32) as usize;
273            return self.try_nth(index);
274        }
275        sample_single_l_reservoir(rng, self.clone())
276    }
277
278    /// Count the entities in this set and sample one uniformly from them.
279    ///
280    /// Returns `(count, sample)` where `sample` is `None` iff `count == 0`.
281    pub fn count_and_sample_entity<R>(&self, rng: &mut R) -> (usize, Option<EntityId<E>>)
282    where
283        R: Rng,
284    {
285        if let Some(n) = self.try_len() {
286            if n == 0 {
287                return (0, None);
288            }
289            let index = rng.random_range(0..n as u32) as usize;
290            return (n, self.try_nth(index));
291        }
292        count_and_sample_single_l_reservoir(rng, self.clone())
293    }
294
295    /// Sample up to `requested` entities uniformly from this set. If the set
296    /// has fewer than `requested` entities, the entire set is returned.
297    pub fn sample_entities<R>(&self, rng: &mut R, requested: usize) -> Vec<EntityId<E>>
298    where
299        R: Rng,
300    {
301        match self.try_len() {
302            Some(0) => {
303                warn!("Requested a sample of entities from an empty population");
304                vec![]
305            }
306            Some(_) => sample_multiple_from_known_length(rng, self.clone(), requested),
307            None => sample_multiple_l_reservoir(rng, self.clone(), requested),
308        }
309    }
310
311    /// Returns `Some(length)` only when the set length is trivially known.
312    ///
313    /// This is true only for direct `SourceSet` leaves except `PropertySet`.
314    /// Composite expressions return `None`.
315    pub fn try_len(&self) -> Option<usize> {
316        match self {
317            EntitySet(EntitySetInner::Source(source)) => source.try_len(),
318            _ => None,
319        }
320    }
321
322    /// Random-access lookup. Defined for the same variants as `try_len`.
323    fn try_nth(&self, idx: usize) -> Option<EntityId<E>> {
324        match self {
325            EntitySet(EntitySetInner::Source(source)) => source.try_nth(idx),
326            _ => None,
327        }
328    }
329
330    fn as_range(&self) -> Option<Range<usize>> {
331        match self {
332            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(range))) => {
333                Some(range.clone())
334            }
335            _ => None,
336        }
337    }
338
339    fn is_empty(&self) -> bool {
340        match self {
341            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(range))) => {
342                range.is_empty()
343            }
344            _ => false,
345        }
346    }
347
348    fn sort_key(&self) -> (usize, u8) {
349        match self {
350            EntitySet(EntitySetInner::Source(source)) => source.sort_key(),
351            EntitySet(EntitySetInner::Union(left, right)) => {
352                // Union upper bound is additive; cost hint tracks the cheaper side.
353                let (left_upper, left_hint) = left.sort_key();
354                let (right_upper, right_hint) = right.sort_key();
355                (
356                    left_upper.saturating_add(right_upper),
357                    left_hint.min(right_hint),
358                )
359            }
360            EntitySet(EntitySetInner::Intersection(sets)) => {
361                let mut upper = usize::MAX;
362                let mut hint = 0u8;
363                for set in sets {
364                    let (set_upper, set_hint) = set.sort_key();
365                    upper = upper.min(set_upper);
366                    hint = hint.saturating_add(set_hint);
367                }
368                if upper == usize::MAX {
369                    upper = 0;
370                }
371                (upper, hint)
372            }
373            EntitySet(EntitySetInner::Difference(left, right)) => {
374                let (left_upper, left_hint) = left.sort_key();
375                let (_, right_hint) = right.sort_key();
376                (left_upper, left_hint.saturating_add(right_hint))
377            }
378        }
379    }
380
381    /// Structural equality check: same tree shape with same sources at leaves.
382    fn structurally_eq(&self, other: &Self) -> bool {
383        match (self, other) {
384            (EntitySet(EntitySetInner::Source(a)), EntitySet(EntitySetInner::Source(b))) => a == b,
385            (
386                EntitySet(EntitySetInner::Union(a1, a2)),
387                EntitySet(EntitySetInner::Union(b1, b2)),
388            )
389            | (
390                EntitySet(EntitySetInner::Difference(a1, a2)),
391                EntitySet(EntitySetInner::Difference(b1, b2)),
392            ) => a1.structurally_eq(b1) && a2.structurally_eq(b2),
393            (
394                EntitySet(EntitySetInner::Intersection(a_sets)),
395                EntitySet(EntitySetInner::Intersection(b_sets)),
396            ) => {
397                a_sets.len() == b_sets.len()
398                    && a_sets
399                        .iter()
400                        .zip(b_sets.iter())
401                        .all(|(a_set, b_set)| a_set.structurally_eq(b_set))
402            }
403            _ => false,
404        }
405    }
406}
407
408impl<'a, E: Entity> IntoIterator for EntitySet<'a, E> {
409    type Item = EntityId<E>;
410    type IntoIter = EntitySetIterator<'a, E>;
411
412    fn into_iter(self) -> Self::IntoIter {
413        EntitySetIterator::new(self)
414    }
415}
416
417#[cfg(test)]
418mod tests {
419    use rand::rngs::StdRng;
420    use rand::SeedableRng;
421
422    use super::*;
423    use crate::entity::ContextEntitiesExt;
424    use crate::hashing::IndexSet;
425    use crate::{define_derived_property, define_entity, define_property, with, Context};
426
427    define_entity!(Person);
428    define_property!(struct Age(u8), Person);
429    define_derived_property!(struct Senior(bool), Person, [Age], |age| Senior(age.0 >= 65));
430
431    fn finite_set(ids: &[usize]) -> IndexSet<EntityId<Person>> {
432        ids.iter()
433            .copied()
434            .map(EntityId::<Person>::new)
435            .collect::<IndexSet<_>>()
436    }
437
438    fn as_entity_set(set: &IndexSet<EntityId<Person>>) -> EntitySet<Person> {
439        EntitySet::from_source(SourceSet::IndexSet(set))
440    }
441
442    #[test]
443    fn from_source_empty_is_empty() {
444        let es = EntitySet::<Person>::empty();
445        assert_eq!(es.sort_key().0, 0);
446        for value in 0..10 {
447            assert!(!es.contains(EntityId::<Person>::new(value)));
448        }
449    }
450
451    #[test]
452    fn from_source_population_ranges() {
453        let population = EntitySet::from_source(SourceSet::<Person>::population_range(0..3));
454        assert!(population.contains(EntityId::<Person>::new(0)));
455        assert!(population.contains(EntityId::<Person>::new(2)));
456        assert!(!population.contains(EntityId::<Person>::new(3)));
457        assert_eq!(population.sort_key().0, 3);
458
459        let singleton = EntitySet::from_source(SourceSet::<Person>::singleton(EntityId::new(5)));
460        assert!(singleton.contains(EntityId::<Person>::new(5)));
461        assert!(!singleton.contains(EntityId::<Person>::new(4)));
462        assert_eq!(singleton.sort_key().0, 1);
463
464        let range = EntitySet::from_source(SourceSet::<Person>::population_range(2..5));
465        assert!(range.contains(EntityId::<Person>::new(2)));
466        assert!(range.contains(EntityId::<Person>::new(4)));
467        assert!(!range.contains(EntityId::<Person>::new(1)));
468        assert!(!range.contains(EntityId::<Person>::new(5)));
469        assert_eq!(range.try_len(), Some(3));
470    }
471
472    #[test]
473    fn union_basic_behavior_without_legacy_reductions() {
474        let a = finite_set(&[1, 2, 3]);
475        let e = EntitySet::<Person>::empty();
476        let u = EntitySet::from_source(SourceSet::<Person>::population_range(0..10));
477
478        let a_union_empty = as_entity_set(&a).union(e);
479        assert!(a_union_empty.contains(EntityId::<Person>::new(1)));
480        assert!(!a_union_empty.contains(EntityId::<Person>::new(4)));
481
482        let u_union_a = u.union(as_entity_set(&a));
483        assert!(u_union_a.contains(EntityId::<Person>::new(0)));
484        assert!(u_union_a.contains(EntityId::<Person>::new(9)));
485        assert!(!u_union_a.contains(EntityId::<Person>::new(10)));
486    }
487
488    #[test]
489    fn union_range_optimizations() {
490        let adjacent = EntitySet::from_source(SourceSet::<Person>::population_range(0..3)).union(
491            EntitySet::from_source(SourceSet::<Person>::population_range(3..6)),
492        );
493        assert!(matches!(
494            adjacent,
495            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(0..6)
496        ));
497
498        let overlapping = EntitySet::from_source(SourceSet::<Person>::population_range(2..6))
499            .union(EntitySet::from_source(
500                SourceSet::<Person>::population_range(4..8),
501            ));
502        assert!(matches!(
503            overlapping,
504            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(2..8)
505        ));
506
507        let disjoint = EntitySet::from_source(SourceSet::<Person>::singleton(EntityId::new(1)))
508            .union(EntitySet::from_source(SourceSet::<Person>::singleton(
509                EntityId::new(4),
510            )));
511        assert!(matches!(disjoint, EntitySet(EntitySetInner::Union(_, _))));
512    }
513
514    #[test]
515    fn intersection_range_optimizations() {
516        let overlap = EntitySet::from_source(SourceSet::<Person>::population_range(2..6))
517            .intersection(EntitySet::from_source(
518                SourceSet::<Person>::population_range(4..8),
519            ));
520        assert!(matches!(
521            overlap,
522            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(4..6)
523        ));
524
525        let empty = EntitySet::from_source(SourceSet::<Person>::population_range(1..3))
526            .intersection(EntitySet::from_source(
527                SourceSet::<Person>::population_range(5..7),
528            ));
529        assert!(matches!(
530            empty,
531            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(0..0)
532        ));
533
534        let indexed_ids = finite_set(&[1, 2, 3]);
535        let mixed = EntitySet::from_source(SourceSet::<Person>::singleton(EntityId::new(2)))
536            .intersection(as_entity_set(&indexed_ids));
537        assert!(mixed.contains(EntityId::<Person>::new(2)));
538        assert!(!mixed.contains(EntityId::<Person>::new(1)));
539    }
540
541    #[test]
542    fn difference_range_optimizations() {
543        let unchanged = EntitySet::from_source(SourceSet::<Person>::population_range(2..6))
544            .difference(EntitySet::from_source(
545                SourceSet::<Person>::population_range(8..10),
546            ));
547        assert!(matches!(
548            unchanged,
549            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(2..6)
550        ));
551
552        let empty = EntitySet::from_source(SourceSet::<Person>::population_range(2..6)).difference(
553            EntitySet::from_source(SourceSet::<Person>::population_range(1..7)),
554        );
555        assert!(matches!(
556            empty,
557            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(0..0)
558        ));
559
560        let trim_left = EntitySet::from_source(SourceSet::<Person>::population_range(2..6))
561            .difference(EntitySet::from_source(
562                SourceSet::<Person>::population_range(1..4),
563            ));
564        assert!(matches!(
565            trim_left,
566            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(4..6)
567        ));
568
569        let trim_right = EntitySet::from_source(SourceSet::<Person>::population_range(2..6))
570            .difference(EntitySet::from_source(
571                SourceSet::<Person>::population_range(4..8),
572            ));
573        assert!(matches!(
574            trim_right,
575            EntitySet(EntitySetInner::Source(SourceSet::PopulationRange(ref range))) if range == &(2..4)
576        ));
577
578        let split = EntitySet::from_source(SourceSet::<Person>::population_range(2..8)).difference(
579            EntitySet::from_source(SourceSet::<Person>::population_range(4..6)),
580        );
581        assert!(matches!(split, EntitySet(EntitySetInner::Difference(_, _))));
582        assert!(split.contains(EntityId::<Person>::new(2)));
583        assert!(split.contains(EntityId::<Person>::new(7)));
584        assert!(!split.contains(EntityId::<Person>::new(4)));
585        assert!(!split.contains(EntityId::<Person>::new(5)));
586    }
587
588    #[test]
589    fn difference_is_not_commutative() {
590        let a = finite_set(&[1, 2, 3]);
591        let b = finite_set(&[2, 3, 4]);
592
593        let d1 = as_entity_set(&a).difference(as_entity_set(&b));
594        let c = finite_set(&[2, 3, 4]);
595        let d = finite_set(&[1, 2, 3]);
596        let d2 = as_entity_set(&c).difference(as_entity_set(&d));
597
598        assert!(d1.contains(EntityId::<Person>::new(1)));
599        assert!(!d1.contains(EntityId::<Person>::new(4)));
600        assert!(d2.contains(EntityId::<Person>::new(4)));
601        assert!(!d2.contains(EntityId::<Person>::new(1)));
602    }
603
604    #[test]
605    fn sort_key_rules() {
606        let a = finite_set(&[1, 2]);
607        let b = finite_set(&[2, 3, 4]);
608
609        let union = as_entity_set(&a).union(as_entity_set(&b));
610        assert_eq!(union.sort_key(), (a.len() + b.len(), 3));
611
612        let intersection = as_entity_set(&a).intersection(as_entity_set(&b));
613        assert_eq!(intersection.sort_key(), (a.len().min(b.len()), 6));
614
615        let difference = as_entity_set(&a).difference(as_entity_set(&b));
616        assert_eq!(difference.sort_key(), (a.len(), 6));
617    }
618
619    #[test]
620    fn compound_expressions_membership() {
621        let a = finite_set(&[1, 2, 3, 4]);
622        let b = finite_set(&[3, 4, 5]);
623        let c = finite_set(&[10, 20]);
624        let d = finite_set(&[20]);
625
626        let union_of_intersections = as_entity_set(&a)
627            .intersection(as_entity_set(&b))
628            .union(as_entity_set(&c).intersection(as_entity_set(&d)));
629        assert!(union_of_intersections.contains(EntityId::<Person>::new(3)));
630        assert!(union_of_intersections.contains(EntityId::<Person>::new(4)));
631        assert!(union_of_intersections.contains(EntityId::<Person>::new(20)));
632        assert!(!union_of_intersections.contains(EntityId::<Person>::new(5)));
633
634        let a2 = finite_set(&[1, 2, 3]);
635        let b2 = finite_set(&[3, 4, 5]);
636        let a3 = finite_set(&[1, 2, 3]);
637        let law = as_entity_set(&a3).intersection(as_entity_set(&a2).union(as_entity_set(&b2)));
638        assert!(law.contains(EntityId::<Person>::new(1)));
639        assert!(law.contains(EntityId::<Person>::new(2)));
640        assert!(law.contains(EntityId::<Person>::new(3)));
641        assert!(!law.contains(EntityId::<Person>::new(4)));
642    }
643
644    #[test]
645    fn clone_preserves_composite_expression_behavior() {
646        let a = finite_set(&[1, 2, 3, 4]);
647        let b = finite_set(&[3, 4, 5]);
648        let c = finite_set(&[2]);
649
650        let set = as_entity_set(&a)
651            .difference(as_entity_set(&c))
652            .union(as_entity_set(&b));
653        let cloned = set.clone();
654
655        for value in 0..7 {
656            let entity_id = EntityId::<Person>::new(value);
657            assert_eq!(set.contains(entity_id), cloned.contains(entity_id));
658        }
659
660        assert_eq!(
661            set.into_iter().collect::<Vec<_>>(),
662            cloned.into_iter().collect::<Vec<_>>()
663        );
664    }
665
666    #[test]
667    fn population_zero_is_empty() {
668        let es = EntitySet::from_source(SourceSet::<Person>::empty());
669        assert_eq!(es.sort_key().0, 0);
670        assert!(!es.contains(EntityId::<Person>::new(0)));
671    }
672
673    #[test]
674    fn try_len_known_only_for_non_property_sources() {
675        let empty = EntitySet::<Person>::from_source(SourceSet::empty());
676        assert_eq!(empty.try_len(), Some(0));
677
678        let singleton = EntitySet::<Person>::from_source(SourceSet::singleton(EntityId::new(42)));
679        assert_eq!(singleton.try_len(), Some(1));
680
681        let population = EntitySet::<Person>::from_source(SourceSet::population_range(0..5));
682        assert_eq!(population.try_len(), Some(5));
683
684        let range = EntitySet::<Person>::from_source(SourceSet::population_range(4..9));
685        assert_eq!(range.try_len(), Some(5));
686
687        let index_data = [EntityId::new(1), EntityId::new(2), EntityId::new(3)]
688            .into_iter()
689            .collect::<IndexSet<_>>();
690        let indexed = EntitySet::<Person>::from_source(SourceSet::IndexSet(&index_data));
691        assert_eq!(indexed.try_len(), Some(3));
692
693        let mut context = Context::new();
694        context.add_entity(with!(Person, Age(10))).unwrap();
695        let property_source = SourceSet::<Person>::new(Age(10), &context).unwrap();
696        assert!(matches!(property_source, SourceSet::PropertySet(_)));
697        let property_set = EntitySet::<Person>::from_source(property_source);
698        assert_eq!(property_set.try_len(), None);
699
700        let composed = EntitySet::<Person>::from_source(SourceSet::population_range(0..3))
701            .difference(EntitySet::from_source(SourceSet::singleton(EntityId::new(
702                1,
703            ))));
704        assert_eq!(composed.try_len(), None);
705    }
706
707    #[test]
708    fn range_leaf_works_inside_composite_expressions() {
709        let indexed_ids = finite_set(&[1, 3, 5, 8]);
710        let indexed = as_entity_set(&indexed_ids);
711        let range = EntitySet::from_source(SourceSet::<Person>::population_range(2..8));
712
713        let intersection = range.intersection(indexed);
714        assert!(!intersection.contains(EntityId::new(1)));
715        assert!(intersection.contains(EntityId::new(3)));
716        assert!(intersection.contains(EntityId::new(5)));
717        assert!(!intersection.contains(EntityId::new(8)));
718    }
719
720    #[test]
721    fn clone_preserves_unindexed_concrete_property_query_results() {
722        let mut context = Context::new();
723        let p1 = context.add_entity(with!(Person, Age(10))).unwrap();
724        let p2 = context.add_entity(with!(Person, Age(10))).unwrap();
725        let _p3 = context.add_entity(with!(Person, Age(11))).unwrap();
726
727        let set = context.query::<Person, _>(with!(Person, Age(10)));
728        assert_eq!(set.try_len(), None);
729        let cloned = set.clone();
730
731        let mut iter = set.into_iter();
732        assert_eq!(iter.next(), Some(p1));
733        assert_eq!(iter.collect::<Vec<_>>(), vec![p2]);
734
735        assert!(cloned.contains(p1));
736        assert!(cloned.contains(p2));
737        assert_eq!(cloned.into_iter().collect::<Vec<_>>(), vec![p1, p2]);
738    }
739
740    #[test]
741    fn clone_preserves_unindexed_derived_property_query_results() {
742        let mut context = Context::new();
743        let _p1 = context.add_entity(with!(Person, Age(64))).unwrap();
744        let p2 = context.add_entity(with!(Person, Age(65))).unwrap();
745        let p3 = context.add_entity(with!(Person, Age(90))).unwrap();
746
747        let set = context.query::<Person, _>(with!(Person, Senior(true)));
748        assert_eq!(set.try_len(), None);
749        let cloned = set.clone();
750
751        assert!(set.contains(p2));
752        assert!(set.contains(p3));
753        assert_eq!(set.into_iter().collect::<Vec<_>>(), vec![p2, p3]);
754        assert_eq!(cloned.into_iter().collect::<Vec<_>>(), vec![p2, p3]);
755    }
756
757    #[test]
758    fn sample_entity_excluding_skips_excluded() {
759        let set = EntitySet::from_source(SourceSet::<Person>::PopulationRange(0..5));
760        let excluded = EntityId::<Person>::new(2);
761        let mut rng = StdRng::seed_from_u64(42);
762        for _ in 0..200 {
763            let sampled = set.sample_entity_excluding(&mut rng, excluded).unwrap();
764            assert_ne!(sampled, excluded);
765            assert!(sampled.0 < 5);
766        }
767    }
768
769    #[test]
770    fn sample_entity_excluding_returns_none_when_only_excluded_present() {
771        let only = EntityId::<Person>::new(7);
772        let single = finite_set(&[7]);
773        let mut rng = StdRng::seed_from_u64(42);
774        assert_eq!(
775            as_entity_set(&single).sample_entity_excluding(&mut rng, only),
776            None
777        );
778    }
779
780    #[test]
781    fn sample_entity_excluding_returns_none_on_empty() {
782        let mut rng = StdRng::seed_from_u64(42);
783        assert_eq!(
784            EntitySet::<Person>::empty()
785                .sample_entity_excluding(&mut rng, EntityId::<Person>::new(0)),
786            None
787        );
788    }
789
790    #[test]
791    fn sample_entity_excluding_excluded_not_in_set_uses_first_pick() {
792        // When `excluded` is outside the set, every sample is the first
793        // uniform pick — exercises the no-resample path.
794        let set = EntitySet::from_source(SourceSet::<Person>::PopulationRange(0..10));
795        let mut rng = StdRng::seed_from_u64(42);
796        for _ in 0..200 {
797            let sampled = set
798                .sample_entity_excluding(&mut rng, EntityId::<Person>::new(999))
799                .unwrap();
800            assert!(sampled.0 < 10);
801        }
802    }
803
804    #[test]
805    fn sample_entity_excluding_uniform_over_known_length() {
806        // Chi-square test on PopulationRange (known-length, fast path).
807        let excluded = EntityId::<Person>::new(7);
808        let set = EntitySet::from_source(SourceSet::<Person>::PopulationRange(0..20));
809        let num_runs = 50_000;
810        let mut counts = [0usize; 20];
811        let mut rng = StdRng::seed_from_u64(42);
812        for _ in 0..num_runs {
813            let id = set.sample_entity_excluding(&mut rng, excluded).unwrap();
814            counts[id.0] += 1;
815        }
816        assert_eq!(counts[excluded.0], 0);
817
818        let expected = num_runs as f64 / 19.0;
819        let chi_square: f64 = counts
820            .iter()
821            .enumerate()
822            .filter(|(i, _)| *i != excluded.0)
823            .map(|(_, &obs)| {
824                let diff = obs as f64 - expected;
825                diff * diff / expected
826            })
827            .sum();
828        // df = 18, χ²_{0.999} ≈ 42.31
829        assert!(chi_square < 42.31, "χ² = {chi_square}");
830    }
831}