···1//! [`TokenWeights`] are representations of how common [`Token`]s are, and are paired up with
2//! a [`TokenPair`](crate::token::TokenPair) in a [`NailKov`](crate::NailKov).
34-use core::hash::BuildHasherDefault;
5-6-use estr::IdentityHasher;
7use indexmap::IndexMap;
8use rand::Rng;
9use rand_distr::{Distribution, weighted::WeightedAliasIndex};
1011-use crate::{error::NailError, token::Token};
1213/// A distribution of choices and their likelihood.
14#[derive(Clone, Debug)]
···32#[derive(Clone, Debug)]
33pub struct TokenWeightsBuilder {
34 /// Counts how many times a token is likely to appear.
// Maps each `Token` to a `u32` tally. The map is parameterised with
// `BuildHasherDefault<IdentityHasher>`; presumably this reuses a hash the
// token already carries instead of hashing again -- TODO confirm against `estr`.
35- occurrences: IndexMap<Token, u32, BuildHasherDefault<IdentityHasher>>,
36}
3738impl TokenWeightsBuilder {
···1//! [`TokenWeights`] are representations of how common [`Token`]s are, and are paired up with
2//! a [`TokenPair`](crate::token::TokenPair) in a [`NailKov`](crate::NailKov).
30004use indexmap::IndexMap;
5use rand::Rng;
6use rand_distr::{Distribution, weighted::WeightedAliasIndex};
78+use crate::{TokenHasher, error::NailError, token::Token};
910/// A distribution of choices and their likelihood.
11#[derive(Clone, Debug)]
···29#[derive(Clone, Debug)]
30pub struct TokenWeightsBuilder {
31 /// Counts how many times a token is likely to appear.
// Maps each `Token` to a `u32` tally. The hasher is the `TokenHasher` type
// imported from the crate root rather than one spelled out at this site.
32+ occurrences: IndexMap<Token, u32, TokenHasher>,
33}
3435impl TokenWeightsBuilder {
···29}
3031/// An owned pair of [`Token`]s.
32-#[derive(Copy, Clone, Debug)]
33// Alignment repr necessary to allow LLVM to better output
34-// optimized codegen for `to_bits`, `PartialEq`
35// Prior art taken from my contribution to Bevy:
36// https://github.com/bevyengine/bevy/blob/main/crates/bevy_ecs/src/entity/mod.rs#L309
37#[repr(C, align(16))]
38pub struct TokenPair {
39 // Do not reorder the fields here. The ordering is explicitly used by repr(C)
40- // to make this struct equivalent to a u64.
41 #[cfg(target_endian = "little")]
42 pub left: Token,
43 pub right: Token,
···45 pub left: Token,
46}
4748-// By not short-circuiting in comparisons, we get better codegen.
49-// See <https://github.com/rust-lang/rust/issues/117800>
// Equality is a single comparison of the packed values returned by `to_bits`,
// instead of comparing `left` and `right` one after the other.
// NOTE(review): the compared values come from `to_bits`, so two pairs are equal
// exactly when their `to_bits` results match -- confirm this agrees with
// `Token`'s own notion of equality.
50-impl PartialEq for TokenPair {
51- #[inline(always)]
52- fn eq(&self, other: &TokenPair) -> bool {
53- // By using `to_bits`, the codegen can potentially be optimized even
54- // further. Relies on the correct alignment/field
55- // order of `TokenPair`.
56- self.to_bits() == other.to_bits()
57- }
58-}
// Marker impl with an empty body: promotes the `PartialEq` relation on
// `TokenPair` to `Eq`.
59-60-impl Eq for TokenPair {}
// Hashes the pair by feeding the single packed value from `to_bits` into the
// hasher, rather than hashing the two fields separately.
61-62impl core::hash::Hash for TokenPair {
63- #[inline(always)]
64 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
65- self.to_bits().hash(state);
066 }
67}
68···72 Self { left, right }
73 }
// Packs the pair into one `u128`: the left token's digest hash is widened into
// the low bits and the right token's digest hash is shifted up by 64 bits, then
// the two are OR-ed together.
7400075 #[inline(always)]
76- fn to_bits(self) -> u128 {
77- (self.left.0.digest().hash() as u128) | ((self.right.0.digest().hash() as u128) << 64)
0000078 }
79}
80
···29}
3031/// An owned pair of [`Token`]s.
32+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
33// Alignment repr necessary to allow LLVM to output better-optimized codegen.
034// Prior art taken from my contribution to Bevy:
35// https://github.com/bevyengine/bevy/blob/main/crates/bevy_ecs/src/entity/mod.rs#L309
36#[repr(C, align(16))]
37pub struct TokenPair {
38 // Do not reorder the fields here. The ordering is explicitly used by repr(C)
39+ // to make this struct equivalent to a u128.
40 #[cfg(target_endian = "little")]
41 pub left: Token,
42 pub right: Token,
···44 pub left: Token,
45}
// Hashes the pair by writing the single `u64` returned by `double_hash` into
// the supplied hasher.
460000000000000047impl core::hash::Hash for TokenPair {
48+ #[inline]
49 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
50+ // Use only with an IdentityHasher so that you don't rehash the hash
51+ self.double_hash().hash(state);
52 }
53}
54···58 Self { left, right }
59 }
6061+ /// Use the precomputed hashes to generate a secondary hash.
62+ /// Method from [fastbloom](https://github.com/tomtomwombat/fastbloom/blob/main/src/hasher.rs#L190),
63+ /// which was in turn adapted from <https://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf>.
64 #[inline(always)]
65+ fn double_hash(&self) -> u64 {
// Adds the two tokens' digest hashes with wrap-around on overflow, then
// rotates the sum left by 5 bits.
// NOTE(review): `wrapping_add` is commutative, so a pair and its swapped
// counterpart (left/right exchanged) produce the same value here -- confirm
// that extra hash collision between `(a, b)` and `(b, a)` is acceptable.
66+ self.left
67+ .0
68+ .digest()
69+ .hash()
70+ .wrapping_add(self.right.0.digest().hash())
71+ .rotate_left(5)
72 }
73}
74