miniconf/
packed.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
use core::{
    num::NonZero,
    ops::{Deref, DerefMut},
};

use crate::{IntoKeys, Key, KeyLookup, Keys, Node, Transcode, Traversal, TreeKey};

/// A bit-packed representation of multiple indices.
///
/// Given known bit width of each index, the bits are
/// concatenated above a marker bit.
///
/// The value consists of (from storage MSB to LSB):
///
/// * Zero or more groups of variable bit length, concatenated, each containing
///   one index. The first is aligned with the storage MSB.
/// * A set bit to mark the end of the used bits.
/// * Zero or more cleared bits corresponding to unused index space.
///
/// [`Packed::EMPTY`] has the marker at the MSB.
/// During [`Packed::push_lsb()`] the indices are inserted with their MSB
/// where the marker was and the marker moves toward the storage LSB.
/// During [`Packed::pop_msb()`] the indices are removed with their MSB
/// aligned with the storage MSB and the remaining bits and the marker move
/// toward the storage MSB.
///
/// The representation is MSB aligned to make `PartialOrd`/`Ord` more natural and stable.
/// The `Packed` key `Ord` matches the ordering of nodes in a horizontal leaf tree
/// traversal. New nodes can be added/removed to the tree without changing the implicit
/// encoding (and ordering!) as long no new bits need to be allocated/deallocated (
/// as long as the number of child nodes of an internal node does not cross a
/// power-of-two boundary).
/// Under this condition the mapping between indices/paths and `Packed` representation
/// is stable even if child nodes are added/removed.
///
/// "Small numbers" in LSB-aligned representation can be obtained through
/// [`Packed::into_lsb()`]/[`Packed::from_lsb()`] but don't have the ordering
/// and stability properties.
///
/// `Packed` can be used to uniquely identify
/// nodes in a `TreeKey` using only a very small amount of bits.
/// For many realistic `TreeKey`s a `u16` or even a `u8` is sufficient
/// to hold a `Packed` in LSB notation. Together with the
/// `postcard` `serde` format, this then gives access to any node in a nested
/// heterogeneous `Tree` with just a `u16` or `u8` as compact key and `[u8]` as
/// compact value.
///
/// ```
/// use miniconf::Packed;
///
/// let mut p = Packed::EMPTY;
/// let mut p_lsb = 0b1; // marker
/// for (bits, value) in [(2, 0b11), (1, 0b0), (0, 0b0), (3, 0b101)] {
///     p.push_lsb(bits, value).unwrap();
///     p_lsb <<= bits;
///     p_lsb |= value;
/// }
/// assert_eq!(p_lsb, 0b1_11_0__101);
/// //                  ^ marker
/// assert_eq!(p, Packed::from_lsb(p_lsb.try_into().unwrap()));
/// assert_eq!(p.get(), 0b11_0__101_1 << (Packed::CAPACITY - p.len()));
/// //                              ^ marker
/// ```
#[derive(
    Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
#[repr(transparent)]
#[serde(transparent)]
pub struct Packed(NonZero<usize>);

impl Default for Packed {
    #[inline]
    fn default() -> Self {
        Self::EMPTY
    }
}

impl From<NonZero<usize>> for Packed {
    #[inline]
    fn from(value: NonZero<usize>) -> Self {
        Self(value)
    }
}

impl From<Packed> for NonZero<usize> {
    #[inline]
    fn from(value: Packed) -> Self {
        value.0
    }
}

impl Deref for Packed {
    type Target = NonZero<usize>;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for Packed {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl Packed {
    /// Number of bits in the representation including the marker bit
    pub const BITS: u32 = NonZero::<usize>::BITS;

    /// The total number of bits this representation can store.
    pub const CAPACITY: u32 = Self::BITS - 1;

    /// The empty value
    pub const EMPTY: Self = Self(
        // Slightly cumbersome to generate it with `const`
        NonZero::<usize>::MIN
            .saturating_add(1)
            .saturating_pow(Self::CAPACITY),
    );

    /// Create a new `Packed` from a `usize`.
    ///
    /// The value must not be zero.
    #[inline]
    pub const fn new(value: usize) -> Option<Self> {
        match NonZero::new(value) {
            Some(value) => Some(Self(value)),
            None => None,
        }
    }

    /// Create a new `Packed` from LSB aligned `usize`
    ///
    /// The value must not be zero.
    #[inline]
    pub const fn new_from_lsb(value: usize) -> Option<Self> {
        match NonZero::new(value) {
            Some(value) => Some(Self::from_lsb(value)),
            None => None,
        }
    }

    /// The value is empty.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        matches!(*self, Self::EMPTY)
    }

    /// Clear and discard all bits stored.
    #[inline]
    pub fn clear(&mut self) {
        *self = Self::EMPTY;
    }

    /// Number of bits that can be stored.
    #[inline]
    pub const fn capacity(&self) -> u32 {
        self.0.trailing_zeros()
    }

    /// Number of bits stored.
    #[inline]
    pub const fn len(&self) -> u32 {
        Self::CAPACITY - self.capacity()
    }

    /// Return the representation aligned to the LSB with the marker bit
    /// moved from the LSB to the MSB.
    #[inline]
    pub const fn into_lsb(self) -> NonZero<usize> {
        match NonZero::new(((self.0.get() >> 1) | (1 << Self::CAPACITY)) >> self.0.trailing_zeros())
        {
            Some(v) => v,
            // We ensure there is at least the marker bit set
            None => unreachable!(),
        }
    }

    /// Build a `Packed` from a LSB-aligned representation with the marker bit
    /// moved from the MSB the LSB.
    #[inline]
    pub const fn from_lsb(value: NonZero<usize>) -> Self {
        match Self::new(((value.get() << 1) | 1) << value.leading_zeros()) {
            Some(v) => v,
            // We ensure there is at least the marker bit set
            None => unreachable!(),
        }
    }

    /// Return the number of bits required to represent `num`.
    ///
    /// Ensures that at least one bit is allocated.
    #[inline]
    pub const fn bits_for(num: usize) -> u32 {
        match usize::BITS - num.leading_zeros() {
            0 => 1,
            v => v,
        }
    }

    /// Remove the given number of MSBs and return them.
    ///
    /// If the value does not contain sufficient bits
    /// it is left unchanged and `None` is returned.
    ///
    /// # Args
    /// * `bits`: Number of bits to pop. `bits <= Self::CAPACITY`
    pub fn pop_msb(&mut self, bits: u32) -> Option<usize> {
        let s = self.get();
        // Remove value from self
        Self::new(s << bits).map(|new| {
            *self = new;
            // Extract value from old self
            // Done in two steps as bits + 1 can be Self::BITS which would wrap.
            (s >> (Self::CAPACITY - bits)) >> 1
        })
    }

    /// Push the given number `bits` of `value` as new LSBs.
    ///
    /// Returns the remaining number of unused bits on success.
    ///
    /// # Args
    /// * `bits`: Number of bits to push. `bits <= Self::CAPACITY`
    /// * `value`: Value to push. `value >> bits == 0`
    pub fn push_lsb(&mut self, bits: u32, value: usize) -> Option<u32> {
        debug_assert_eq!(value >> bits, 0);
        let mut n = self.trailing_zeros();
        let old_marker = 1 << n;
        Self::new(old_marker >> bits).map(|new_marker| {
            n -= bits;
            // * Remove old marker
            // * Add value at offset n + 1
            //   Done in two steps as n + 1 can be Self::BITS, which would wrap.
            // * Add new marker
            self.0 = (self.get() ^ old_marker) | ((value << n) << 1) | new_marker.0;
            n
        })
    }
}

impl Keys for Packed {
    #[inline]
    fn next(&mut self, lookup: &KeyLookup) -> Result<usize, Traversal> {
        let bits = Self::bits_for(lookup.len().get() - 1);
        let index = self.pop_msb(bits).ok_or(Traversal::TooShort(0))?;
        index.find(lookup)
    }

    #[inline]
    fn finalize(&mut self) -> Result<(), Traversal> {
        self.is_empty().then_some(()).ok_or(Traversal::TooLong(0))
    }
}

impl IntoKeys for Packed {
    type IntoKeys = Self;

    #[inline]
    fn into_keys(self) -> Self::IntoKeys {
        self
    }
}

impl Transcode for Packed {
    fn transcode<M, K>(&mut self, keys: K) -> Result<Node, Traversal>
    where
        Self: Sized,
        M: TreeKey + ?Sized,
        K: IntoKeys,
    {
        M::traverse_by_key(keys.into_keys(), |index, _name, len| {
            match self.push_lsb(Packed::bits_for(len.get() - 1), index) {
                None => Err(()),
                Some(_) => Ok(()),
            }
        })
        .try_into()
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test() {
        // Check path encoding round trip.
        let t = [1usize, 3, 4, 0, 1];
        let mut p = Packed::EMPTY;
        for t in t {
            let bits = Packed::bits_for(t);
            p.push_lsb(bits, t).unwrap();
        }
        for t in t {
            let bits = Packed::bits_for(t);
            assert_eq!(p.pop_msb(bits).unwrap(), t);
        }
    }
}