// miniconf/packed.rs
use core::{
num::NonZero,
ops::{Deref, DerefMut},
};
use crate::{IntoKeys, Key, KeyLookup, Keys, Node, Transcode, Traversal, TreeKey};
/// A bit-packed representation of multiple indices.
///
/// Given known bit width of each index, the bits are
/// concatenated above a marker bit.
///
/// The value consists of (from storage MSB to LSB):
///
/// * Zero or more groups of variable bit length, concatenated, each containing
/// one index. The first is aligned with the storage MSB.
/// * A set bit to mark the end of the used bits.
/// * Zero or more cleared bits corresponding to unused index space.
///
/// [`Packed::EMPTY`] has the marker at the MSB.
/// During [`Packed::push_lsb()`] the indices are inserted with their MSB
/// where the marker was and the marker moves toward the storage LSB.
/// During [`Packed::pop_msb()`] the indices are removed with their MSB
/// aligned with the storage MSB and the remaining bits and the marker move
/// toward the storage MSB.
///
/// The representation is MSB aligned to make `PartialOrd`/`Ord` more natural and stable.
/// The `Packed` key `Ord` matches the ordering of nodes in a horizontal leaf tree
/// traversal. New nodes can be added/removed to the tree without changing the implicit
/// encoding (and ordering!) as long as no new bits need to be allocated/deallocated (
/// as long as the number of child nodes of an internal node does not cross a
/// power-of-two boundary).
/// Under this condition the mapping between indices/paths and `Packed` representation
/// is stable even if child nodes are added/removed.
///
/// "Small numbers" in LSB-aligned representation can be obtained through
/// [`Packed::into_lsb()`]/[`Packed::from_lsb()`] but don't have the ordering
/// and stability properties.
///
/// `Packed` can be used to uniquely identify
/// nodes in a `TreeKey` using only a very small amount of bits.
/// For many realistic `TreeKey`s a `u16` or even a `u8` is sufficient
/// to hold a `Packed` in LSB notation. Together with the
/// `postcard` `serde` format, this then gives access to any node in a nested
/// heterogeneous `Tree` with just a `u16` or `u8` as compact key and `[u8]` as
/// compact value.
///
/// ```
/// use miniconf::Packed;
///
/// let mut p = Packed::EMPTY;
/// let mut p_lsb = 0b1; // marker
/// for (bits, value) in [(2, 0b11), (1, 0b0), (0, 0b0), (3, 0b101)] {
/// p.push_lsb(bits, value).unwrap();
/// p_lsb <<= bits;
/// p_lsb |= value;
/// }
/// assert_eq!(p_lsb, 0b1_11_0__101);
/// // ^ marker
/// assert_eq!(p, Packed::from_lsb(p_lsb.try_into().unwrap()));
/// assert_eq!(p.get(), 0b11_0__101_1 << (Packed::CAPACITY - p.len()));
/// // ^ marker
/// ```
#[derive(
    Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
#[repr(transparent)]
#[serde(transparent)]
// Invariant: the marker bit is always set, so the storage word is never zero.
// `NonZero` encodes this invariant and makes `Option<Packed>` the same size as
// `Packed` (niche optimization); `transparent` keeps the wire/ABI form a plain word.
pub struct Packed(NonZero<usize>);
impl Default for Packed {
#[inline]
fn default() -> Self {
Self::EMPTY
}
}
impl From<NonZero<usize>> for Packed {
#[inline]
fn from(value: NonZero<usize>) -> Self {
Self(value)
}
}
impl From<Packed> for NonZero<usize> {
#[inline]
fn from(value: Packed) -> Self {
value.0
}
}
impl Deref for Packed {
type Target = NonZero<usize>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Packed {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl Packed {
    /// Number of bits in the representation including the marker bit
    pub const BITS: u32 = NonZero::<usize>::BITS;
    /// The total number of bits this representation can store.
    pub const CAPACITY: u32 = Self::BITS - 1;
    /// The empty value: only the marker bit is set, at the storage MSB.
    pub const EMPTY: Self = Self(
        // Slightly cumbersome to generate it with `const`:
        // (1 + 1).pow(CAPACITY) == 1 << CAPACITY, computed in const context.
        NonZero::<usize>::MIN
            .saturating_add(1)
            .saturating_pow(Self::CAPACITY),
    );
    /// Create a new `Packed` from a `usize`.
    ///
    /// The value must not be zero.
    #[inline]
    pub const fn new(value: usize) -> Option<Self> {
        // Explicit match: `Option::map` is not usable in `const fn`.
        match NonZero::new(value) {
            Some(value) => Some(Self(value)),
            None => None,
        }
    }
    /// Create a new `Packed` from LSB aligned `usize`
    ///
    /// The value must not be zero.
    #[inline]
    pub const fn new_from_lsb(value: usize) -> Option<Self> {
        match NonZero::new(value) {
            Some(value) => Some(Self::from_lsb(value)),
            None => None,
        }
    }
    /// The value is empty.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        matches!(*self, Self::EMPTY)
    }
    /// Clear and discard all bits stored.
    #[inline]
    pub fn clear(&mut self) {
        *self = Self::EMPTY;
    }
    /// Number of bits that can be stored.
    #[inline]
    pub const fn capacity(&self) -> u32 {
        // The marker is the lowest set bit; everything below it is unused space.
        self.0.trailing_zeros()
    }
    /// Number of bits stored.
    #[inline]
    pub const fn len(&self) -> u32 {
        Self::CAPACITY - self.capacity()
    }
    /// Return the representation aligned to the LSB with the marker bit
    /// moved from the LSB to the MSB.
    #[inline]
    pub const fn into_lsb(self) -> NonZero<usize> {
        // Shift the payload down one bit, place a sentinel at the MSB (this
        // becomes the LSB-notation marker), then right-align everything by the
        // old marker's position (`trailing_zeros`).
        match NonZero::new(((self.0.get() >> 1) | (1 << Self::CAPACITY)) >> self.0.trailing_zeros())
        {
            Some(v) => v,
            // We ensure there is at least the marker bit set
            None => unreachable!(),
        }
    }
    /// Build a `Packed` from a LSB-aligned representation with the marker bit
    /// moved from the MSB to the LSB.
    #[inline]
    pub const fn from_lsb(value: NonZero<usize>) -> Self {
        // Shift the payload up one bit and set a new LSB marker, then
        // left-align to the storage MSB; the final shift by `leading_zeros`
        // discards the old MSB-notation marker.
        match Self::new(((value.get() << 1) | 1) << value.leading_zeros()) {
            Some(v) => v,
            // We ensure there is at least the marker bit set
            None => unreachable!(),
        }
    }
    /// Return the number of bits required to represent `num`.
    ///
    /// Ensures that at least one bit is allocated.
    #[inline]
    pub const fn bits_for(num: usize) -> u32 {
        match usize::BITS - num.leading_zeros() {
            // `num == 0` still occupies one (cleared) bit.
            0 => 1,
            v => v,
        }
    }
    /// Remove the given number of MSBs and return them.
    ///
    /// If the value does not contain sufficient bits
    /// it is left unchanged and `None` is returned.
    ///
    /// # Args
    /// * `bits`: Number of bits to pop. `bits <= Self::CAPACITY`
    pub fn pop_msb(&mut self, bits: u32) -> Option<usize> {
        let s = self.get();
        // Remove value from self.
        // The shifted word is zero exactly when the marker was shifted out,
        // i.e. when fewer than `bits` bits were stored (all bits below the
        // marker are clear), so `Self::new` yields `None` then.
        Self::new(s << bits).map(|new| {
            *self = new;
            // Extract value from old self
            // Done in two steps as bits + 1 can be Self::BITS which would wrap.
            (s >> (Self::CAPACITY - bits)) >> 1
        })
    }
    /// Push the given number `bits` of `value` as new LSBs.
    ///
    /// Returns the remaining number of unused bits on success.
    ///
    /// # Args
    /// * `bits`: Number of bits to push. `bits <= Self::CAPACITY`
    /// * `value`: Value to push. `value >> bits == 0`
    pub fn push_lsb(&mut self, bits: u32, value: usize) -> Option<u32> {
        debug_assert_eq!(value >> bits, 0);
        let mut n = self.trailing_zeros();
        let old_marker = 1 << n;
        // The new marker sits `bits` positions below the old one; shifting it
        // to zero means there is not enough capacity left and we return `None`.
        Self::new(old_marker >> bits).map(|new_marker| {
            n -= bits;
            // * Remove old marker
            // * Add value at offset n + 1
            //   Done in two steps as n + 1 can be Self::BITS, which would wrap.
            // * Add new marker
            self.0 = (self.get() ^ old_marker) | ((value << n) << 1) | new_marker.0;
            n
        })
    }
}
impl Keys for Packed {
    #[inline]
    fn next(&mut self, lookup: &KeyLookup) -> Result<usize, Traversal> {
        // Width: just enough bits to address any child of this node.
        let width = Self::bits_for(lookup.len().get() - 1);
        match self.pop_msb(width) {
            Some(index) => index.find(lookup),
            None => Err(Traversal::TooShort(0)),
        }
    }
    #[inline]
    fn finalize(&mut self) -> Result<(), Traversal> {
        // All stored bits must have been consumed for the key to be exact.
        if self.is_empty() {
            Ok(())
        } else {
            Err(Traversal::TooLong(0))
        }
    }
}
impl IntoKeys for Packed {
    // A `Packed` value is itself already a `Keys` iterator.
    type IntoKeys = Self;
    /// Identity conversion: `Packed` consumes its own bits via [`Keys`].
    #[inline]
    fn into_keys(self) -> Self::IntoKeys {
        self
    }
}
impl Transcode for Packed {
    fn transcode<M, K>(&mut self, keys: K) -> Result<Node, Traversal>
    where
        Self: Sized,
        M: TreeKey + ?Sized,
        K: IntoKeys,
    {
        M::traverse_by_key(keys.into_keys(), |index, _name, len| {
            // Encode each visited index using just enough bits for this
            // node's child count; signal `Err(())` when capacity runs out.
            let width = Packed::bits_for(len.get() - 1);
            self.push_lsb(width, index).map(|_| ()).ok_or(())
        })
        .try_into()
    }
}
#[cfg(test)]
mod test {
    use super::*;
    #[test]
    fn test() {
        // Round trip: indices pushed as LSB groups pop back out MSB-first
        // in the same order.
        let indices = [1usize, 3, 4, 0, 1];
        let mut packed = Packed::EMPTY;
        for &i in indices.iter() {
            packed.push_lsb(Packed::bits_for(i), i).unwrap();
        }
        for &i in indices.iter() {
            assert_eq!(packed.pop_msb(Packed::bits_for(i)), Some(i));
        }
    }
}