Improved db format

This commit is contained in:
Jakub Hlusička 2026-01-24 21:47:21 +01:00
parent bbbaea803b
commit 9aa5430851

View file

@ -146,31 +146,34 @@ type DbPathSegment<'a> = Cow<'a, str>;
type DbPathBuf<'a> = Vec<DbPathSegment<'a>>;
type DbPath<'a> = [DbPathSegment<'a>];
struct DbKey(Vec<u8>);
/// Byte-encoded key for a database path.
///
/// Dereferences to the bytes that address the path itself. The two trailing suffix bytes are
/// kept in the same vector so that the bounds for range-querying the path's children can be
/// borrowed from it without another allocation.
struct DbKey {
/// Segments separated by `0x00`, with the whole thing suffixed with `[0x00, 0xFF]`.
bytes: Vec<u8>,
}
/// Exposes the key of the path itself: the stored bytes without the two-byte suffix.
impl Deref for DbKey {
type Target = [u8];
fn deref(&self) -> &Self::Target {
// Strip the two suffix bytes. This relies on at least two bytes being stored, which both
// constructors visible here (`from_raw` and `new`) ensure.
&self.0[0..(self.0.len() - 2)]
&self.bytes[0..(self.bytes.len() - 2)]
}
}
impl DbKey {
/// Builds a `DbKey` from an already-encoded key by appending the two-byte suffix.
///
/// `key` is taken as the bytes that address the path itself (what `Deref` hands back);
/// its contents are not validated here.
fn from_raw(mut key: Vec<u8>) -> Self {
// Append the suffix in place, reusing the caller's vector rather than copying it.
key.extend_from_slice(&[0, 1]);
Self(key)
key.extend_from_slice(&[0x00, 0xFF]);
Self { bytes: key }
}
fn new<'a>(path: impl IntoIterator<Item = DbPathSegment<'a>>) -> Self {
// Null bytes are not allowed within path segments, and will cause a panic.
// The segments are separated by `[0, 0]`.
// Two null-bytes are used to allow for easy range lookups by suffixing the key with:
// `[0]..[0, 1]`
// To avoid reallocations, we always suffix the key with `[0, 1]`.
// Then, a specific key can be looked up by omitting the last two bytes.
// By omitting one byte, you get the start of the range of all paths within this path.
// By not omitting any bytes, you get the end of that range.
// Bytes of `0xFF` cannot appear in valid UTF-8.
// The byte vector stored in a `DbKey` is formed by interspersing the segments with `0x00`, and suffixing the key with `[0x00, 0xFF]`.
// This lets us represent three significant keys with a single allocation:
// * The key for querying the path itself: `bytes[..bytes.len() - 2]`, e.g. `b"first\x00second"`
// * The keys for range-querying the path's children:
// * Start (inclusive): `bytes[..bytes.len() - 1]`, e.g. `b"first\x00second\x00"`
// * End (exclusive): `bytes[..]`, e.g. `b"first\x00second\x00\xFF"`
let mut bytes = Vec::new();
@ -179,22 +182,23 @@ impl DbKey {
!segment.as_bytes().contains(&0x00),
"A path segment must not contain null bytes."
);
// No need to check for `0xFF` bytes in UTF-8 strings.
bytes.extend_from_slice(segment.as_bytes());
bytes.extend_from_slice(&[0, 0]);
bytes.push(0x00);
}
if let Some(last_byte) = bytes.last_mut() {
*last_byte = 1;
bytes.push(0xFF);
} else {
panic!("An empty path is not a valid path.");
}
DbKey(bytes)
DbKey { bytes }
}
/// Returns the byte range used to range-query the children of this path.
///
/// The start bound (inclusive) is the stored bytes without the final byte; the end bound
/// (exclusive) is the full stored bytes. Both slices borrow from `self`.
fn range_of_children(&self) -> Range<&[u8]> {
(&self.0[0..(self.0.len() - 1)])..(&self.0[..])
(&self.bytes[0..(self.bytes.len() - 1)])..(&self.bytes[..])
}
fn segments(&self) -> impl Iterator<Item = DbPathSegment<'_>> {
@ -209,7 +213,7 @@ impl DbKey {
if let Some(end_index) = self.rest.iter().position(|byte| *byte == 0) {
let segment = &self.rest[..end_index];
let segment = str::from_utf8(segment).unwrap();
self.rest = &self.rest[end_index + 2..];
self.rest = &self.rest[end_index + 1..];
Some(Cow::Borrowed(segment))
} else {
@ -219,7 +223,7 @@ impl DbKey {
}
SegmentIterator {
rest: self.0.as_slice(),
rest: self.bytes.as_slice(),
}
}
}