Improved db format

This commit is contained in:
Jakub Hlusička 2026-01-24 21:47:21 +01:00
parent bbbaea803b
commit 9aa5430851

View file

@ -146,31 +146,34 @@ type DbPathSegment<'a> = Cow<'a, str>;
type DbPathBuf<'a> = Vec<DbPathSegment<'a>>; type DbPathBuf<'a> = Vec<DbPathSegment<'a>>;
type DbPath<'a> = [DbPathSegment<'a>]; type DbPath<'a> = [DbPathSegment<'a>];
struct DbKey(Vec<u8>); struct DbKey {
/// Segments separated by `0x00`, with the whole thing suffixed with `[0x00, 0xFF]`.
bytes: Vec<u8>,
}
impl Deref for DbKey { impl Deref for DbKey {
type Target = [u8]; type Target = [u8];
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.0[0..(self.0.len() - 2)] &self.bytes[0..(self.bytes.len() - 2)]
} }
} }
impl DbKey { impl DbKey {
fn from_raw(mut key: Vec<u8>) -> Self { fn from_raw(mut key: Vec<u8>) -> Self {
key.extend_from_slice(&[0, 1]); key.extend_from_slice(&[0x00, 0xFF]);
Self(key) Self { bytes: key }
} }
fn new<'a>(path: impl IntoIterator<Item = DbPathSegment<'a>>) -> Self { fn new<'a>(path: impl IntoIterator<Item = DbPathSegment<'a>>) -> Self {
// Null bytes are not allowed within path segments, and will cause a panic. // Null bytes are not allowed within path segments, and will cause a panic.
// The segments are separated by `[0, 0]`. // Bytes of `0xFF` cannot appear in valid UTF-8.
// Two null-bytes are used to allow for easy range lookups by suffixing the key with: // The byte vector stored in a `DbKey` is formed by interspersing the segments with `0x00`, and suffixing the key with `[0x00, 0xFF]`.
// `[0]..[0, 1]` // This lets us represent three significant keys with a single allocation:
// To avoid reallocations, we always suffix the key with `[0, 1]`. // * The key for querying the path itself: `bytes[..bytes.len() - 2]`, e.g. `b"first\x00second"`
// Then, a specific key can be looked up by omitting the last two bytes. // * The keys for range-querying the path's children:
// By omitting one byte, you get the start of the range of all paths within this path. // * Start (inclusive): `bytes[..bytes.len() - 1]`, e.g. `b"first\x00second\x00"`
// By not omitting any bytes, you get the end of that range. // * End (exclusive): `bytes[..]`, e.g. `b"first\x00second\x00\xFF"`
let mut bytes = Vec::new(); let mut bytes = Vec::new();
@ -179,22 +182,23 @@ impl DbKey {
!segment.as_bytes().contains(&0x00), !segment.as_bytes().contains(&0x00),
"A path segment must not contain null bytes." "A path segment must not contain null bytes."
); );
// No need to check for `0xFF` bytes in UTF-8 strings.
bytes.extend_from_slice(segment.as_bytes()); bytes.extend_from_slice(segment.as_bytes());
bytes.extend_from_slice(&[0, 0]); bytes.push(0x00);
} }
if let Some(last_byte) = bytes.last_mut() { if let Some(last_byte) = bytes.last_mut() {
*last_byte = 1; bytes.push(0xFF);
} else { } else {
panic!("An empty path is not a valid path."); panic!("An empty path is not a valid path.");
} }
DbKey(bytes) DbKey { bytes }
} }
fn range_of_children(&self) -> Range<&[u8]> { fn range_of_children(&self) -> Range<&[u8]> {
(&self.0[0..(self.0.len() - 1)])..(&self.0[..]) (&self.bytes[0..(self.bytes.len() - 1)])..(&self.bytes[..])
} }
fn segments(&self) -> impl Iterator<Item = DbPathSegment<'_>> { fn segments(&self) -> impl Iterator<Item = DbPathSegment<'_>> {
@ -209,7 +213,7 @@ impl DbKey {
if let Some(end_index) = self.rest.iter().position(|byte| *byte == 0) { if let Some(end_index) = self.rest.iter().position(|byte| *byte == 0) {
let segment = &self.rest[..end_index]; let segment = &self.rest[..end_index];
let segment = str::from_utf8(segment).unwrap(); let segment = str::from_utf8(segment).unwrap();
self.rest = &self.rest[end_index + 2..]; self.rest = &self.rest[end_index + 1..];
Some(Cow::Borrowed(segment)) Some(Cow::Borrowed(segment))
} else { } else {
@ -219,7 +223,7 @@ impl DbKey {
} }
SegmentIterator { SegmentIterator {
rest: self.0.as_slice(), rest: self.bytes.as_slice(),
} }
} }
} }