Files
@ 5da3dcf76c51
Branch filter:
Location: CSY/reowolf/src/collections/string_pool.rs - annotation
5da3dcf76c51
6.8 KiB
application/rls-services+xml
Remove stale todo
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | 13bac4f35619 7f25ee16c39b 1a42eb33aa76 87aa65714efe 87aa65714efe 13bac4f35619 012b61623f5a 13bac4f35619 7f25ee16c39b 1a42eb33aa76 13bac4f35619 13bac4f35619 1a42eb33aa76 13bac4f35619 13bac4f35619 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 ddddcd3cc9aa 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 1a42eb33aa76 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa 13bac4f35619 13bac4f35619 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 87aa65714efe 012b61623f5a 7f25ee16c39b 7f25ee16c39b 7f25ee16c39b 7f25ee16c39b 7f25ee16c39b 012b61623f5a 7f25ee16c39b 012b61623f5a 7f25ee16c39b 87aa65714efe 7f25ee16c39b 7f25ee16c39b 7f25ee16c39b 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa ddddcd3cc9aa 1a42eb33aa76 13bac4f35619 5da3dcf76c51 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 7f25ee16c39b 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 1a42eb33aa76 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 b69f417a9972 b69f417a9972 b69f417a9972 b69f417a9972 b69f417a9972 b69f417a9972 b69f417a9972 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 13bac4f35619 | use std::ptr::null_mut;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use std::fmt::{Debug, Display, Result as FmtResult};
use crate::common::Formatter;
const SLAB_SIZE: usize = u16::MAX as usize;
#[derive(Clone)]
pub struct StringRef<'a> {
data: *const u8,
length: usize,
_phantom: PhantomData<&'a [u8]>,
}
// As the StringRef is an immutable thing:
unsafe impl Sync for StringRef<'_> {}
unsafe impl Send for StringRef<'_> {}
impl<'a> StringRef<'a> {
/// `new` constructs a new StringRef whose data is not owned by the
/// `StringPool`, hence cannot have a `'static` lifetime.
pub(crate) fn new(data: &'a [u8]) -> StringRef<'a> {
// This is an internal (compiler) function: so debug_assert that the
// string is valid ascii. Most commonly the input will come from the
// code's source file, which is checked for ASCII-ness anyway.
debug_assert!(data.is_ascii());
let length = data.len();
let data = data.as_ptr();
StringRef{ data, length, _phantom: PhantomData }
}
pub fn as_str(&self) -> &'a str {
unsafe {
let slice = std::slice::from_raw_parts::<'a, u8>(self.data, self.length);
std::str::from_utf8_unchecked(slice)
}
}
pub fn as_bytes(&self) -> &'a [u8] {
unsafe {
std::slice::from_raw_parts::<'a, u8>(self.data, self.length)
}
}
}
impl<'a> Debug for StringRef<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
f.write_str("StringRef{ value: ")?;
f.write_str(self.as_str())?;
f.write_str(" }")
}
}
impl<'a> Display for StringRef<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
f.write_str(self.as_str())
}
}
impl PartialEq for StringRef<'_> {
fn eq(&self, other: &StringRef) -> bool {
self.as_str() == other.as_str()
}
}
impl Eq for StringRef<'_> {}
impl Hash for StringRef<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write(self.as_bytes());
}
}
struct StringPoolSlab {
prev: *mut StringPoolSlab,
data: Vec<u8>,
remaining: usize,
}
impl StringPoolSlab {
fn new(prev: *mut StringPoolSlab) -> Self {
Self{ prev, data: Vec::with_capacity(SLAB_SIZE), remaining: SLAB_SIZE }
}
}
/// StringPool is a ever-growing pool of strings. Strings have a maximum size
/// equal to the slab size. The slabs are essentially a linked list to maintain
/// pointer-stability of the strings themselves.
/// All `StringRef` instances are invalidated when the string pool is dropped
pub(crate) struct StringPool {
last: *mut StringPoolSlab,
}
impl StringPool {
pub(crate) fn new() -> Self {
// To have some stability we just turn a box into a raw ptr.
let initial_slab = Box::new(StringPoolSlab::new(null_mut()));
let initial_slab = Box::into_raw(initial_slab);
StringPool{
last: initial_slab,
}
}
/// Interns a string to the `StringPool`, returning a reference to it. The
/// pointer owned by `StringRef` is `'static` as the `StringPool` doesn't
/// reallocate/deallocate until dropped (which only happens at the end of
/// the program.)
pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef<'static> {
let data_len = data.len();
assert!(data_len <= SLAB_SIZE, "string is too large for slab"); // if you hit this, create logic for large-string allocations
debug_assert!(std::str::from_utf8(data).is_ok(), "string to intern is not valid UTF-8 encoded");
let mut last = unsafe{&mut *self.last};
if data.len() > last.remaining {
// Doesn't fit: allocate new slab
self.alloc_new_slab();
last = unsafe{&mut *self.last};
}
// Must fit now, compute hash and put in buffer
debug_assert!(data_len <= last.remaining);
let range_start = last.data.len();
last.data.extend_from_slice(data);
last.remaining -= data_len;
debug_assert_eq!(range_start + data_len, last.data.len());
unsafe {
let start = last.data.as_ptr().offset(range_start as isize);
StringRef{ data: start, length: data_len, _phantom: PhantomData }
}
}
fn alloc_new_slab(&mut self) {
let new_slab = Box::new(StringPoolSlab::new(self.last));
let new_slab = Box::into_raw(new_slab);
self.last = new_slab;
}
}
impl Drop for StringPool {
fn drop(&mut self) {
let mut new_slab = self.last;
while !new_slab.is_null() {
let cur_slab = new_slab;
unsafe {
new_slab = (*cur_slab).prev;
Box::from_raw(cur_slab); // consume and deallocate
}
}
}
}
// String pool cannot be cloned, and the created `StringRef` instances remain
// allocated until the end of the program, so it is always safe to send. It is
// also sync in the sense that it becomes an immutable thing after compilation,
// but lets not derive that if we would ever become a multithreaded compiler in
// the future.
unsafe impl Send for StringPool {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_just_fits() {
let large = "0".repeat(SLAB_SIZE);
let mut pool = StringPool::new();
let interned = pool.intern(large.as_bytes());
assert_eq!(interned.as_str(), large);
}
#[test]
#[should_panic]
fn test_string_too_large() {
let large = "0".repeat(SLAB_SIZE + 1);
let mut pool = StringPool::new();
let _interned = pool.intern(large.as_bytes());
}
#[test]
fn test_lots_of_small_allocations() {
const NUM_PER_SLAB: usize = 32;
const NUM_SLABS: usize = 4;
let to_intern = "0".repeat(SLAB_SIZE / NUM_PER_SLAB);
let mut pool = StringPool::new();
let mut last_slab = pool.last;
let mut all_refs = Vec::new();
// Fill up first slab
for _alloc_idx in 0..NUM_PER_SLAB {
let interned = pool.intern(to_intern.as_bytes());
all_refs.push(interned);
assert!(std::ptr::eq(last_slab, pool.last));
}
for _slab_idx in 0..NUM_SLABS-1 {
for alloc_idx in 0..NUM_PER_SLAB {
let interned = pool.intern(to_intern.as_bytes());
all_refs.push(interned);
if alloc_idx == 0 {
// First allocation produces a new slab
assert!(!std::ptr::eq(last_slab, pool.last));
last_slab = pool.last;
} else {
assert!(std::ptr::eq(last_slab, pool.last));
}
}
}
// All strings are still correct
for string_ref in all_refs {
assert_eq!(string_ref.as_str(), to_intern);
}
}
}
|