Changeset - bc7021d551ad
Christopher Esterhuyse - 2020-02-20 21:49:25
christopher.esterhuyse@gmail.com
bitsets are so great
2 files changed with 35 insertions and 23 deletions:
src/runtime/experimental/bits.rs
 
use crate::common::*;

use std::alloc::Layout;

pub const fn usize_bytes() -> usize {
    std::mem::size_of::<usize>()
}
pub const fn usize_bits() -> usize {
    usize_bytes() * 8
}
pub const fn usizes_for_bits(bits: usize) -> usize {
    (bits + (usize_bits() - 1)) / usize_bits()
}

type Chunk = usize;
type BitIndex = usize;

/// Given an iterator over Chunk items, iterates over the indices (each represented as a u32) for which the bit is SET,
/// treating the bits of all chunks as one contiguous array.
/// E.g. input [0b111000, 0b11] gives output [3, 4, 5, 32, 33].
/// Observe that the bits per chunk are ordered from least to most significant, yielding smaller to larger indices.
/// Assumes chunk_iter will yield no more than std::u32::MAX / 32 chunks.
pub(crate) struct BitChunkIter<I: Iterator<Item = Chunk>> {
    cached: usize,
    chunk_iter: I,
    next_bit_index: BitIndex,
}
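// The Iterator impl for BitChunkIter (named in the hunk header below) is collapsed out
// of this view. Purely as a hedged illustration of the behaviour the doc comment above
// describes, and not the repository's implementation, a set-bit index iterator over
// usize chunks could be written as:
fn set_bit_indices(chunks: impl Iterator<Item = Chunk>) -> impl Iterator<Item = u32> {
    chunks.enumerate().flat_map(|(chunk_index, chunk)| {
        (0..usize_bits()).filter_map(move |bit| {
            if chunk & (1 << bit) != 0 {
                // bit `bit` of chunk `chunk_index` maps to one index in the flat bit array
                Some((chunk_index * usize_bits() + bit) as u32)
            } else {
                None
            }
        })
    })
}
// e.g. set_bit_indices([0b111000, 0b11].into_iter()) yields 3, 4, 5, then usize_bits()
// and usize_bits() + 1 (the doc example's "32, 33" assumes 32-bit chunks).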
 
@@ -86,59 +87,57 @@ impl<I: Iterator<Item = Chunk>> Iterator for BitChunkIter<I> {
 
/*  --properties-->
     ___ ___ ___ ___
    |___|___|___|___|
  | |___|___|___|___|
  | |___|___|___|___|
  | |___|___|___|___|
  |
  V
 entity chunks (groups of size usize_bits())
*/

// TODO newtypes Entity and Property

#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Pair {
    entity: u32,
    property: u32,
}
impl From<[u32; 2]> for Pair {
    fn from([entity, property]: [u32; 2]) -> Self {
        Pair { entity, property }
    }
}
 
struct BitMatrix {
-   bounds: Pair,
    buffer: *mut usize,
+   bounds: Pair,
+   layout: Layout, // layout of the currently-allocated buffer
}
impl Drop for BitMatrix {
    fn drop(&mut self) {
        let total_chunks = Self::row_chunks(self.bounds.property as usize)
            * Self::column_chunks(self.bounds.entity as usize);
        let layout = Self::layout_for(total_chunks);
        unsafe {
            // ?
-           std::alloc::dealloc(self.buffer as *mut u8, layout);
+           std::alloc::dealloc(self.buffer as *mut u8, self.layout);
        }
    }
}
 
impl Debug for BitMatrix {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        let row_chunks = Self::row_chunks(self.bounds.property as usize);
        let column_chunks = Self::column_chunks(self.bounds.entity as usize);
        for property in 0..row_chunks {
            for entity_chunk in 0..column_chunks {
                write!(f, "|")?;
                let mut chunk = unsafe { *self.buffer.add(row_chunks * entity_chunk + property) };
                let end = if entity_chunk + 1 == column_chunks {
                    self.bounds.entity % usize_bits() as u32
                } else {
                    usize_bits() as u32
                };
                for _ in 0..end {
                    let c = match chunk & 1 {
                        0 => '0',
                        _ => '1',
                    };
                    write!(f, "{}", c)?;
                    chunk >>= 1;
                }
 
@@ -167,78 +166,72 @@ impl BitMatrix {
 
        let row = Self::row_of(at.entity as usize);
        let row_chunks = self.bounds.property as usize;
        let o_of = row * row_chunks + at.property as usize;
        [o_of, o_in]
    }
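    // Worked example of the offset arithmetic above (hedged: row_of and o_in are computed
    // on lines not shown in this hunk, and are assumed here to be
    // row_of(e) = e / usize_bits() and o_in = e % usize_bits()).
    // With usize_bits() == 64, bounds.property == 3 and at == Pair { entity: 70, property: 2 }:
    //   row  = 70 / 64    == 1
    //   o_of = 1 * 3 + 2  == 5   (which usize chunk in the buffer)
    //   o_in = 70 % 64    == 6   (which bit within that chunk)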
 
    // returns a usize mask which has bits 000...000111...111
    // for the last JAGGED chunk given the column size.
    // If the last chunk is not jagged (when entity_bound % usize_bits() == 0),
    // None is returned;
    // otherwise Some(x) is returned such that x & chunk would mask out
    // the bits NOT in 0..entity_bound.
    fn last_row_chunk_mask(entity_bound: u32) -> Option<usize> {
        let zero_prefix_len = entity_bound as usize % usize_bits();
        if zero_prefix_len == 0 {
            None
        } else {
            Some(!0 >> (usize_bits() - zero_prefix_len))
        }
    }
    fn assert_within_bounds(&self, at: Pair) {
        assert!(at.entity < self.bounds.entity);
        assert!(at.property < self.bounds.property);
    }
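    // Worked examples for last_row_chunk_mask above, assuming 64-bit usize
    // (illustrative values only, not part of this changeset):
    //   last_row_chunk_mask(70)  == Some(0b111111)   // 70 % 64 == 6 low bits are kept
    //   last_row_chunk_mask(128) == None              // 128 % 64 == 0: no jagged chunk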
 

	
 
-   fn layout_for(mut total_chunks: usize) -> std::alloc::Layout {
+   fn layout_for(total_chunks: usize) -> std::alloc::Layout {
        unsafe {
            // this layout is ALWAYS valid:
            // 1. size is always nonzero
            // 2. size is always a multiple of 4 and 4-aligned
-           if total_chunks == 0 {
-               total_chunks = 1;
-           }
-           std::alloc::Layout::from_size_align_unchecked(
-               usize_bytes() * total_chunks,
-               usize_bytes(),
-           )
+           Layout::from_size_align_unchecked(usize_bytes() * total_chunks.max(1), usize_bytes())
        }
    }
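    // Worked example of the layout arithmetic above (illustrative, not part of this changeset):
    //   layout_for(0).size() == usize_bytes()       // clamped up to a single chunk
    //   layout_for(3).size() == 3 * usize_bytes()
    //   layout_for(n).align() == usize_bytes()      // always usize-aligned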
 
    /////////

    fn reshape(&mut self, bounds: Pair) {
        todo!()
    }

    fn new(bounds: Pair) -> Self {
        let total_chunks = Self::row_chunks(bounds.property as usize)
            * Self::column_chunks(bounds.entity as usize);
        let layout = Self::layout_for(total_chunks);
        let buffer;
        unsafe {
            buffer = std::alloc::alloc(layout) as *mut usize;
            buffer.write_bytes(0u8, total_chunks);
        };
-       Self { buffer, bounds }
+       Self { buffer, bounds, layout }
    }
    fn set(&mut self, at: Pair) {
        self.assert_within_bounds(at);
        let [o_of, o_in] = self.offsets_unchecked(at);
        unsafe { *self.buffer.add(o_of) |= 1 << o_in };
    }
    fn unset(&mut self, at: Pair) {
        self.assert_within_bounds(at);
        let [o_of, o_in] = self.offsets_unchecked(at);
        unsafe { *self.buffer.add(o_of) &= !(1 << o_in) };
    }
    fn test(&self, at: Pair) -> bool {
        self.assert_within_bounds(at);
        let [o_of, o_in] = self.offsets_unchecked(at);
        unsafe { *self.buffer.add(o_of) & 1 << o_in != 0 }
    }
 

	
 
    fn batch_mut<'a, 'b>(&mut self, mut chunk_mut_fn: impl FnMut(&'b mut [BitChunk])) {
        let row_chunks = Self::row_chunks(self.bounds.property as usize);
        let column_chunks = Self::column_chunks(self.bounds.entity as usize);
        let mut ptr = self.buffer;
        for _row in 0..column_chunks {
            let slice;
            unsafe {
 
@@ -276,54 +269,48 @@ impl BitMatrix {
 
        for _row in 0..column_chunks {
            let slice;
            unsafe {
                let slicey = std::slice::from_raw_parts(ptr, row_chunks);
                slice = std::mem::transmute(slicey);
                ptr = ptr.add(row_chunks);
            }
            let chunk = fold_fn(slice);
            buf.push(chunk.0);
        }
        if let Some(mask) = Self::last_row_chunk_mask(self.bounds.entity) {
            *buf.iter_mut().last().unwrap() &= mask;
        }
        BitChunkIter::new(buf.drain(buf_start..)).map(|x| x as u32)
    }
}
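// Note on the fold loop ending above: the enclosing method (its signature is collapsed
// out of this hunk) appears to fold each entity-chunk's row of per-property chunks into
// a single BitChunk via fold_fn, mask the jagged tail, and yield the surviving entity
// indices through BitChunkIter. A caller could, for example, pass a closure such as
// |row: &[BitChunk]| row[0] & row[1] to iterate the entities having BOTH property 0 and
// property 1 set. (Hedged reading of partially visible code, not a documented API.)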
 

	
 
use derive_more::*;
#[derive(
    Debug, Copy, Clone, BitAnd, Not, BitOr, BitXor, BitAndAssign, BitOrAssign, BitXorAssign,
)]
#[repr(transparent)]
pub struct BitChunk(usize);
impl BitChunk {
    const fn bits() -> usize {
        Self::bytes() * 8
    }
    const fn bytes() -> usize {
        std::mem::size_of::<Self>()
    }
    const fn any(self) -> bool {
        self.0 != FALSE.0
    }
    const fn all(self) -> bool {
        self.0 == TRUE.0
    }
}
const TRUE: BitChunk = BitChunk(!0);
const FALSE: BitChunk = BitChunk(0);
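// A hypothetical sanity check of the BitChunk combinators above (not part of this
// changeset); the bitwise operators come from the derive_more derives.
#[test]
fn bitchunk_ops() {
    let a = BitChunk(0b1010);
    let b = BitChunk(0b0110);
    assert!((a | b).any()); // at least one bit set
    assert!(!(a & b).all()); // 0b0010 is not all-ones
    assert!(!(a ^ a).any()); // xor with itself clears every bit
}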
 

	
 
#[test]
fn matrix_test() {
    let mut m = BitMatrix::new(Pair { entity: 70, property: 3 });
    m.set([2, 0].into());
    m.set([40, 1].into());
    m.set([40, 2].into());
    m.set([40, 0].into());
    println!("{:?}", &m);

    m.batch_mut(|p| p[0] = TRUE);
    println!("{:?}", &m);

    for i in (0..40).step_by(7) {
        m.unset([i, 0].into());
src/runtime/experimental/vec_storage.rs
 
@@ -26,48 +26,49 @@ impl Bitvec {
 
    }
    // assumes read will not go out of bounds
    #[inline]
    unsafe fn contains(&self, i: usize) -> bool {
        let [o_of, o_in] = Self::offsets_of(i);
        (*self.0.get_unchecked(o_of) & (1 << o_in)) != 0
    }
    fn pop_first(&mut self) -> Option<usize> {
        let i = self.first()?;
        unsafe { self.remove(i) };
        Some(i)
    }
    fn iter(&self) -> impl Iterator<Item = usize> + '_ {
        BitChunkIter::new(self.0.iter().copied()).map(|x| x as usize)
    }
    fn first(&self) -> Option<usize> {
        self.iter().next()
    }
}
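// Hypothetical illustration of Bitvec's ascending-order behaviour (not part of this
// changeset). It assumes Bitvec::remove clears the bit it is given, as its use in
// pop_first above suggests, and that bit i of chunk 0 encodes index i.
#[test]
fn bitvec_pop_first_ascending() {
    let mut b = Bitvec(vec![0b1010]);
    assert_eq!(b.pop_first(), Some(1));
    assert_eq!(b.pop_first(), Some(3));
    assert_eq!(b.pop_first(), None);
}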
 

	
 
// A T-type arena which:
// 1. does not check for the ABA problem
// 2. imposes the object keys on the user
// 3. allows the reservation of a space (getting the key) to precede the value being provided.
// 4. checks for user error
//
// Data contains values in one of three states:
// 1. occupied: initialized. will be dropped.
// 2. vacant: uninitialized. may be reused implicitly. won't be dropped.
// 3. reserved: uninitialized. may be occupied implicitly. won't be dropped.
//
// element access is O(1)
// removal is O(1) amortized.
// insertion is O(N) with a small constant factor,
//    doing at worst one linear probe through N/(word_size) contiguous words (see the sketch below)
//
// invariant A: data elements are initialized <=> occupied bit is set
// invariant B: occupied and vacant have an empty intersection
// invariant C: (vacant U occupied) subset of (0..data.len)
// invariant D: last element of data is not in VACANT state
// invariant E: number of allocated bits in vacant and occupied >= data.len()
pub struct VecStorage<T> {
    data: Vec<MaybeUninit<T>>,
    occupied: Bitvec,
    vacant: Bitvec,
}
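// The "linear probe through contiguous words" mentioned above, sketched as a
// hypothetical free function (not the changeset's own insertion path): scan a bitset
// one whole usize word at a time and return the index of the first clear bit.
fn first_unset_bit(chunks: &[usize]) -> Option<usize> {
    for (word_index, &word) in chunks.iter().enumerate() {
        if word != !0 {
            // the first zero bit of `word` is the first set bit of `!word`
            return Some(word_index * usize_bits() + (!word).trailing_zeros() as usize);
        }
    }
    None
}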
 
impl<T> Default for VecStorage<T> {
    fn default() -> Self {
        Self { data: Default::default(), vacant: Default::default(), occupied: Default::default() }
 
@@ -100,48 +101,62 @@ impl<T: Debug> Debug for VecStorage<T> {
 
                FmtT::Vacant
            } else {
                FmtT::Reserved
            }
        });
        f.debug_list().entries(iter).finish()
    }
}
impl<T> Drop for VecStorage<T> {
    fn drop(&mut self) {
        self.clear();
    }
}
 
impl<T> VecStorage<T> {
    // ASSUMES that i in 0..self.data.len()
    unsafe fn get_occupied_unchecked(&self, i: usize) -> Option<&T> {
        if self.occupied.contains(i) {
            // 2. Invariant A => reading valid data
            Some(&*self.data.get_unchecked(i).as_ptr())
        } else {
            None
        }
    }
    //////////////
    pub fn with_reserved_range(range_end: usize) -> Self {
        let mut data = Vec::with_capacity(range_end);
        unsafe {
            // data is uninitialized, as intended
            data.set_len(range_end);
        }
        let bitset_len = (range_end + (usize_bits() - 1)) / usize_bits();
        let chunk_iter = std::iter::repeat(0usize).take(bitset_len);
        Self {
            data,
            vacant: Bitvec(chunk_iter.clone().collect()),
            occupied: Bitvec(chunk_iter.collect()),
        }
    }
 
    pub fn clear(&mut self) {
        for i in 0..self.data.len() {
            // SAFE: bitvec bounds ensured by invariant E
            if unsafe { self.occupied.contains(i) } {
                // invariant A: this element is OCCUPIED
                unsafe {
                    // 1. by construction, i is in bounds
                    // 2. i is occupied => initialized data is being dropped
                    drop(self.data.get_unchecked_mut(i).as_ptr().read());
                }
            }
        }
        self.vacant.0.clear();
        self.occupied.0.clear();
    }
    pub fn iter(&self) -> impl Iterator<Item = &T> {
        (0..self.data.len()).filter_map(move |i| unsafe { self.get_occupied_unchecked(i) })
    }
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut T> {
        (0..self.data.len()).filter_map(move |i| unsafe {
            // SAFE: bitvec bounds ensured by invariant E
            if self.occupied.contains(i) {
                // Invariant A => reading valid data
                Some(&mut *self.data.get_unchecked_mut(i).as_mut_ptr())
 
@@ -247,43 +262,53 @@ impl<T> VecStorage<T> {
 
                }
            }
        } else {
            // ... by populating self.vacant.
            // SAFE: bitvec bounds ensured by invariant E
            unsafe { self.vacant.insert(i) };
        }
        value
    }
    pub fn iter_reserved(&self) -> impl Iterator<Item = usize> + '_ {
        BitChunkIter::new(self.occupied.0.iter().zip(self.vacant.0.iter()).map(|(&a, &b)| !(a | b)))
            .take_while(move |&x| x < self.data.len())
    }
}
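// A hypothetical check of iter_reserved's "neither occupied nor vacant" rule (not part
// of this changeset): with_reserved_range(4) starts every slot in the RESERVED state,
// so before any occupy/vacate calls the iterator should yield exactly 0..4. This assumes
// BitChunkIter yields set-bit indices as documented in bits.rs.
#[test]
fn reserved_range_iter() {
    let v: VecStorage<u8> = VecStorage::with_reserved_range(4);
    assert_eq!(v.iter_reserved().collect::<Vec<_>>(), vec![0, 1, 2, 3]);
}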
 

	
 
#[test]
fn vec_storage() {
    #[derive(Debug)]
    struct Foo;
    impl Drop for Foo {
        fn drop(&mut self) {
            println!("DROPPING FOO!");
        }
    }
-   let mut v = VecStorage::default();
+   let mut v = VecStorage::with_reserved_range(4);
    let i0 = v.new_occupied(Foo);
    println!("{:?}", &v);

    let i1 = v.new_reserved();
    println!("{:?}", &v);

    println!("reserved {:?}", v.iter_reserved().collect::<Vec<_>>());

-   let q = v.vacate(i0);
-   println!("q {:?}", q);
+   println!("q {:?}", v.vacate(i0));
    println!("{:?}", &v);

    println!("q {:?}", v.vacate(2));
    println!("{:?}", &v);

    println!("q {:?}", v.vacate(1));
    println!("{:?}", &v);

    v.occupy_reserved(i1, Foo);
    println!("{:?}", &v);

    *v.get_occupied_mut(i1).unwrap() = Foo;
    println!("{:?}", &v);

    println!("q {:?}", v.vacate(i1));
    println!("{:?}", &v);
    println!("q {:?}", v.vacate(3));
    println!("{:?}", &v);
}