Changeset - 8622657b70f9
[Not reviewed]
MH <contact@maxhenger.nl> - 2022-01-07 11:48:04
Add ctor/dtor tests to MPSC queue

Now also testing that resources are properly moved and/or bitwise
copied inside of the MPSC queue. To make this possible the Resource
testing struct was moved outside of the store::component::tests
module.
4 files changed with 142 insertions and 72 deletions:
0 comments (0 inline, 0 general)
src/runtime2/store/component.rs
 
/*
 * Component Store
 *
 * Concurrent datastructure for creating/destroying/retrieving components using
 * their ID. It is essentially a variation on a concurrent freelist. We store an
 * array of (potentially null) pointers to data. Indices into this array that
 * are unused (but may be left allocated) are in a freelist. So creating a new
 * bit of data involves taking an index from this freelist. Destruction involves
 * putting the index back.
 *
 * This datastructure takes care of the thread-safe implementation of the
 * freelist and calling the data's destructor when needed. Note that it is not
 * completely safe (in Rust's sense of the word) because it is possible to
 * get more than one mutable reference to a piece of data. Likewise it is
 * possible to put back bogus indices into the freelist, which will destroy the
 * integrity of the datastructure.
 *
 * Some underlying assumptions that led to this design (note that I haven't
 * actually checked these conditions or performed any real profiling, yet):
 *  - Resizing the freelist should be very rare. The datastructure should grow
 *    to some kind of maximum size and stay at that size.
 *  - Creation should (preferably) be faster than deletion of data. Reason being
 *    that creation implies we're creating a component that has code to be
 *    executed. Better to quickly be able to execute code than being able to
 *    quickly tear down finished components.
 *  - Retrieval is much more likely than creation/destruction.
 *
 * Some obvious flaws with this implementation:
 *  - Because of the freelist implementation we will generally allocate all of
 *    the data pointers that are available (i.e. if we have a buffer of size
 *    64, but we generally use 33 elements, then we'll have 64 elements
 *    allocated), which might be wasteful at larger array sizes (which are
 *    always powers of two).
 *  - A lot of concurrent operations are not necessary: we could avoid some of
 *    the accesses to the global concurrent datastructure by first going through
 *    some kind of thread-local datastructure.
 */

use std::mem::transmute;
 
use std::alloc::{alloc, dealloc, Layout};
 
use std::ptr;
 
use std::sync::atomic::{AtomicUsize, Ordering};
 

	
 
use super::unfair_se_lock::{UnfairSeLock, UnfairSeLockSharedGuard};
 

	
 
pub struct ComponentStore<T: Sized> {
 
    inner: UnfairSeLock<Inner<T>>,
 
    read_head: AtomicUsize,
 
    write_head: AtomicUsize,
 
    limit_head: AtomicUsize,
 
}
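
// A minimal usage sketch (hypothetical, not part of this changeset), showing
// how the store hands out `u32` indices that act as component IDs:
//
//     let store = ComponentStore::new(32);  // initial size: a power of two, at least 4
//     let index = store.create(5u64);       // takes an index from the freelist
//     assert_eq!(*store.get(index), 5);     // retrieval by index
//     store.destroy(index);                 // puts the index back on the freelist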
 

	
 
unsafe impl<T: Sized> Send for ComponentStore<T>{}
 
unsafe impl<T: Sized> Sync for ComponentStore<T>{}
 

	
 
struct Inner<T: Sized> {
 
    freelist: Vec<u32>,
 
    data: Vec<*mut T>,
 
    size: usize,
 
    compare_mask: usize,
 
    index_mask: usize,
 
}
 

	
 
type InnerRead<'a, T> = UnfairSeLockSharedGuard<'a, Inner<T>>;
 

	
 
impl<T: Sized> ComponentStore<T> {
 
    pub fn new(initial_size: usize) -> Self {
 
        Self::assert_valid_size(initial_size);
 

	
 
        // Fill initial freelist and preallocate data array
 
        let mut initial_freelist = Vec::with_capacity(initial_size);
 
        for idx in 0..initial_size {
 
            initial_freelist.push(idx as u32)
 
        }
 

	
 
        let mut initial_data = Vec::new();
 
        initial_data.resize(initial_size, ptr::null_mut());
 

	
 
        // Return initial store
 
        return Self{
 
            inner: UnfairSeLock::new(Inner{
 
                freelist: initial_freelist,
 
                data: initial_data,
 
                size: initial_size,
 
                compare_mask: 2*initial_size - 1,
 
                index_mask: initial_size - 1,
 
            }),
 
            read_head: AtomicUsize::new(0),
 
            write_head: AtomicUsize::new(initial_size),
 
            limit_head: AtomicUsize::new(initial_size),
 
        };
 
    }
 

	
 
    /// Creates a new element initialized to the provided `value`. This returns
 
    /// the index at which the element can be retrieved.
 
    pub fn create(&self, value: T) -> u32 {
 
        let lock = self.inner.lock_shared();
 
        let (lock, index) = self.pop_freelist_index(lock);
 
        self.initialize_at_index(lock, index, value);
 
        return index;
 
    }
 

	
 
    /// Destroys an element at the provided `index`. The caller must make sure
 
    /// that it does not use any previously received references to the data at
 
    /// this index, and that no more calls to `get` are performed using this
 
    /// index. This is allowed again if the index has been reacquired using
 
    /// `create`.
 
    pub fn destroy(&self, index: u32) {
 
        let lock = self.inner.lock_shared();
 
        self.destruct_at_index(&lock, index);
 
        self.push_freelist_index(&lock, index);
 
    }
 

	
 
    /// Retrieves an element by reference
 
    pub fn get(&self, index: u32) -> &T {
 
        let lock = self.inner.lock_shared();
 
        let value = lock.data[index as usize];
 
        unsafe {
 
            debug_assert!(!value.is_null());
 
            return &*value;
 
        }
 
    }
 

	
 
    /// Retrieves an element by mutable reference. The caller should ensure that
 
    /// use of that mutability is thread-safe
 
    pub fn get_mut(&self, index: u32) -> &mut T {
 
        let lock = self.inner.lock_shared();
 
        let value = lock.data[index as usize];
 
        unsafe {
 
            debug_assert!(!value.is_null());
 
            return &mut *value;
 
        }
 
    }
 

	
 
    #[inline]
 
    fn pop_freelist_index<'a>(&'a self, mut read_lock: InnerRead<'a, T>) -> (InnerRead<'a, T>, u32) {
 
        'attempt_read: loop {
 
            // Load indices and check for reallocation condition
 
            let current_size = read_lock.size;
 
            let mut read_index = self.read_head.load(Ordering::Relaxed);
 
            let limit_index = self.limit_head.load(Ordering::Acquire);
 

	
 
            if read_index == limit_index {
 
                read_lock = self.reallocate(current_size, read_lock);
 
                continue 'attempt_read;
 
            }
 

	
 
            loop {
 
                let preemptive_read = read_lock.freelist[read_index & read_lock.index_mask];
 
                if let Err(actual_read_index) = self.read_head.compare_exchange(
 
                    read_index, (read_index + 1) & read_lock.compare_mask,
 
                    Ordering::AcqRel, Ordering::Acquire
 
                ) {
 
                    // We need to try again
 
                    read_index = actual_read_index;
 
                    continue 'attempt_read;
 
                }
 

	
 
                // If here then we performed the read
 
                return (read_lock, preemptive_read);
 
            }
 
        }
 
    }
 

	
 
    #[inline]
 
    fn initialize_at_index(&self, read_lock: InnerRead<T>, index: u32, value: T) {
 
        let mut target_ptr = read_lock.data[index as usize];
 

	
 
        unsafe {
 
            if target_ptr.is_null() {
 
                let layout = Layout::for_value(&value);
 
                target_ptr = std::alloc::alloc(layout).cast();
 
                let rewrite: *mut *mut T = transmute(read_lock.data.as_ptr());
 
                *rewrite.add(index as usize) = target_ptr;
 
            }
 

	
 
            std::ptr::write(target_ptr, value);
 
        }
 
    }
 

	
 
    #[inline]
 
    fn push_freelist_index(&self, read_lock: &InnerRead<T>, index_to_put_back: u32) {
 
        // Acquire an index in the freelist to which we can write
 
        let mut cur_write_index = self.write_head.load(Ordering::Relaxed);
 
        let mut new_write_index = (cur_write_index + 1) & read_lock.compare_mask;
 
        while let Err(actual_write_index) = self.write_head.compare_exchange(
 
            cur_write_index, new_write_index,
 
            Ordering::AcqRel, Ordering::Acquire
 
        ) {
 
            cur_write_index = actual_write_index;
 
            new_write_index = (cur_write_index + 1) & read_lock.compare_mask;
 
        }
 

	
 
        // We own the data at the index, write to it and notify the reader
        // through limit_head that it can be read from. Note that we cheat
        // around the Rust mutability system here :)
 
        unsafe {
 
            let target: *mut u32 = transmute(read_lock.freelist.as_ptr());
 
            *(target.add(cur_write_index & read_lock.index_mask)) = index_to_put_back;
 
        }
 

	
 
        // Essentially spinlocking, relaxed failure ordering because the logic
 
        // is that a write first moves the `write_head`, then the `limit_head`.
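        // Each writer spins here until `limit_head` reaches the slot index it
        // reserved, i.e. until all earlier writers have published their slots,
        // so readers see freelist entries become available in reservation order.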
 
        while let Err(_) = self.limit_head.compare_exchange(
 
            cur_write_index, new_write_index,
 
            Ordering::AcqRel, Ordering::Relaxed
 
        ) {};
 
    }
 

	
 
    #[inline]
 
    fn destruct_at_index(&self, read_lock: &InnerRead<T>, index: u32) {
 
        let target_ptr = read_lock.data[index as usize];
 
        unsafe{ ptr::drop_in_place(target_ptr); }
 
    }
 

	
 
    // NOTE: Bit of a mess, and could have a cleanup with better logic for the
 
    // resizing. Maybe even a different indexing scheme...
 
    fn reallocate(&self, old_size: usize, inner: InnerRead<T>) -> InnerRead<T> {
 
        drop(inner);
 
        {
 
            // After dropping read lock, acquire write lock
 
            let mut lock = self.inner.lock_exclusive();
 

	
 
            if old_size == lock.size {
 
                // We are the thread that is supposed to reallocate
 
                let new_size = old_size * 2;
 
                Self::assert_valid_size(new_size);
 

	
 
                // Note that the atomic indices are in the range [0, new_size)
 
                // already, so we need to be careful
 
                let new_index_mask = new_size - 1;
 
                let new_compare_mask = (2 * new_size) - 1;
 
                lock.data.resize(new_size, ptr::null_mut());
 
                lock.freelist.resize(new_size, 0);
 
                for idx in 0..old_size {
 
                    lock.freelist[old_size + idx] = lock.freelist[idx];
 
                }
 

	
 
                // We need to fill the freelist with the indices of all of the
 
                // new elements that we have just created.
 
                debug_assert_eq!(self.limit_head.load(Ordering::SeqCst), self.write_head.load(Ordering::SeqCst));
 
                let old_read_index = self.read_head.load(Ordering::SeqCst);
 
                let old_write_index = self.write_head.load(Ordering::SeqCst);
 

	
 
                if old_read_index > old_write_index {
 
                    // Read index wraps, so keep it as-is and fill
 
                    let new_read_index = old_read_index + old_size;
 
                    for index in 0..old_size {
 
                        let target_idx = (new_read_index + index) & new_index_mask;
 
                        lock.freelist[target_idx] = (old_size + index) as u32;
 
                    }
 

	
 
                    self.read_head.store(new_read_index, Ordering::SeqCst);
 
                    debug_assert!(new_read_index < 2*new_size);
 
                    debug_assert!(old_write_index.wrapping_sub(new_read_index) & new_compare_mask <= new_size);
 
                } else {
 
                    // No wrapping, so increment write index
 
                    let new_write_index = old_write_index + old_size;
 
                    for index in 0..old_size {
 
                        let target_idx = (old_write_index + index) & new_index_mask;
 
                        lock.freelist[target_idx] = (old_size + index) as u32;
 
                    }
 

	
 
                    // Update write/limit heads
 
                    self.write_head.store(new_write_index, Ordering::SeqCst);
 
                    self.limit_head.store(new_write_index, Ordering::SeqCst);
 
                    debug_assert!(new_write_index < 2*new_size);
 
                    debug_assert!(new_write_index.wrapping_sub(old_read_index) & new_compare_mask <= new_size);
 
                }
 

	
 
                // Update sizes and masks
 
                lock.size = new_size;
 
                lock.compare_mask = new_compare_mask;
 
                lock.index_mask = new_index_mask;
 
            } // else: someone else allocated, so we don't have to
 
        }
 

	
 
        // We've dropped the write lock, acquire the read lock again
 
        return self.inner.lock_shared();
 
    }
 

	
 
    #[inline]
 
    fn assert_valid_size(size: usize) {
 
        // Conditions the size needs to adhere to. Some are a bit excessive, but
        // we don't hit this check very often.
 
        assert!(
 
            size.is_power_of_two() &&
 
                size >= 4 &&
 
                size <= usize::MAX / 2 &&
 
                size <= u32::MAX as usize
 
        );
 
    }
 
}
 

	
 
impl<T: Sized> Drop for ComponentStore<T> {
 
    fn drop(&mut self) {
 
        let value_layout = Layout::from_size_align(
 
            std::mem::size_of::<T>(), std::mem::align_of::<T>()
 
        ).unwrap();
 

	
 
        // Note that if the indices exist in the freelist then the destructor
 
        // has already been called. So handle them first
 
        let mut lock = self.inner.lock_exclusive();
 

	
 
        let read_index = self.read_head.load(Ordering::Acquire);
 
        let write_index = self.write_head.load(Ordering::Acquire);
 
        debug_assert_eq!(write_index, self.limit_head.load(Ordering::Acquire));
 

	
 
        let mut index = read_index;
 
        while index != write_index {
 
            let dealloc_index = lock.freelist[index & lock.index_mask] as usize;
 
            let target_ptr = lock.data[dealloc_index];
 

	
 
            unsafe {
 
                dealloc(target_ptr.cast(), value_layout);
 
                lock.data[dealloc_index] = ptr::null_mut();
 
            }
 

	
 
            index += 1;
 
            index &= lock.compare_mask;
 
        }
 

	
 
        // With all of those set to null, we'll just iterate through all
 
        // pointers and destruct+deallocate the ones not set to null yet
 
        for target_ptr in lock.data.iter().copied() {
 
            if !target_ptr.is_null() {
 
                unsafe {
 
                    ptr::drop_in_place(target_ptr);
 
                    dealloc(target_ptr.cast(), value_layout);
 
                }
 
            }
 
        }
 
    }
 
}
 

	
 
#[cfg(test)]
 
mod tests {
 
    use super::*;
 
    use crate::runtime2::store::tests::Resource;
 
    use super::super::tests::*;
 

	
 
    use rand::prelude::*;
 
    use rand_pcg::Pcg32;
 

	
 
    use std::sync::Arc;
 
    use std::sync::atomic::{AtomicU64, Ordering};
 

	
 
    fn seeds() -> Vec<[u8;16]> {
 
        return vec![
 
            [241, 47, 70, 87, 240, 246, 20, 173, 219, 143, 74, 23, 158, 58, 205, 172],
 
            [178, 112, 230, 205, 230, 178, 2, 90, 162, 218, 49, 196, 224, 222, 208, 43],
 
            [245, 42, 35, 167, 153, 205, 221, 144, 200, 253, 144, 117, 176, 231, 17, 70],
 
            [143, 39, 177, 216, 124, 96, 225, 39, 30, 82, 239, 193, 133, 58, 255, 193],
 
            [25, 105, 10, 52, 161, 212, 190, 112, 178, 193, 68, 249, 167, 153, 172, 144],
 
        ]
 
    }
 

	
 
    #[test]
 
    fn test_ctor_dtor_simple_unthreaded() {
 
        const NUM_ROUNDS: usize = 5;
 
        const NUM_ELEMENTS: usize = 1024;
 

	
 
        let store = ComponentStore::new(32);
 
        let ctor_counter = Arc::new(AtomicU64::new(0));
 
        let dtor_counter = Arc::new(AtomicU64::new(0));
 
        let counters = Counters::new();
 

	
 
        let mut indices = Vec::with_capacity(NUM_ELEMENTS);
 
        for _round_index in 0..NUM_ROUNDS {
 
            // Creation round
 
            for value in 0..NUM_ELEMENTS {
 
                let new_resource = Resource::new(ctor_counter.clone(), dtor_counter.clone(), value as u64);
 
                let new_resource = Resource::new(&counters, value as u64);
 
                let new_index = store.create(new_resource);
 
                indices.push(new_index);
 
            }
 

	
 
            // Checking round
 
            for el_index in indices.iter().copied() {
 
                let element = store.get(el_index);
 
                assert_eq!(element.val, el_index as u64);
 
            }
 

	
 
            // Destruction round
 
            for el_index in indices.iter().copied() {
 
                store.destroy(el_index);
 
            }
 

	
 
            indices.clear();
 
        }
 

	
 
        let num_ctor_calls = ctor_counter.load(Ordering::Acquire);
 
        let num_dtor_calls = dtor_counter.load(Ordering::Acquire);
 
        assert_eq!(num_ctor_calls, num_dtor_calls);
 
        assert_eq!(num_ctor_calls, (NUM_ROUNDS * NUM_ELEMENTS) as u64);
 
        let num_expected = (NUM_ROUNDS * NUM_ELEMENTS) as u64;
 
        assert_ctor_eq!(counters, num_expected);
 
        assert_dtor_eq!(counters, num_expected);
 
    }
 

	
 
    #[test]
 
    fn test_ctor_dtor_simple_threaded() {
 
        const MAX_SIZE: usize = 1024;
 
        const NUM_THREADS: usize = 4;
 
        const NUM_PER_THREAD: usize = MAX_SIZE / NUM_THREADS;
 
        const NUM_ROUNDS: usize = 4;
 

	
 
        assert!(MAX_SIZE % NUM_THREADS == 0);
 

	
 
        let store = Arc::new(ComponentStore::new(16));
 
        let ctor_counter = Arc::new(AtomicU64::new(0));
 
        let dtor_counter = Arc::new(AtomicU64::new(0));
 
        let counters = Counters::new();
 

	
 
        let mut threads = Vec::with_capacity(NUM_THREADS);
 
        for thread_index in 0..NUM_THREADS {
 
            // Setup local clones to move into the thread
 
            let store = store.clone();
 
            let first_index = thread_index * NUM_PER_THREAD;
 
            let last_index = (thread_index + 1) * NUM_PER_THREAD;
 
            let ctor_counter = ctor_counter.clone();
 
            let dtor_counter = dtor_counter.clone();
 
            let counters = counters.clone();
 

	
 
            let handle = std::thread::spawn(move || {
 
                let mut indices = Vec::with_capacity(last_index - first_index);
 
                for _round_index in 0..NUM_ROUNDS {
 
                    // Creation round
 
                    for value in first_index..last_index {
 
                        let el_index = store.create(Resource::new(ctor_counter.clone(), dtor_counter.clone(), value as u64));
 
                        let el_index = store.create(Resource::new(&counters, value as u64));
 
                        indices.push(el_index);
 
                    }
 

	
 
                    // Checking round
 
                    for (value_offset, el_index) in indices.iter().copied().enumerate() {
 
                        let element = store.get(el_index);
 
                        assert_eq!(element.val, (first_index + value_offset) as u64);
 
                    }
 

	
 
                    // Destruction round
 
                    for el_index in indices.iter().copied() {
 
                        store.destroy(el_index);
 
                    }
 

	
 
                    indices.clear();
 
                }
 
            });
 
            threads.push(handle);
 
        }
 

	
 
        for thread in threads {
 
            thread.join().expect("clean exit");
 
        }
 

	
 
        let num_ctor_calls = ctor_counter.load(Ordering::Acquire);
 
        let num_dtor_calls = dtor_counter.load(Ordering::Acquire);
 
        assert_eq!(num_ctor_calls, num_dtor_calls);
 
        assert_eq!(num_ctor_calls, (NUM_ROUNDS * MAX_SIZE) as u64);
 
        let num_expected = (NUM_ROUNDS * MAX_SIZE) as u64;
 
        assert_ctor_eq!(counters, num_expected);
 
        assert_dtor_eq!(counters, num_expected);
 
    }
 

	
 
    #[test]
 
    fn test_ctor_dtor_random_threaded() {
 
        const NUM_ROUNDS: usize = 4;
 
        const NUM_THREADS: usize = 4;
 
        const NUM_OPERATIONS: usize = 1024;
 
        const NUM_OPS_PER_THREAD: usize = NUM_OPERATIONS / NUM_THREADS;
 
        const NUM_OPS_PER_ROUND: usize = NUM_OPS_PER_THREAD / NUM_ROUNDS;
 
        const NUM_STORED_PER_THREAD: usize = 32;
 

	
 
        assert!(NUM_OPERATIONS % NUM_THREADS == 0);
 
        assert!(NUM_OPS_PER_THREAD / 2 > NUM_STORED_PER_THREAD);
 

	
 
        let seeds = seeds();
 
        for seed_index in 0..seeds.len() {
 
            // Setup store, counters and threads
 
            let store = Arc::new(ComponentStore::new(16));
 
            let ctor_counter = Arc::new(AtomicU64::new(0));
 
            let dtor_counter = Arc::new(AtomicU64::new(0));
 
            let counters = Counters::new();
 

	
 
            let mut threads = Vec::with_capacity(NUM_THREADS);
 
            for thread_index in 0..NUM_THREADS {
 
                // Setup local clones to move into the thread
 
                let store = store.clone();
 
                let ctor_counter = ctor_counter.clone();
 
                let dtor_counter = dtor_counter.clone();
 
                let counters = counters.clone();
 

	
 
                // Setup local rng
 
                let mut seed = seeds[seed_index];
 
                for seed_val_idx in 0..16 {
 
                    seed[seed_val_idx] ^= thread_index as u8; // blegh
 
                }
 
                let mut rng = Pcg32::from_seed(seed);
 

	
 
                let handle = std::thread::spawn(move || {
 
                    let mut stored = Vec::with_capacity(NUM_STORED_PER_THREAD);
 

	
 
                    for _round_index in 0..NUM_ROUNDS {
 
                        // Modify elements in the store randomly, for some
                        // silly definition of random
 
                        for _op_index in 0..NUM_OPS_PER_ROUND {
 
                            // Perform a single operation, depending on the
                            // current number of values owned by this thread
 
                            let new_value = rng.next_u64();
 
                            let should_create = rng.next_u32() % 2 == 0;
 
                            let is_empty = stored.is_empty();
 
                            let is_full = stored.len() == NUM_STORED_PER_THREAD;
 

	
 
                            if is_empty || (!is_full && should_create) {
 
                                // Must create
 
                                let el_index = store.create(Resource::new(
 
                                    ctor_counter.clone(), dtor_counter.clone(), new_value
 
                                ));
 
                                let el_index = store.create(Resource::new(&counters, new_value));
 
                                stored.push((el_index, new_value));
 
                            } else {
 
                                // Must destroy
 
                                let stored_index = new_value as usize % stored.len();
 
                                let (el_index, el_value) = stored.remove(stored_index);
 
                                store.destroy(el_index);
 
                            }
 
                        }
 

	
 
                        // Checking if the values we own still make sense
 
                        for (el_index, value) in stored.iter().copied() {
 
                            let gotten = store.get(el_index);
 
                            assert_eq!(value, gotten.val, "failed at thread {} value {}", thread_index, el_index);
 
                        }
 
                    }
 

	
 
                    return stored.len(); // return number of remaining elements
 
                });
 
                threads.push(handle);
 
            }
 

	
 
            // Done with the current round
 
            let mut total_left_allocated = 0;
 
            for thread in threads {
 
                let num_still_stored = thread.join().unwrap();
 
                total_left_allocated += num_still_stored as u64;
 
            }
 

	
 
            // Before store is dropped
 
            let num_ctor_calls = ctor_counter.load(Ordering::Acquire);
 
            let num_dtor_calls = dtor_counter.load(Ordering::Acquire);
 
            assert_eq!(num_ctor_calls - total_left_allocated, num_dtor_calls);
 
            // note: cannot determine the number of creations, since
            // creation/destruction is random
 
            let num_ctor = counters.ctor.load(Ordering::Acquire);
 
            assert_dtor_eq!(counters, num_ctor - total_left_allocated);
 

	
 
            // After store is dropped
 
            drop(store);
 
            let num_dtor_calls = dtor_counter.load(Ordering::Acquire);
 
            assert_eq!(num_ctor_calls, num_dtor_calls);
 
            assert_dtor_eq!(counters, num_ctor);
 
        }
 
    }
 
}
 
\ No newline at end of file
src/runtime2/store/mod.rs
 
#[macro_use]
 
#[cfg(test)]
 
mod tests;
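// Note: `#[macro_use]` is what makes the `assert_ctor_eq!`/`assert_dtor_eq!`
// macros defined in `tests.rs` visible to the test code in the sibling modules
// declared below.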
 

	
 
pub mod component;
 
pub mod unfair_se_lock;
 
pub mod queue_mpsc;
 

	
 
pub(crate) use component::ComponentStore;
 

	
 
#[cfg(test)]
 
mod tests {
 
    use std::sync::Arc;
 
    use std::sync::atomic::{AtomicU64, Ordering};
 

	
 
    // Utility resource structure that counts the number of constructors and
 
    // destructor calls.
 
    pub struct Resource {
 
        dtor: Arc<AtomicU64>,
 
        val: u64,
 
    }
 

	
 
    impl Resource {
 
        fn new(ctor: Arc<AtomicU64>, dtor: Arc<AtomicU64>, val: u64) -> Self {
 
            ctor.fetch_add(1, Ordering::SeqCst);
 
            return Self{ dtor, val };
 
        }
 
    }
 

	
 
    impl Drop for Resource {
 
        fn drop(&mut self) {
 
            self.dtor.fetch_add(1, Ordering::SeqCst);
 
        }
 
    }
 
}
 
\ No newline at end of file
 
pub(crate) use component::ComponentStore;
 
\ No newline at end of file
src/runtime2/store/queue_mpsc.rs
 
use std::sync::atomic::{AtomicU32, Ordering};
 

	
 
use crate::collections::RawArray;
 
use super::unfair_se_lock::{UnfairSeLock, UnfairSeLockSharedGuard};
 

	
 
/// Multiple-producer single-consumer queue. Generally used in the publicly
 
/// accessible fields of a component. The holder of this struct should be the
 
/// consumer. To retrieve access to the producer-side: call `producer()`.
 
///
 
/// This is a queue that will resize (indefinitely) if it becomes full, and will
 
/// not shrink. So probably a temporary thing.
 
///
 
/// In debug mode we'll make sure that there are no producers when the queue is
 
/// dropped. We don't do this in release mode because the runtime is written
 
/// such that components always remain alive (hence, this queue will remain
 
/// accessible) while there are references to it.
 
// NOTE: Addendum to the above remark: this is not true if the thread owning the
// consumer side crashes, unwinds, and drops the `Box` with it. Question is: do
// I want to take that into account?
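//
// A minimal usage sketch (hypothetical, single-threaded for brevity):
//
//     let mut queue = QueueDynMpsc::new(8);  // capacity is rounded up to a power of two
//     let producer = queue.producer();       // producer handle, may be moved to another thread
//     producer.push(1234u64);
//     assert_eq!(queue.pop(), Some(1234));
//     assert_eq!(queue.pop(), None);         // nothing left in the queue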
 
pub struct QueueDynMpsc<T> {
 
    // Entire contents are boxed up such that we can create producers that have
 
    // a pointer to the contents.
 
    inner: Box<Shared<T>>
 
}
 

	
 
// One may move around the queue between threads, as long as there is only one
 
// instance of it.
 
unsafe impl<T> Send for QueueDynMpsc<T>{}
 

	
 
/// Shared data between queue consumer and the queue producers
 
struct Shared<T> {
 
    data: UnfairSeLock<Inner<T>>,
 
    read_head: AtomicU32,
 
    write_head: AtomicU32,
 
    limit_head: AtomicU32,
 
    #[cfg(debug_assertions)] dbg: AtomicU32,
 
}
 

	
 
/// Locked by an exclusive/shared lock. Exclusive lock is obtained when the
 
/// inner data array is resized.
 
struct Inner<T> {
 
    data: RawArray<T>,
 
    compare_mask: u32,
 
    read_mask: u32,
 
}
 

	
 
type InnerRead<'a, T> = UnfairSeLockSharedGuard<'a, Inner<T>>;
 

	
 
impl<T> QueueDynMpsc<T> {
 
    /// Constructs a new MPSC queue. Note that the initial capacity is always
 
    /// increased to the next power of 2 (if it isn't already).
 
    pub fn new(initial_capacity: usize) -> Self {
 
        let initial_capacity = initial_capacity.next_power_of_two();
 
        assert_correct_capacity(initial_capacity);
 

	
 
        let mut data = RawArray::new();
 
        data.resize(initial_capacity);
 

	
 
        let initial_capacity = initial_capacity as u32;
 

	
 
        return Self{
 
            inner: Box::new(Shared {
 
                data: UnfairSeLock::new(Inner{
 
                    data,
 
                    compare_mask: (2 * initial_capacity) - 1,
 
                    read_mask: initial_capacity - 1,
 
                }),
 
                read_head: AtomicU32::new(0),
 
                write_head: AtomicU32::new(initial_capacity),
 
                limit_head: AtomicU32::new(initial_capacity),
 
                #[cfg(debug_assertions)] dbg: AtomicU32::new(0),
 
            }),
 
        };
 
    }
 

	
 
    #[inline]
 
    pub fn producer(&self) -> QueueDynProducer<T> {
 
        return QueueDynProducer::new(self);
 
    }
 

	
 
    /// Performs an attempted read from the queue. It might be that some
    /// producer is putting something into the queue while this function is
    /// executing, and we don't get to consume it.
 
    pub fn pop(&mut self) -> Option<T> {
 
        let data_lock = self.inner.data.lock_shared();
 
        let cur_read = self.inner.read_head.load(Ordering::Acquire);
 
        let cur_limit = self.inner.limit_head.load(Ordering::Acquire);
 
        let buf_size = data_lock.data.cap() as u32;
 

	
 
        if (cur_read + buf_size) & data_lock.compare_mask != cur_limit {
 
            // Make a bitwise copy of the value and return it. The receiver is
 
            // responsible for dropping it.
 
            unsafe {
 
                let source = data_lock.data.get((cur_read & data_lock.read_mask) as usize);
 
                let value = std::ptr::read(source);
 
                // We can perform a store since we're the only ones modifying
 
                // the atomic.
 
                self.inner.read_head.store((cur_read + 1) & data_lock.compare_mask, Ordering::Release);
 
                return Some(value);
 
            }
 
        } else {
 
            return None;
 
        }
 
    }
 
}
 

	
 
impl<T> Drop for QueueDynMpsc<T> {
 
    fn drop(&mut self) {
 
        // There should be no more `QueueDynProducer` pointers to this queue
 
        dbg_code!(assert_eq!(self.inner.dbg.load(Ordering::Acquire), 0));
 
        // And so the limit head should be equal to the write head
 
        let data_lock = self.inner.data.lock_shared();
 
        let write_index = self.inner.write_head.load(Ordering::Acquire);
 
        assert_eq!(self.inner.limit_head.load(Ordering::Acquire), write_index);
 

	
 
        // Every item that has not yet been taken out of the queue needs to
 
        // have its destructor called. We immediately apply the
 
        // increment-by-size trick and wait until we've hit the write head.
 
        let mut read_index = self.inner.read_head.load(Ordering::Acquire);
 
        read_index += data_lock.data.cap() as u32;
 
        while read_index & data_lock.compare_mask != write_index {
 
            unsafe {
 
                let target = data_lock.data.get((read_index & data_lock.read_mask) as usize);
 
                std::ptr::drop_in_place(target);
 
            }
 
            read_index += 1;
 
        }
 
    }
 
}
 

	
 
pub struct QueueDynProducer<T> {
 
    queue: *const Shared<T>,
 
}
 

	
 
impl<T> QueueDynProducer<T> {
 
    fn new(consumer: &QueueDynMpsc<T>) -> Self {
 
        dbg_code!(consumer.inner.dbg.fetch_add(1, Ordering::AcqRel));
 
        unsafe {
 
            // If you only knew the power of the dark side! Obi-Wan never told
 
            // you what happened to your father!
 
            let queue: *const _ = std::mem::transmute(consumer.inner.as_ref());
 
            return Self{ queue };
 
        }
 
    }
 

	
 
    pub fn push(&self, value: T) {
 
        let queue = unsafe{ &*self.queue };
 

	
 
        let mut data_lock = queue.data.lock_shared();
 
        let mut write_index = queue.write_head.load(Ordering::Acquire);
 

	
 
        'attempt_write: loop {
 
            let read_index = queue.read_head.load(Ordering::Acquire);
 

	
 
            if write_index == read_index { // both stored as [0, 2*capacity), so we can check equality without bitwise ANDing
 
                // Need to resize, try loading read/write index afterwards
 
                let expected_capacity = data_lock.data.cap();
 
                data_lock = self.resize(data_lock, expected_capacity);
 
                write_index = queue.write_head.load(Ordering::Acquire);
 
                continue 'attempt_write;
 
            }
 

	
 
            // If here try to advance write index
 
            let new_write_index = (write_index + 1) & data_lock.compare_mask;
 
            if let Err(actual_write_index) = queue.write_head.compare_exchange(
 
                write_index, new_write_index, Ordering::AcqRel, Ordering::Acquire
 
            ) {
 
                write_index = actual_write_index;
 
                continue 'attempt_write;
 
            }
 

	
 
            // We're now allowed to write at `write_index`
 
            unsafe {
 
                std::ptr::write(data_lock.data.get((write_index & data_lock.read_mask) as usize), value);
 
            }
 

	
 
            // Update limit head to let reader obtain the written value in a
 
            // CAS-loop
 
            while let Err(_) = queue.limit_head.compare_exchange_weak(
 
                write_index, new_write_index,
 
                Ordering::AcqRel, Ordering::Relaxed
 
            ) {}
 

	
 
            return;
 
        }
 
    }
 

	
 
    fn resize(&self, shared_lock: InnerRead<T>, expected_capacity: usize) -> InnerRead<T> {
 
        drop(shared_lock);
 
        let queue = unsafe{ &*self.queue };
 

	
 
        {
 
            let mut exclusive_lock = queue.data.lock_exclusive();
 

	
 
            // We hold the exclusive lock, but someone else might have done the resizing, and so:
 
            if exclusive_lock.data.cap() == expected_capacity {
 
                let old_capacity = expected_capacity;
 
                let new_capacity = 2 * old_capacity;
 
                assert_correct_capacity(new_capacity);
 

	
 
                // Resize by a factor of two, and make the two halves identical.
 
                exclusive_lock.data.resize(new_capacity);
 
                for idx in old_capacity..new_capacity {
 
                    unsafe {
 
                        let target = exclusive_lock.data.get(idx);
 
                        let source = exclusive_lock.data.get(idx - old_capacity);
 
                        std::ptr::write(target, std::ptr::read(source));
 
                    }
 
                }
 

	
 
                // Modify all atomics to reflect that we just resized the
 
                // underlying buffer. We have that everything between the read
 
                // index and the write index is readable. And the following
 
                // preserves that property, while increasing the size from
 
                // `old_capacity` to `new_capacity`.
 
                // Note that the addition of `new_capacity` to `write_head` is
 
                // to ensure the ringbuffer can distinguish the cases where the
 
                // ringbuffer is full, and when it is empty.
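                // (Worked example with hypothetical numbers: with a capacity
                // of 4 the heads live in [0, 8); the queue is empty when
                // `write_head == read_head + 4 (mod 8)` and full when
                // `write_head == read_head`. Without that offset the two
                // states would look identical.)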
 
                let mut read_index = queue.read_head.load(Ordering::Acquire);
 
                let mut write_index = queue.write_head.load(Ordering::Acquire);
 
                debug_assert_eq!(write_index, queue.limit_head.load(Ordering::Acquire)); // since we have exclusive access
 

	
 
                let is_full = read_index == write_index; // before bitwise AND-mask
 
                read_index &= exclusive_lock.read_mask;
 
                write_index &= exclusive_lock.read_mask;
 

	
 
                let new_capacity = new_capacity as u32;
 
                if read_index <= write_index && !is_full { // which means: (read index < write_index) || buffer_is_empty
 
                    // The readable elements do not wrap around the ringbuffer
 
                    write_index += new_capacity;
 
                } else {
 
                    // The readable elements do wrap around the ringbuffer
 
                    write_index += old_capacity as u32;
 
                    write_index += new_capacity;
 
                }
 

	
 
                queue.read_head.store(read_index, Ordering::Release);
 
                queue.limit_head.store(write_index, Ordering::Release);
 
                queue.write_head.store(write_index, Ordering::Release);
 

	
 
                // Update the masks
 
                exclusive_lock.read_mask = new_capacity - 1;
 
                exclusive_lock.compare_mask = (2 * new_capacity) - 1;
 
            }
 
        }
 

	
 
        // Reacquire shared lock
 
        return queue.data.lock_shared();
 
    }
 
}
 

	
 
impl<T> Drop for QueueDynProducer<T> {
 
    fn drop(&mut self) {
 
        dbg_code!(unsafe{ (*self.queue).dbg.fetch_sub(1, Ordering::AcqRel) });
 
    }
 
}
 

	
 
// The producer end is `Send`, because in debug mode we make sure that there are
// no more producers when the queue is destroyed. But it is not `Sync`, because
// that would circumvent our atomic counter shenanigans.
 
unsafe impl<T> Send for QueueDynProducer<T>{}
 

	
 
#[inline]
 
fn assert_correct_capacity(capacity: usize) {
 
    assert!(capacity.is_power_of_two() && capacity < (u32::MAX as usize) / 2);
 
}
 

	
 
#[cfg(test)]
 
mod tests {
 
    use super::*;
 
    use super::super::tests::*;
 

	
 
    fn queue_size<T>(queue: &QueueDynMpsc<T>) -> usize {
 
        let lock = queue.inner.data.lock_exclusive();
 
        return lock.data.cap();
 
    }
 

	
 
    #[test]
 
    fn single_threaded_fixed_size_push_pop() {
 
        const INIT_SIZE: usize = 16;
 
        const NUM_ROUNDS: usize = 3;
 
        let mut cons = QueueDynMpsc::new(INIT_SIZE);
 
        let prod = cons.producer();
 

	
 
        for _round in 0..3 {
 
        let counters = Counters::new();
 

	
 
        for _round in 0..NUM_ROUNDS {
 
            // Fill up with indices
 
            for idx in 0..INIT_SIZE {
 
                prod.push(idx);
 
                prod.push(Resource::new(&counters, idx as u64));
 
            }
 

	
 
            // Take out indices and check
 
            for idx in 0..INIT_SIZE {
 
                let gotten = cons.pop().unwrap();
 
                assert_eq!(idx, gotten);
 
                assert_eq!(idx as u64, gotten.val);
 
            }
 

	
 
            assert!(cons.pop().is_none()); // nothing left in queue
 
            assert_eq!(queue_size(&cons), INIT_SIZE); // queue still of same size
 
        }
 

	
 
        let num_expected = (INIT_SIZE * NUM_ROUNDS) as u64;
 
        assert_ctor_eq!(counters, num_expected);
 
        assert_dtor_eq!(counters, num_expected);
 
    }
 

	
 
    #[test]
 
    fn single_threaded_resizing_push_pop() {
 
        const INIT_SIZE: usize = 8;
 
        const NUM_RESIZE: usize = 3; // note: each resize increases capacity by factor of two
 

	
 
        let mut cons = QueueDynMpsc::new(INIT_SIZE);
 
        let prod = cons.producer();
 

	
 
        let counters = Counters::new();
 

	
 
        for resize_idx in 0..NUM_RESIZE {
 
            // Fill up, pushing twice the current capacity (forces a resize)
 
            let cur_size = INIT_SIZE << resize_idx;
 
            let new_size = cur_size << 1;
 
            for idx in 0..new_size {
 
                prod.push(idx);
 
                prod.push(Resource::new(&counters, idx as u64));
 
            }
 

	
 
            for idx in 0..new_size {
 
                let gotten = cons.pop().unwrap();
 
                assert_eq!(idx, gotten);
 
                assert_eq!(idx as u64, gotten.val);
 
            }
 

	
 
            assert!(cons.pop().is_none());
 
            assert_eq!(queue_size(&cons), new_size);
 
        }
 

	
 
        assert_eq!(queue_size(&cons), INIT_SIZE << NUM_RESIZE);
 

	
 
        // Bit trickery supremo (fails if INIT_SIZE is not a power of two)!
 
        let num_expected = ((INIT_SIZE << (NUM_RESIZE + 1)) - 1 - ((INIT_SIZE << 1) - 1)) as u64;
 
        assert_ctor_eq!(counters, num_expected);
 
        assert_dtor_eq!(counters, num_expected);
 
    }
 

	
 
    #[test]
 
    fn single_threaded_alternating_push_pop() {
 
        const INIT_SIZE: usize = 32;
 
        const NUM_ROUNDS: usize = 4;
 
        const NUM_PROD: usize = 4;
 
        assert!(INIT_SIZE % NUM_PROD == 0);
 

	
 
        let mut cons = QueueDynMpsc::new(INIT_SIZE);
 
        let mut prods = Vec::with_capacity(NUM_PROD);
 
        for _ in 0..NUM_PROD {
 
            prods.push(cons.producer());
 
        }
 

	
 
        for _round_idx in 0..4 {
 
        let counters = Counters::new();
 

	
 
        for _round_idx in 0..NUM_ROUNDS {
 
            // Fill up, alternating per producer
 
            let mut prod_idx = 0;
 
            for idx in 0..INIT_SIZE {
 
                let prod = &prods[prod_idx];
 
                prod_idx += 1;
 
                prod_idx %= NUM_PROD;
 
                prod.push(idx);
 
                prod.push(Resource::new(&counters, idx as u64));
 
            }
 

	
 
            // Retrieve and check again
 
            for idx in 0..INIT_SIZE {
 
                let gotten = cons.pop().unwrap();
 
                assert_eq!(idx, gotten);
 
                assert_eq!(idx as u64, gotten.val);
 
            }
 

	
 
            assert!(cons.pop().is_none());
 
            assert_eq!(queue_size(&cons), INIT_SIZE);
 
        }
 

	
 
        let num_expected = (NUM_ROUNDS * INIT_SIZE) as u64;
 
        assert_ctor_eq!(counters, num_expected);
 
        assert_dtor_eq!(counters, num_expected);
 
    }
 

	
 
    #[test]
 
    fn partially_filled_cleanup() {
 
        // Init at 16, fill until 8, take out 4. The remaining 4 destructors are
        // not called until the queue's consumer side is dropped.
 
        let mut cons = QueueDynMpsc::new(16);
 
        let mut prod = cons.producer();
 

	
 
        let counters = Counters::new();
 

	
 
        for _ in 0..8 {
 
            prod.push(Resource::new(&counters, 0));
 
        }
 

	
 
        for _ in 0..4 {
 
            cons.pop().expect("a value");
 
        }
 

	
 
        assert_ctor_eq!(counters, 8);
 
        assert_dtor_eq!(counters, 4);
 
        drop(prod);
 
        drop(cons);
 
        assert_ctor_eq!(counters, 8);
 
        assert_dtor_eq!(counters, 8);
 
    }
 

	
 
    #[test]
 
    fn multithreaded_production_and_consumption() {
 
        use std::sync::{Arc, Mutex};
 

	
 
        // Rather randomized test. Kind of a stress test. We let the producers
        // produce `u64` values with the high bits containing their identifier.
        // The consumer will try to receive as fast as possible until each
        // thread has produced the expected number of values.
 
        const NUM_STRESS_TESTS: usize = 2;
 
        const NUM_PER_THREAD: usize = 4096;
 
        const NUM_PROD_THREADS: usize = 4;
 

	
 
        fn take_num_thread_idx(number: u64) -> u64 { return (number >> 32) & 0xFFFFFFFF; }
 
        fn take_num(number: u64) -> u64 { return number & 0xFFFFFFFF; }
 

	
 
        // Set up queue and producers
 
        for _stress_idx in 0..NUM_STRESS_TESTS {
 
            let mut queue = QueueDynMpsc::new(4);
 
            let mut queue = QueueDynMpsc::<Resource>::new(4);
 
            let mut producers = Vec::with_capacity(NUM_PROD_THREADS);
 
            for _idx in 0..NUM_PROD_THREADS {
 
                producers.push(queue.producer());
 
            }
 

	
 
            let counters = Counters::new();
 

	
 
            // Start up the consumer thread and let it spin immediately. Note
            // that it must die last.
 
            let can_exit_lock = Arc::new(Mutex::new(false));
 
            let mut held_exit_lock = can_exit_lock.lock().unwrap();
 

	
 
            let consume_handle = {
 
                let can_exit_lock = can_exit_lock.clone();
 
                std::thread::spawn(move || {
 
                    let mut counters = [0u64; NUM_PROD_THREADS];
 
                    let mut thread_val_counters = [0u64; NUM_PROD_THREADS];
 
                    let mut num_done = 0;
 
                    while num_done != NUM_PROD_THREADS {
 
                        // Spin until we get something
 
                        let new_value = loop {
 
                            if let Some(value) = queue.pop() {
 
                                break value;
 
                                break value.val;
 
                            }
 
                        };
 

	
 
                        let thread_idx = take_num_thread_idx(new_value);
 
                        let counter = &mut counters[thread_idx as usize];
 
                        let counter = &mut thread_val_counters[thread_idx as usize];
 
                        assert_eq!(*counter, take_num(new_value)); // values per thread arrive in order
 

	
 
                        *counter += 1;
 
                        if *counter == NUM_PER_THREAD as u64 {
 
                            // Finished this one
 
                            num_done += 1;
 
                        }
 
                    }
 

	
 
                    let _exit_guard = can_exit_lock.lock().unwrap();
 
                })
 
            };
 

	
 
            // Set up producer threads
 
            let mut handles = Vec::with_capacity(NUM_PROD_THREADS);
 
            for prod_idx in 0..NUM_PROD_THREADS {
 
                let prod_handle = producers.pop().unwrap();
 
                let counters = counters.clone();
 

	
 
                let handle = std::thread::spawn(move || {
 
                    let base_value = (prod_idx as u64) << 32;
 
                    for number in 0..NUM_PER_THREAD as u64 {
 
                        prod_handle.push(base_value + number);
 
                        prod_handle.push(Resource::new(&counters, base_value + number));
 
                    }
 
                });
 

	
 
                handles.push(handle);
 
            }
 

	
 
            // Wait until all producers have finished, then unlock our held lock
            // and wait until the consumer finishes
 
            for handle in handles {
 
                handle.join().expect("clean producer exit");
 
            }
 

	
 
            drop(held_exit_lock);
 
            consume_handle.join().expect("clean consumer exit");
 

	
 
            let num_expected = (NUM_PER_THREAD * NUM_PROD_THREADS) as u64;
 
            assert_ctor_eq!(counters, num_expected);
 
            assert_dtor_eq!(counters, num_expected);
 
        }
 
    }
 
}
 
\ No newline at end of file
src/runtime2/store/tests.rs
 
new file 100644
 
pub use std::sync::Arc;
 
pub use std::sync::atomic::{AtomicU64, Ordering};
 

	
 
// Little wrapper for the two atomic ctor/dtor counters
 
#[derive(Clone)]
 
pub struct Counters {
 
    pub ctor: Arc<AtomicU64>,
 
    pub dtor: Arc<AtomicU64>,
 
}
 

	
 
impl Counters {
 
    pub fn new() -> Self {
 
        return Self{
 
            ctor: Arc::new(AtomicU64::new(0)),
 
            dtor: Arc::new(AtomicU64::new(0)),
 
        };
 
    }
 
}
 

	
 
macro_rules! assert_ctor_eq {
    ($counters:expr, $count:expr) => {
        assert_eq!($counters.ctor.load(Ordering::Acquire), $count);
    }
}
 

	
 
macro_rules! assert_dtor_eq {
    ($counters:expr, $count:expr) => {
        assert_eq!($counters.dtor.load(Ordering::Acquire), $count);
    }
}
 

	
 
// Utility resource structure that counts the number of constructor and
// destructor calls.
 
pub struct Resource {
 
    dtor: Arc<AtomicU64>,
 
    pub val: u64,
 
}
 

	
 
impl Resource {
 
    pub fn new(counters: &Counters, val: u64) -> Self {
 
        counters.ctor.fetch_add(1, Ordering::SeqCst);
 
        return Self{ dtor: counters.dtor.clone(), val };
 
    }
 
}
 

	
 
impl Drop for Resource {
 
    fn drop(&mut self) {
 
        self.dtor.fetch_add(1, Ordering::SeqCst);
 
    }
 
}
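
// A minimal usage sketch (hypothetical) of the helpers above:
//
//     let counters = Counters::new();
//     let resource = Resource::new(&counters, 42);
//     assert_ctor_eq!(counters, 1);
//     drop(resource);
//     assert_dtor_eq!(counters, 1);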
 
\ No newline at end of file