Changeset - 1a42eb33aa76
[Not reviewed]
0 8 0
MH - 4 years ago 2021-04-15 17:47:02
contact@maxhenger.nl
WIP on compiler rearchitecting
8 files changed with 569 insertions and 190 deletions:
0 comments (0 inline, 0 general)
src/collections/string_pool.rs
Show inline comments
 
use std::ptr::null_mut;
 
use std::collections::hash_map::DefaultHasher;
 
use std::hash::{Hash, Hasher};
 
use std::marker::PhantomData;
 

	
 
const SLAB_SIZE: usize = u16::max_value() as usize;
 

	
 
#[derive(Clone)]
 
pub struct StringRef {
 
pub struct StringRef<'a> {
 
    data: *const u8,
 
    length: usize,
 
    _phantom: PhantomData<&'a [u8]>,
 
}
 

	
 
impl StringRef {
 
    pub fn as_str<'a>(&'a self) -> &'a str {
 
impl<'a> StringRef<'a> {
 
    /// `new` constructs a new StringRef whose data is not owned by the
 
    /// `StringPool`, hence cannot have a `'static` lifetime.
 
    pub(crate) fn new(data: &'a [u8]) -> StringRef<'a> {
 
        // This is an internal (compiler) function: so debug_assert that the
 
        // string is valid UTF8. Most commonly the input will come from the
 
        // code's source file, which is checked for ASCII-ness anyway.
 
        debug_assert!(data.is_ascii());
 
        let length = data.len();
 
        let data = data.as_ptr();
 
        StringRef{ data, length, _phantom: PhantomData }
 
    }
 

	
 
    pub fn as_str(&self) -> &'a str {
 
        unsafe {
 
            let slice = std::slice::from_raw_parts::<'a, u8>(self.data, self.length);
 
            std::str::from_utf8_unchecked(slice)
 
@@ -66,7 +80,7 @@ impl StringPool {
 
    }
 

	
 
    // Interns a string to the StringPool, returning a reference to it
 
    pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef {
 
    pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef<'static> {
 
        // TODO: Large string allocations, if ever needed.
 
        let data_len = data.len();
 
        assert!(data_len <= SLAB_SIZE, "string is too large for slab");
 
@@ -88,7 +102,7 @@ impl StringPool {
 

	
 
        unsafe {
 
            let start = last.data.as_ptr().offset(range_start as isize);
 
            StringRef{ data: start, length: data_len }
 
            StringRef{ data: start, length: data_len, _phantom: PhantomData }
 
        }
 
    }
 

	
src/protocol/arena.rs
Show inline comments
 
@@ -3,14 +3,16 @@ use core::hash::Hash;
 
use core::marker::PhantomData;
 

	
 
pub struct Id<T> {
 
    pub(crate) index: u32,
 
    // Not actually a signed index into the heap. But the index is set to -1 if
 
    // we don't know an ID yet. This is checked during debug mode.
 
    pub(crate) index: i32,
 
    _phantom: PhantomData<T>,
 
}
 

	
 
impl<T> Id<T> {
 
    pub(crate) fn new(index: u32) -> Self {
 
        Self{ index, _phantom: Default::default() }
 
    }
 
    pub(crate) fn new_invalid() -> Self     { Self{ index: -1, _phantom: Default::default() } }
 
    pub(crate) fn new(index: i32) -> Self   { Self{ index, _phantom: Default::default() } }
 
    pub(crate) fn is_invalid(&self) -> bool { self.index < 0 }
 
}
 

	
 
#[derive(Debug)]
 
@@ -46,15 +48,19 @@ impl<T> Arena<T> {
 
    pub fn new() -> Self {
 
        Self { store: vec![] }
 
    }
 

	
 
    pub fn alloc_with_id(&mut self, f: impl FnOnce(Id<T>) -> T) -> Id<T> {
 
        use std::convert::TryFrom;
 
        let id = Id::new(u32::try_from(self.store.len()).expect("Out of capacity!"));
 
        // Lets keep this a runtime assert.
 
        assert!(self.store.len() < i32::max_value() as usize, "Arena out of capacity");
 
        let id = Id::new(self.store.len() as i32);
 
        self.store.push(f(id));
 
        id
 
    }
 

	
 
    pub fn iter(&self) -> impl Iterator<Item = &T> {
 
        self.store.iter()
 
    }
 

	
 
    pub fn len(&self) -> usize {
 
        self.store.len()
 
    }
 
@@ -62,11 +68,13 @@ impl<T> Arena<T> {
 
impl<T> core::ops::Index<Id<T>> for Arena<T> {
 
    type Output = T;
 
    fn index(&self, id: Id<T>) -> &Self::Output {
 
        debug_assert!(!id.is_invalid(), "attempted to index into Arena with an invalid id (index < 0)");
 
        self.store.index(id.index as usize)
 
    }
 
}
 
impl<T> core::ops::IndexMut<Id<T>> for Arena<T> {
 
    fn index_mut(&mut self, id: Id<T>) -> &mut Self::Output {
 
        debug_assert!(!id.is_invalid(), "attempted to index_mut into Arena with an invalid id (index < 0)");
 
        self.store.index_mut(id.index as usize)
 
    }
 
}
 
\ No newline at end of file
src/protocol/ast.rs
Show inline comments
 
@@ -15,7 +15,6 @@ use crate::protocol::input_source2::{InputPosition2, InputSpan};
 
const MAX_LEVEL: usize = 128;
 
const MAX_NAMESPACES: usize = 64;
 

	
 

	
 
/// Helper macro that defines a type alias for a AST element ID. In this case 
 
/// only used to alias the `Id<T>` types.
 
macro_rules! define_aliased_ast_id {
 
@@ -66,9 +65,9 @@ macro_rules! define_new_ast_id {
 
        pub struct $name (pub(crate) $parent);
 

	
 
        impl $name {
 
            pub fn upcast(self) -> $parent {
 
                self.0
 
            }
 
            pub(crate) fn new_invalid() -> Self     { Self($parent::new_invalid()) }
 
            pub(crate) fn is_invalid(&self) -> bool { self.0.is_invalid() }
 
            pub fn upcast(self) -> $parent          { self.0 }
 
        }
 
    };
 
    // Variant where we define the type, and the Index and IndexMut traits
 
@@ -127,11 +126,11 @@ define_new_ast_id!(ParameterId, VariableId, index(Parameter, Variable::Parameter
 
define_new_ast_id!(LocalId, VariableId, index(Local, Variable::Local, variables), alloc(alloc_local));
 

	
 
define_aliased_ast_id!(DefinitionId, Id<Definition>, index(Definition, definitions));
 
define_new_ast_id!(StructId, DefinitionId, index(StructDefinition, Definition::Struct, definitions), alloc(alloc_struct_definition));
 
define_new_ast_id!(EnumId, DefinitionId, index(EnumDefinition, Definition::Enum, definitions), alloc(alloc_enum_definition));
 
define_new_ast_id!(UnionId, DefinitionId, index(UnionDefinition, Definition::Union, definitions), alloc(alloc_union_definition));
 
define_new_ast_id!(ComponentId, DefinitionId, index(Component, Definition::Component, definitions), alloc(alloc_component));
 
define_new_ast_id!(FunctionId, DefinitionId, index(Function, Definition::Function, definitions), alloc(alloc_function));
 
define_new_ast_id!(StructDefinitionId, DefinitionId, index(StructDefinition, Definition::Struct, definitions), alloc(alloc_struct_definition));
 
define_new_ast_id!(EnumDefinitionId, DefinitionId, index(EnumDefinition, Definition::Enum, definitions), alloc(alloc_enum_definition));
 
define_new_ast_id!(UnionDefinitionId, DefinitionId, index(UnionDefinition, Definition::Union, definitions), alloc(alloc_union_definition));
 
define_new_ast_id!(ComponentDefinitionId, DefinitionId, index(ComponentDefinition, Definition::Component, definitions), alloc(alloc_component_definition));
 
define_new_ast_id!(FunctionDefinitionId, DefinitionId, index(FunctionDefinition, Definition::Function, definitions), alloc(alloc_function_definition));
 

	
 
define_aliased_ast_id!(StatementId, Id<Statement>, index(Statement, statements));
 
define_new_ast_id!(BlockStatementId, StatementId, index(BlockStatement, Statement::Block, statements), alloc(alloc_block_statement));
 
@@ -328,11 +327,10 @@ impl SyntaxElement for Import {
 
pub struct ImportModule {
 
    pub this: ImportId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub module_name: Vec<u8>,
 
    pub span: InputSpan,
 
    pub module_name: Identifier,
 
    pub alias: Identifier,
 
    // Phase 2: module resolving
 
    pub module_id: Option<RootId>,
 
    pub module_id: RootId,
 
}
 

	
 
#[derive(Debug, Clone)]
 
@@ -349,7 +347,7 @@ pub struct AliasedSymbol {
 
pub struct ImportSymbols {
 
    pub this: ImportId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub module_name: Vec<u8>,
 
    // Phase 2: module resolving
 
    pub module_id: Option<RootId>,
 
@@ -363,7 +361,7 @@ pub struct ImportSymbols {
 
#[derive(Debug, Clone)]
 
pub struct Identifier {
 
    pub span: InputSpan,
 
    pub value: StringRef,
 
    pub value: StringRef<'static>,
 
}
 

	
 
impl PartialEq for Identifier {
 
@@ -965,8 +963,8 @@ pub enum Definition {
 
    Struct(StructDefinition),
 
    Enum(EnumDefinition),
 
    Union(UnionDefinition),
 
    Component(Component),
 
    Function(Function),
 
    Component(ComponentDefinition),
 
    Function(FunctionDefinition),
 
}
 

	
 
impl Definition {
 
@@ -1012,7 +1010,7 @@ impl Definition {
 
            _ => false,
 
        }
 
    }
 
    pub fn as_component(&self) -> &Component {
 
    pub fn as_component(&self) -> &ComponentDefinition {
 
        match self {
 
            Definition::Component(result) => result,
 
            _ => panic!("Unable to cast `Definition` to `Component`"),
 
@@ -1024,7 +1022,7 @@ impl Definition {
 
            _ => false,
 
        }
 
    }
 
    pub fn as_function(&self) -> &Function {
 
    pub fn as_function(&self) -> &FunctionDefinition {
 
        match self {
 
            Definition::Function(result) => result,
 
            _ => panic!("Unable to cast `Definition` to `Function`"),
 
@@ -1094,14 +1092,20 @@ pub struct StructFieldDefinition {
 

	
 
#[derive(Debug, Clone)]
 
pub struct StructDefinition {
 
    pub this: StructId,
 
    pub this: StructDefinitionId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub identifier: Identifier,
 
    pub poly_vars: Vec<Identifier>,
 
    pub fields: Vec<StructFieldDefinition>
 
}
 

	
 
impl StructDefinition {
 
    pub(crate) fn new_empty(this: StructDefinitionId, span: InputSpan, identifier: Identifier) -> Self {
 
        Self{ this, span, identifier, poly_vars: Vec::new(), fields: Vec::new() }
 
    }
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub enum EnumVariantValue {
 
    None,
 
@@ -1117,14 +1121,20 @@ pub struct EnumVariantDefinition {
 

	
 
#[derive(Debug, Clone)]
 
pub struct EnumDefinition {
 
    pub this: EnumId,
 
    pub this: EnumDefinitionId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub identifier: Identifier,
 
    pub poly_vars: Vec<Identifier>,
 
    pub variants: Vec<EnumVariantDefinition>,
 
}
 

	
 
impl EnumDefinition {
 
    pub(crate) fn new_empty(this: EnumDefinitionId, span: InputSpan, identifier: Identifier) -> Self {
 
        Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() }
 
    }
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub enum UnionVariantValue {
 
    None,
 
@@ -1140,14 +1150,20 @@ pub struct UnionVariantDefinition {
 

	
 
#[derive(Debug, Clone)]
 
pub struct UnionDefinition {
 
    pub this: UnionId,
 
    pub this: UnionDefinitionId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub identifier: Identifier,
 
    pub poly_vars: Vec<Identifier>,
 
    pub variants: Vec<UnionVariantDefinition>,
 
}
 

	
 
impl UnionDefinition {
 
    pub(crate) fn new_empty(this: UnionDefinitionId, span: InputSpan, identifier: Identifier) -> Self {
 
        Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() }
 
    }
 
}
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub enum ComponentVariant {
 
    Primitive,
 
@@ -1155,10 +1171,10 @@ pub enum ComponentVariant {
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub struct Component {
 
    pub this: ComponentId,
 
pub struct ComponentDefinition {
 
    pub this: ComponentDefinitionId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub variant: ComponentVariant,
 
    pub identifier: Identifier,
 
    pub poly_vars: Vec<Identifier>,
 
@@ -1166,17 +1182,28 @@ pub struct Component {
 
    pub body: StatementId,
 
}
 

	
 
impl SyntaxElement for Component {
 
impl ComponentDefinition {
 
    pub(crate) fn new_empty(this: ComponentDefinitionId, span: InputSpan, variant: ComponentVariant, identifier: Identifier) -> Self {
 
        Self{ 
 
            this, span, variant, identifier, 
 
            poly_vars: Vec::new(), 
 
            parameters: Vec::new(), 
 
            body: StatementId::new_invalid()
 
        }
 
    }
 
}
 

	
 
impl SyntaxElement for ComponentDefinition {
 
    fn position(&self) -> InputPosition {
 
        self.position
 
    }
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub struct Function {
 
    pub this: FunctionId,
 
pub struct FunctionDefinition {
 
    pub this: FunctionDefinitionId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan,
 
    pub return_type: ParserTypeId,
 
    pub identifier: Identifier,
 
    pub poly_vars: Vec<Identifier>,
 
@@ -1184,7 +1211,19 @@ pub struct Function {
 
    pub body: StatementId,
 
}
 

	
 
impl SyntaxElement for Function {
 
impl FunctionDefinition {
 
    pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier) -> Self {
 
        Self {
 
            this, span, identifier,
 
            return_type: ParserTypeId::new_invalid(),
 
            poly_vars: Vec::new(),
 
            parameters: Vec::new(),
 
            body: StatementId::new_invalid(),
 
        }
 
    }
 
}
 

	
 
impl SyntaxElement for FunctionDefinition {
 
    fn position(&self) -> InputPosition {
 
        self.position
 
    }
src/protocol/lexer.rs
Show inline comments
 
@@ -2378,7 +2378,7 @@ impl Lexer<'_> {
 

	
 
        // Parse body
 
        let body = self.consume_block_statement(h)?;
 
        Ok(h.alloc_component(|this| Component { 
 
        Ok(h.alloc_component(|this| ComponentDefinition {
 
            this,
 
            variant: ComponentVariant::Composite,
 
            position,
 
@@ -2404,7 +2404,7 @@ impl Lexer<'_> {
 

	
 
        // Consume body
 
        let body = self.consume_block_statement(h)?;
 
        Ok(h.alloc_component(|this| Component { 
 
        Ok(h.alloc_component(|this| ComponentDefinition {
 
            this,
 
            variant: ComponentVariant::Primitive,
 
            position,
 
@@ -2430,7 +2430,7 @@ impl Lexer<'_> {
 

	
 
        // Consume body
 
        let body = self.consume_block_statement(h)?;
 
        Ok(h.alloc_function(|this| Function {
 
        Ok(h.alloc_function(|this| FunctionDefinition {
 
            this,
 
            position,
 
            return_type,
src/protocol/lexer2.rs
Show inline comments
 
@@ -25,28 +25,13 @@ enum KeywordDefinition {
 
    Composite,
 
}
 

	
 
impl KeywordDefinition {
 
    fn as_symbol_class(&self) -> SymbolClass {
 
        use KeywordDefinition as KD;
 
        use SymbolClass as SC;
 

	
 
        match self {
 
            KD::Struct => SC::Struct,
 
            KD::Enum => SC::Enum,
 
            KD::Union => SC::Union,
 
            KD::Function => SC::Function,
 
            KD::Primitive | KD::Composite => SC::Component,
 
        }
 
    }
 
}
 

	
 
struct Module {
 
    // Buffers
 
    source: InputSource,
 
    tokens: TokenBuffer,
 
    // Identifiers
 
    root_id: RootId,
 
    name: Option<(PragmaId, StringRef)>,
 
    name: Option<(PragmaId, StringRef<'static>)>,
 
    version: Option<(PragmaId, i64)>,
 
    phase: ModuleCompilationPhase,
 
}
 
@@ -61,19 +46,23 @@ struct Ctx<'a> {
 
/// added to the symbol table such that during AST-construction we know which
 
/// identifiers point to types. Will also parse all pragmas to determine module
 
/// names.
 
pub(crate) struct ASTSymbolPrePass {
 
pub(crate) struct PassPreSymbol {
 
    symbols: Vec<Symbol>,
 
    pragmas: Vec<PragmaId>,
 
    imports: Vec<ImportId>,
 
    definitions: Vec<DefinitionId>,
 
    buffer: String,
 
    has_pragma_version: bool,
 
    has_pragma_module: bool,
 
}
 

	
 
impl ASTSymbolPrePass {
 
impl PassPreSymbol {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            symbols: Vec::with_capacity(128),
 
            pragmas: Vec::with_capacity(8),
 
            imports: Vec::with_capacity(32),
 
            definitions: Vec::with_capacity(128),
 
            buffer: String::with_capacity(128),
 
            has_pragma_version: false,
 
            has_pragma_module: false,
 
@@ -83,6 +72,8 @@ impl ASTSymbolPrePass {
 
    fn reset(&mut self) {
 
        self.symbols.clear();
 
        self.pragmas.clear();
 
        self.imports.clear();
 
        self.definitions.clear();
 
        self.has_pragma_version = false;
 
        self.has_pragma_module = false;
 
    }
 
@@ -92,11 +83,10 @@ impl ASTSymbolPrePass {
 

	
 
        let module = &mut modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        let expected_parent_idx = 0;
 
        let expected_subranges = module_range.subranges;
 

	
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 
        debug_assert_eq!(module.root_id.index, 0);
 
        debug_assert!(module.root_id.is_invalid()); // not set yet,
 

	
 
        // Preallocate root in the heap
 
        let root_id = ctx.heap.alloc_protocol_description(|this| {
 
@@ -109,44 +99,45 @@ impl ASTSymbolPrePass {
 
        });
 
        module.root_id = root_id;
 

	
 
        // Visit token ranges to detect defintions
 
        let mut visited_subranges = 0;
 
        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
 
            // Skip any ranges that do not belong to the module
 
            let cur_range = &module.tokens.ranges[range_idx];
 
            if cur_range.parent_idx != expected_parent_idx {
 
                continue;
 
            }
 
        // Visit token ranges to detect definitions and pragmas
 
        let mut range_idx = module_range.first_child_idx;
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 

	
 
            // Parse if it is a definition or a pragma
 
            if cur_range.range_kind == TokenRangeKind::Definition {
 
                self.visit_definition_range(modules, module_idx, ctx, range_idx)?;
 
                self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
 
            } else if cur_range.range_kind == TokenRangeKind::Pragma {
 
                self.visit_pragma_range(modules, module_idx, ctx, range_idx)?;
 
                self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?;
 
            }
 

	
 
            visited_subranges += 1;
 
            if visited_subranges == expected_subranges {
 
                break;
 
            match cur_range.next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; },
 
            }
 
        }
 

	
 
        // By now all symbols should have been found: add to symbol table and
 
        // add the parsed pragmas to the preallocated root in the heap.
 
        debug_assert_eq!(visited_subranges, expected_subranges);
 
        ctx.symbols.insert_scoped_symbols(None, SymbolScope::Module(module.root_id), &self.symbols);
 
        // Add the module's symbol scope and the symbols we just parsed
 
        let module_scope = SymbolScope::Module(root_id);
 
        ctx.symbols.insert_scope(None, module_scope);
 
        for symbol in self.symbols.drain(..) {
 
            if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) {
 
                return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol))
 
            }
 
        }
 

	
 
        // Modify the preallocated root
 
        let root = &mut ctx.heap[root_id];
 
        debug_assert!(root.pragmas.is_empty());
 
        root.pragmas.extend(&self.pragmas);
 

	
 
        root.pragmas.extend(self.pragmas.drain(..));
 
        root.definitions.extend(self.definitions.drain(..));
 
        module.phase = ModuleCompilationPhase::DefinitionsScanned;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &mut modules[module_idx];
 
    fn visit_pragma_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
@@ -213,7 +204,7 @@ impl ASTSymbolPrePass {
 
        Ok(())
 
    }
 

	
 
    fn visit_definition_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
    fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let definition_span = InputSpan::from_positions(
 
@@ -222,60 +213,107 @@ impl ASTSymbolPrePass {
 
        );
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // Because we're visiting a definition, we expect an ident that resolves
 
        // to a keyword indicating a definition.
 
        let kw_text = consume_ident_text(&module.source, &mut iter).unwrap();
 
        // First ident must be type of symbol
 
        let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
 
        let kw = parse_definition_keyword(kw_text).unwrap();
 

	
 
        // Retrieve identifier and put in temp symbol table
 
        let definition_ident = consume_ident_text(&module.source, &mut iter)?;
 
        let definition_ident = ctx.pool.intern(definition_ident);
 
        let symbol_class = kw.as_symbol_class();
 

	
 
        // Get the token indicating the end of the definition to get the full
 
        // span of the definition
 
        let last_token = &module.tokens.tokens[range.end - 1];
 
        debug_assert_eq!(last_token.kind, TokenKind::CloseCurly);
 
        // Retrieve identifier of definition
 
        let (identifier_text, identifier_span) = consume_ident(&module.source, &mut iter)?;
 
        let ident_text = ctx.pool.intern(identifier_text);
 
        let identifier = Identifier{ span: identifier_span, value: ident_text };
 

	
 
        // Reserve space in AST for definition and add it to the symbol table
 
        let symbol_definition;
 
        let ast_definition_id;
 
        match kw {
 
            KeywordDefinition::Struct => {
 
                let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
 
                    StructDefinition::new_empty(this, definition_span, identifier)
 
                });
 
                symbol_definition = SymbolDefinition::Struct(struct_def_id);
 
                ast_definition_id = struct_def_id.upcast();
 
            },
 
            KeywordDefinition::Enum => {
 
                let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
 
                    EnumDefinition::new_empty(this, definition_span, identifier)
 
                });
 
                symbol_definition = SymbolDefinition::Enum(enum_def_id);
 
                ast_definition_id = enum_def_id.upcast();
 
            },
 
            KeywordDefinition::Union => {
 
                let union_def_id = ctx.heap.alloc_union_definition(|this| {
 
                    UnionDefinition::new_empty(this, definition_span, identifier)
 
                });
 
                symbol_definition = SymbolDefinition::Union(union_def_id);
 
                ast_definition_id = union_def_id.upcast()
 
            },
 
            KeywordDefinition::Function => {
 
                let func_def_id = ctx.heap.alloc_function_definition(|this| {
 
                    FunctionDefinition::new_empty(this, definition_span, identifier)
 
                });
 
                symbol_definition = SymbolDefinition::Function(func_def_id);
 
                ast_definition_id = func_def_id.upcast();
 
            },
 
            KeywordDefinition::Primitive | KeywordDefinition::Composite => {
 
                let component_variant = if kw == KeywordDefinition::Primitive {
 
                    ComponentVariant::Primitive
 
                } else {
 
                    ComponentVariant::Composite
 
                };
 
                let comp_def_id = ctx.heap.alloc_component_definition(|this| {
 
                    ComponentDefinition::new_empty(this, definition_span, component_variant, identifier)
 
                });
 
                symbol_definition = SymbolDefinition::Component(comp_def_id);
 
                ast_definition_id = comp_def_id.upcast();
 
            }
 
        }
 

	
 
        self.symbols.push(Symbol::new(
 
            module.root_id,
 
            SymbolScope::Module(module.root_id),
 
        let symbol = Symbol{
 
            defined_in_module: module.root_id,
 
            defined_in_scope: SymbolScope::Module(module.root_id),
 
            definition_span,
 
            symbol_class,
 
            definition_ident
 
        ));
 
            identifier_span,
 
            introduced_at: None,
 
            name: definition_ident,
 
            definition: symbol_definition
 
        };
 
        self.symbols.push(symbol);
 
        self.definitions.push(ast_definition_id);
 

	
 
        Ok(())
 
    }
 
}
 

	
 
pub(crate) struct ASTImportPrePass {
 
/// Parses all the imports in the module tokens. Is applied after the
 
/// definitions and name of modules are resolved. Hence we should be able to
 
/// resolve all symbols to their appropriate module/definition.
 
pub(crate) struct PassImport {
 
    imports: Vec<ImportId>,
 
}
 

	
 
impl ASTImportPrePass {
 
impl PassImport {
 
    pub(crate) fn new() -> Self {
 
        Self{ imports: Vec::with_capacity(32) }
 
    }
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::DefinitionsScanned));
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::DefinitionsScanned);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 

	
 
        let expected_parent_idx = 0;
 
        let expected_subranges = module_range.subranges;
 
        let mut visited_subranges = 0;
 
        let mut range_idx = module_range.first_child_idx;
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 

	
 
        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
 
            let cur_range = &module.tokens.ranges[range_idx];
 
            if cur_range.parent_idx != expected_parent_idx {
 
                continue;
 
            }
 

	
 
            visited_subranges += 1;
 
            if cur_range.range_kind == TokenRangeKind::Import {
 
                self.visit_import_range(modules, module_idx, ctx, range_idx)?;
 
                self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?;
 
            }
 

	
 
            if visited_subranges == expected_subranges {
 
                break;
 
            match cur_range.next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; }
 
            }
 
        }
 

	
 
@@ -292,26 +330,81 @@ impl ASTImportPrePass {
 
        let mut iter = module.tokens.iter_range(import_range);
 

	
 
        // Consume "import"
 
        let _import_ident = consume_ident_text(&module.source, &mut iter)?;
 
        let (_import_ident, import_span) =
 
            consume_ident(&module.source, &mut iter)?;
 
        debug_assert_eq!(_import_ident, KW_IMPORT);
 

	
 
        // Consume module name
 
        let (module_name, _) = consume_domain_ident(&module.source, &mut iter)?;
 
        let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?;
 
        let target_root_id = ctx.symbols.get_module_by_name(module_name);
 
        if target_root_id.is_none() {
 
            return Err(ParseError::new_error_at_span(
 
                &module.source, module_name_span,
 
                format!("could not resolve module '{}'", String::from_utf8_lossy(module_name))
 
            ));
 
        }
 
        let module_name = ctx.pool.intern(module_name);
 
        let target_root_id = target_root_id.unwrap();
 

	
 
        // Check for subsequent characters
 
        let next = iter.next();
 
        if has_ident(&module.source, &mut iter, b"as") {
 
            iter.consume();
 
            let (alias_text, alias_span) = consume_ident(source, &mut iter)?;
 
            let alias = ctx.pool.intern(alias_text);
 

	
 
            let import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
 
                this,
 
                span: import_span,
 
                module_name: Identifier{ span: module_name_span, value: module_name },
 
                alias: Identifier{ span: alias_span, value: alias },
 
                module_id: target_root_id
 
            }));
 
            ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
 
                defined_in_module: target_root_id,
 
                defined_in_scope: SymbolScope::Module(target_root_id),
 
                definition_span
 
            })
 
        } else if Some(TokenKind::ColonColon) == next {
 
            iter.consume();
 
        } else {
 
            // Assume implicit alias, then check if we get the semicolon next
 
            let module_name_str = module_name.as_str();
 
            let last_ident_start = module_name_str.rfind('.').map_or(0, |v| v + 1);
 
            let alias_text = &module_name_str.as_bytes()[last_ident_start..];
 
            let alias = ctx.pool.intern(alias_text);
 
            let alias_span = InputSpan::from_positions(
 
                module_name_span.begin.with_offset(last_ident_start as u32),
 
                module_name_span.end
 
            );
 
        }
 

	
 
        Ok(())
 
    }
 
}
 

	
 
fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (_, name_start, mut name_end) = consume_ident(source, iter)?;
 
    let (_, mut span) = consume_ident(source, iter)?;
 
    while let Some(TokenKind::Dot) = iter.next() {
 
        consume_dot(source, iter)?;
 
        let (_, _, new_end) = consume_ident(source, iter)?;
 
        name_end = new_end;
 
        let (_, new_span) = consume_ident(source, iter)?;
 
        span.end = new_span.end;
 
    }
 

	
 
    Ok((source.section(name_start, name_end), InputSpan::from_positions(name_start, name_end)))
 
    // Not strictly necessary, but probably a reasonable restriction: this
 
    // simplifies parsing of module naming and imports.
 
    if span.begin.line != span.end.line {
 
        return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines"));
 
    }
 

	
 
    // If module name consists of a single identifier, then it may not match any
 
    // of the reserved keywords
 
    let section = source.section(span.begin, span.end);
 
    if is_reserved_keyword(section) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((source.section(span.begin, span.end), span))
 
}
 

	
 
fn consume_dot<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(), ParseError> {
 
@@ -387,22 +480,126 @@ fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(
 
    Ok((source.section(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
fn consume_ident_text<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<&'a [u8], ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected an identifier"));
 
fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_range();
 
        return source.section(start, end) == expected;
 
    }
 
    let (ident_start, ident_end) = iter.next_range();
 
    iter.consume();
 
    Ok(source.section(ident_start, ident_end))
 

	
 
    false
 
}
 

	
 
fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_range();
 
        return Some(source.section(start, end))
 
    }
 

	
 
    None
 
}
 

	
 
fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
/// Consumes any identifier and returns it together with its span. Does not
 
/// check if the identifier is a reserved keyword.
 
fn consume_any_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error(sourcee, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_range();
 
    iter.consume();
 
    Ok((source.section(ident_start, ident_end), ident_start, ident_end))
 
    Ok((source.section(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
 
}
 

	
 
/// Consumes an identifier that is not a reserved keyword and returns it
 
/// together with its span.
 
fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (ident, span) = consume_any_ident(source, iter)?;
 
    if is_reserved_keyword(ident) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((ident, span))
 
}
 

	
 
fn is_reserved_definition_keyword(text: &[u8]) -> bool {
 
    return ([
 
        b"struct", b"enum", b"union", b"function", b"primitive"
 
    ] as &[[u8]]).contains(text)
 
}
 

	
 
fn is_reserved_statement_keyword(text: &[u8]) -> bool {
 
    return ([
 
        b"channel", b"import", b"as",
 
        b"if", b"while", b"break", b"continue", b"goto", b"return",
 
        b"synchronous", b"assert", b"new",
 
    ] as &[[u8]]).contains(text)
 
}
 

	
 
fn is_reserved_expression_keyword(text: &[u8]) -> bool {
 
    return ([
 
        b"let", b"true", b"false", b"null", // literals
 
        b"get", b"put", b"fires", b"create", b"length", // functions
 
    ] as &[[u8]]).contains(text)
 
}
 

	
 
fn is_reserved_type_keyword(text: &[u8]) -> bool {
 
    return ([
 
        b"in", b"out", b"msg",
 
        b"bool",
 
        b"u8", b"u16", b"u32", b"u64",
 
        b"s8", b"s16", b"s32", b"s64",
 
        b"auto"
 
    ] as &[[u8]]).contains(text)
 
}
 

	
 
fn is_reserved_keyword(text: &[u8]) -> bool {
 
    return
 
        is_reserved_definition_keyword(text) ||
 
        is_reserved_statement_keyword(text) ||
 
        is_reserved_type_keyword(text);
 
}
 

	
 
/// Constructs a human-readable message indicating why there is a conflict of
 
/// symbols.
 
// Note: passing the `module_idx` is not strictly necessary, but will prevent
 
// programmer mistakes during development: we get a conflict because we're
 
// currently parsing a particular module.
 
fn construct_symbol_conflict_error(modules: &[Module], module_idx: usize, ctx: &Ctx, new_symbol: &Symbol, old_symbol: &Symbol) -> ParseError {
 
    let module = &modules[module_idx];
 
    let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) {
 
        match symbol.introduced_at {
 
            Some(import_id) => {
 
                // Symbol is being imported
 
                let import = &ctx.heap[import_id];
 
                match import {
 
                    Import::Module(import) => (
 
                        format!("the module aliased as '{}' imported here", symbol.name.as_str()),
 
                        import.span
 
                    ),
 
                    Import::Symbols(symbols) => (
 
                        format!("the type '{}' imported here", symbol.name.as_str()),
 
                        symbols.span
 
                    ),
 
                }
 
            },
 
            None => {
 
                // Symbol is being defined
 
                debug_assert_eq!(symbol.defined_in_module, module.root_id);
 
                debug_assert_ne!(symbol.definition.symbol_class(), SymbolClass::Module);
 
                (
 
                    format!("the type '{}' defined here", symbol.name.as_str()),
 
                    symbol.identifier_span
 
                )
 
            }
 
        }
 
    };
 

	
 
    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
 
    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
 
    return ParseError::new_error_at_span(
 
        &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
 
    ).with_info_at_span(
 
        &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
 
    )
 
}
 

	
 
fn parse_definition_keyword(keyword: &[u8]) -> Option<KeywordDefinition> {
src/protocol/parser/symbol_table2.rs
Show inline comments
 
/// symbol_table.rs
 
///
 
/// The datastructure used to lookup symbols within particular scopes. Scopes
 
/// may be module-level or definition level, although imports and definitions
 
/// within definitions are currently not allowed.
 
///
 
/// TODO: Once the compiler has matured, find out ways to optimize to prevent
 
///     the repeated HashMap lookup.
 

	
 
use std::collections::HashMap;
 
use std::collections::hash_map::Entry;
 

	
 
@@ -5,7 +14,9 @@ use crate::protocol::input_source2::*;
 
use crate::protocol::ast::*;
 
use crate::collections::*;
 

	
 
#[derive(Clone, Copy, PartialEq, Eq)]
 
const RESERVED_SYMBOLS: usize = 32;
 

	
 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 
pub enum SymbolScope {
 
    Module(RootId),
 
    Definition(DefinitionId),
 
@@ -25,46 +36,61 @@ struct ScopedSymbols {
 
    scope: SymbolScope,
 
    parent_scope: Option<SymbolScope>,
 
    child_scopes: Vec<SymbolScope>,
 
    start: usize,
 
    end: usize,
 
    symbols: Vec<Symbol>,
 
}
 

	
 
pub enum SymbolDefinition {
 
    Module(RootId),
 
    Struct(StructDefinitionId),
 
    Enum(EnumDefinitionId),
 
    Union(UnionDefinitionId),
 
    Function(FunctionDefinitionId),
 
    Component(ComponentDefinitionId),
 
}
 

	
 
impl SymbolDefinition {
 
    pub fn symbol_class(&self) -> SymbolClass {
 
        use SymbolDefinition as SD;
 
        use SymbolClass as SC;
 

	
 
        match self {
 
            SD::Module(_) => SC::Module,
 
            SD::Struct(_) => SC::Struct,
 
            SD::Enum(_) => SC::Enum,
 
            SD::Union(_) => SC::Union,
 
            SD::Function(_) => SC::Function,
 
            SD::Component(_) => SC::Component,
 
        }
 
    }
 
}
 

	
 
pub enum SymbolData {
 
    
 
}
 

	
 
pub struct Symbol {
 
    // Definition location
 
    // Definition location (may be different from the scope/module in which it
 
    // is used if the symbol is imported)
 
    pub defined_in_module: RootId,
 
    pub defined_in_scope: SymbolScope,
 
    pub definition_span: InputSpan, // full span of definition
 
    pub definition_span: InputSpan, // full span of definition, not just the name
 
    pub identifier_span: InputSpan, // span of just the identifier
 
    // Introduction location (if imported instead of defined)
 

	
 
    pub introduced_at: Option<ImportId>,
 
    // Symbol properties
 
    pub class: SymbolClass,
 
    pub name: StringRef,
 
    pub definition: Option<DefinitionId>,
 
}
 

	
 
impl Symbol {
 
    pub(crate) fn new(root_id: RootId, scope: SymbolScope, span: InputSpan, class: SymbolClass, name: StringRef) -> Self {
 
        Self{
 
            defined_in_module: root_id,
 
            defined_in_scope: scope,
 
            definition_span: span,
 
            class,
 
            name,
 
            definition: None,
 
        }
 
    }
 
    pub name: StringRef<'static>,
 
    pub definition: SymbolDefinition,
 
}
 

	
 
pub struct SymbolTable {
 
    module_lookup: HashMap<StringRef, RootId>,
 
    module_lookup: HashMap<StringRef<'static>, RootId>,
 
    scope_lookup: HashMap<SymbolScope, ScopedSymbols>,
 
    symbols: Vec<Symbol>,
 
}
 

	
 
impl SymbolTable {
 
    /// Inserts a new module by its name. Upon module naming conflict the
 
    /// previously associated `RootId` will be returned.
 
    pub(crate) fn insert_module(&mut self, module_name: StringRef, root_id: RootId) -> Result<(), RootId> {
 
    pub(crate) fn insert_module(&mut self, module_name: StringRef<'static>, root_id: RootId) -> Result<(), RootId> {
 
        match self.module_lookup.entry(module_name) {
 
            Entry::Occupied(v) => {
 
                Err(*v.get())
 
@@ -76,32 +102,67 @@ impl SymbolTable {
 
        }
 
    }
 

	
 
    /// Inserts a new scope with defined symbols. The `parent_scope` must
 
    /// already be added to the symbol table. The symbols are expected to come
 
    /// from a temporary buffer and are copied inside the symbol table. Will
 
    /// return an error if there is a naming conflict.
 
    pub(crate) fn insert_scoped_symbols(
 
        &mut self, parent_scope: Option<SymbolScope>, within_scope: SymbolScope, symbols: &[Symbol]
 
    ) -> Result<(), ParseError> {
 
        // Add scoped symbols
 
        let old_num_symbols = self.symbols.len();
 

	
 
        let new_scope = ScopedSymbols {
 
            scope: within_scope,
 
    /// Retrieves module `RootId` by name
 
    pub(crate) fn get_module_by_name(&mut self, name: &[u8]) -> Option<RootId> {
 
        let string_ref = StringRef::new(name);
 
        self.module_lookup.get(&string_ref).map(|v| *v)
 
    }
 

	
 
    /// Inserts a new symbol scope. The parent must have been added to the
 
    /// symbol table before.
 
    pub(crate) fn insert_scope(&mut self, parent_scope: Option<SymbolScope>, new_scope: SymbolScope) {
 
        debug_assert!(
 
            parent_scope.is_none() || self.scope_lookup.contains_key(parent_scope.as_ref().unwrap()),
 
            "inserting scope {:?} but parent {:?} does not exist", new_scope, parent_scope
 
        );
 
        debug_assert!(!self.scope_lookup.contains_key(&new_scope), "inserting scope {:?}, but it already exists", new_scope);
 

	
 
        if let Some(parent_scope) = parent_scope {
 
            let parent = self.scope_lookup.get_mut(&parent_scope).unwrap();
 
            parent.child_scopes.push(new_scope);
 
        }
 

	
 
        let scope = ScopedSymbols {
 
            scope: new_scope,
 
            parent_scope,
 
            child_scopes: Vec::new(),
 
            start: old_num_symbols,
 
            end: old_num_symbols + symbols.len(),
 
            child_scopes: Vec::with_capacity(RESERVED_SYMBOLS),
 
            symbols: Vec::with_capacity(RESERVED_SYMBOLS)
 
        };
 
        self.scope_lookup.insert(new_scope, scope);
 
    }
 

	
 
        self.symbols.extend(symbols);
 
        self.scope_lookup.insert(within_scope, new_scope);
 
    /// Inserts a symbol into a particular scope. The symbol's name may not
 
    /// exist in the scope or any of its parents. If it does collide then the
 
    /// symbol will be returned, together with the symbol that has the same
 
    /// name.
 
    pub(crate) fn insert_symbol(&mut self, in_scope: SymbolScope, symbol: Symbol) -> Result<(), (Symbol, &Symbol)> {
 
        debug_assert!(self.scope_lookup.contains_key(&in_scope), "inserting symbol {}, but scope {:?} does not exist", symbol.name.as_str(), in_scope);
 
        let mut seek_scope = in_scope;
 
        loop {
 
            let scoped_symbols = self.scope_lookup.get(&seek_scope).unwrap();
 
            for existing_symbol in scoped_symbols.symbols.iter() {
 
                if symbol.name == existing_symbol.name {
 
                    return Err((symbol, existing_symbol))
 
                }
 
            }
 

	
 
        if let Some(parent_scope) = parent_scope.as_ref() {
 
            let parent = self.scope_lookup.get_mut(parent_scope).unwrap();
 
            parent.child_scopes.push(within_scope);
 
            match scoped_symbols.parent_scope {
 
                Some(parent_scope) => { seek_scope = parent_scope; },
 
                None => { break; }
 
            }
 
        }
 

	
 
        // If here, then there is no collision
 
        let scoped_symbols = self.scope_lookup.get_mut(&in_scope).unwrap();
 
        scoped_symbols.symbols.push(symbol);
 
        Ok(())
 
    }
 

	
 
    /// Retrieves a particular scope. As this will be called by the compiler to
 
    /// retrieve scopes that MUST exist, this function will panic if the
 
    /// indicated scope does not exist.
 
    pub(crate) fn get_scope_by_id(&mut self, scope: &SymbolScope) -> &mut ScopedSymbols {
 
        debug_assert!(self.scope_lookup.contains_key(scope), "retrieving scope {:?}, but it doesn't exist", scope);
 
        self.scope_lookup.get_mut(scope).unwrap()
 
    }
 
}
 
\ No newline at end of file
src/protocol/tests/utils.rs
Show inline comments
 
@@ -450,11 +450,11 @@ impl<'a> UnionTester<'a> {
 

	
 
pub(crate) struct FunctionTester<'a> {
 
    ctx: TestCtx<'a>,
 
    def: &'a Function,
 
    def: &'a FunctionDefinition,
 
}
 

	
 
impl<'a> FunctionTester<'a> {
 
    fn new(ctx: TestCtx<'a>, def: &'a Function) -> Self {
 
    fn new(ctx: TestCtx<'a>, def: &'a FunctionDefinition) -> Self {
 
        Self{ ctx, def }
 
    }
 

	
src/protocol/tokenizer/mod.rs
Show inline comments
 
@@ -8,7 +8,7 @@ use crate::protocol::input_source2::{
 
pub(crate) const KW_STRUCT:    &'static [u8] = b"struct";
 
pub(crate) const KW_ENUM:      &'static [u8] = b"enum";
 
pub(crate) const KW_UNION:     &'static [u8] = b"union";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"func";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"function";
 
pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
 
pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
 
pub(crate) const KW_IMPORT:    &'static [u8] = b"import";
 
@@ -127,9 +127,13 @@ pub(crate) struct TokenRange {
 
    pub range_kind: TokenRangeKind,
 
    pub curly_depth: u32,
 
    // InputPosition offset is limited to u32, so token ranges can be as well.
 
    pub start: u32,
 
    pub end: u32,
 
    pub subranges: u32,
 
    pub start: u32,             // first token (inclusive index)
 
    pub end: u32,               // last token (exclusive index)
 
    // Subranges and where they can be found
 
    pub subranges: u32,         // Number of subranges
 
    pub first_child_idx: u32,   // First subrange (or points to self if no subranges)
 
    pub last_child_idx: u32,    // Last subrange (or points to self if no subranges)
 
    pub next_sibling_idx: Option<u32>,
 
}
 

	
 
pub(crate) struct TokenBuffer {
 
@@ -280,6 +284,9 @@ impl Tokenizer {
 
            start: 0,
 
            end: 0,
 
            subranges: 0,
 
            first_child_idx: 0,
 
            last_child_idx: 0,
 
            next_sibling_idx: None,
 
        });
 

	
 
        // Main tokenization loop
 
@@ -367,6 +374,35 @@ impl Tokenizer {
 
            ));
 
        }
 

	
 
        // TODO: @remove once I'm sure the algorithm works. For now it is better
 
        //  if the debugging is a little more expedient
 
        if cfg!(debug_assertions) {
 
            // For each range make sure its children make sense
 
            for parent_idx in 0..target.ranges.len() {
 
                let cur_range = &target.ranges[parent_idx];
 
                if cur_range.subranges == 0 {
 
                    assert_eq!(cur_range.first_child_idx, parent_idx as u32);
 
                    assert_eq!(cur_range.last_child_idx, parent_idx as u32);
 
                } else {
 
                    assert_ne!(cur_range.first_child_idx, parent_idx as u32);
 
                    assert_ne!(cur_range.last_child_idx, parent_idx as u32);
 

	
 
                    let mut child_counter = 0u32;
 
                    let mut last_child_idx = cur_range.first_child_idx;
 
                    let mut child_idx = Some(cur_range.first_child_idx);
 
                    while let Some(cur_child_idx) = child_idx {
 
                        let child_range = &target.ranges[cur_child_idx as usize];
 
                        assert_eq!(child_range.parent_idx, parent_idx);
 
                        child_idx = child_range.next_sibling_idx;
 
                        child_counter += 1;
 
                    }
 

	
 
                    assert_eq!(cur_range.last_child_idx, last_child_idx);
 
                    assert_eq!(cur_range.subranges, child_counter);
 
                }
 
            }
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
@@ -820,8 +856,16 @@ impl Tokenizer {
 
        // intermediate "code" range.
 
        if cur_range.end != first_token {
 
            let code_start = cur_range.end;
 
            let code_range_idx = target.ranges.len() as u32;
 

	
 
            if cur_range.first_child_idx == self.stack_idx as u32 {
 
                // The parent of the new "code" range we're going to push does
 
                // not have any registered children yet.
 
                cur_range.first_child_idx = code_range_idx;
 
            }
 
            cur_range.last_child_idx = code_range_idx + 1;
 

	
 
            cur_range.end = first_token;
 
            debug_assert_ne!(code_start, first_token);
 
            cur_range.subranges += 1;
 
            target.ranges.push(TokenRange{
 
                parent_idx: self.stack_idx,
 
@@ -830,12 +874,25 @@ impl Tokenizer {
 
                start: code_start,
 
                end: first_token,
 
                subranges: 0,
 
                first_child_idx: code_range_idx,
 
                last_child_idx: code_range_idx,
 
                next_sibling_idx: Some(code_range_idx + 1), // we're going to push this thing next
 
            });
 
        } else {
 
            // We're going to push the range in the code below, but while we
 
            // have the `cur_range` borrowed mutably, we fix up its children
 
            // indices.
 
            let new_range_idx = target.ranges.len() as u32;
 
            if cur_range.first_child_idx == self.stack_idx as u32 {
 
                cur_range.first_child_idx = new_range_idx;
 
            }
 
            cur_range.last_child_idx = new_range_idx;
 
        }
 

	
 
        // Insert a new range
 
        let parent_idx = self.stack_idx;
 
        self.stack_idx = target.ranges.len();
 
        let new_range_idx = self.stack_idx as u32;
 
        target.ranges.push(TokenRange{
 
            parent_idx,
 
            range_kind,
 
@@ -843,6 +900,9 @@ impl Tokenizer {
 
            start: first_token,
 
            end: first_token,
 
            subranges: 0,
 
            first_child_idx: new_range_idx,
 
            last_child_idx: new_range_idx,
 
            next_sibling_idx: None,
 
        });
 
    }
 

	
0 comments (0 inline, 0 general)