From 1a42eb33aa761ebad2152020e4be27dbcfdac6d9 2021-04-15 17:47:02 From: MH Date: 2021-04-15 17:47:02 Subject: [PATCH] WIP on compiler rearchitecting --- diff --git a/src/collections/string_pool.rs b/src/collections/string_pool.rs index 30cb8a5c6451ca390b91ed9c5a3d93bf989a2406..54505dba6aa01de3f3c4567a3e23d362c6226e5c 100644 --- a/src/collections/string_pool.rs +++ b/src/collections/string_pool.rs @@ -1,17 +1,31 @@ use std::ptr::null_mut; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; const SLAB_SIZE: usize = u16::max_value() as usize; #[derive(Clone)] -pub struct StringRef { +pub struct StringRef<'a> { data: *const u8, length: usize, + _phantom: PhantomData<&'a [u8]>, } -impl StringRef { - pub fn as_str<'a>(&'a self) -> &'a str { +impl<'a> StringRef<'a> { + /// `new` constructs a new StringRef whose data is not owned by the + /// `StringPool`, hence cannot have a `'static` lifetime. + pub(crate) fn new(data: &'a [u8]) -> StringRef<'a> { + // This is an internal (compiler) function: so debug_assert that the + // string is valid UTF8. Most commonly the input will come from the + // code's source file, which is checked for ASCII-ness anyway. + debug_assert!(data.is_ascii()); + let length = data.len(); + let data = data.as_ptr(); + StringRef{ data, length, _phantom: PhantomData } + } + + pub fn as_str(&self) -> &'a str { unsafe { let slice = std::slice::from_raw_parts::<'a, u8>(self.data, self.length); std::str::from_utf8_unchecked(slice) @@ -66,7 +80,7 @@ impl StringPool { } // Interns a string to the StringPool, returning a reference to it - pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef { + pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef<'static> { // TODO: Large string allocations, if ever needed. 
let data_len = data.len(); assert!(data_len <= SLAB_SIZE, "string is too large for slab"); @@ -88,7 +102,7 @@ impl StringPool { unsafe { let start = last.data.as_ptr().offset(range_start as isize); - StringRef{ data: start, length: data_len } + StringRef{ data: start, length: data_len, _phantom: PhantomData } } } diff --git a/src/protocol/arena.rs b/src/protocol/arena.rs index df2d5ce1cd2ce9d45e97f3d4260f835a6fe7009a..68dc8b5cd6f8ad83dbeef578b351f0d2055e557a 100644 --- a/src/protocol/arena.rs +++ b/src/protocol/arena.rs @@ -3,14 +3,16 @@ use core::hash::Hash; use core::marker::PhantomData; pub struct Id { - pub(crate) index: u32, + // Not actually a signed index into the heap. But the index is set to -1 if + // we don't know an ID yet. This is checked during debug mode. + pub(crate) index: i32, _phantom: PhantomData, } impl Id { - pub(crate) fn new(index: u32) -> Self { - Self{ index, _phantom: Default::default() } - } + pub(crate) fn new_invalid() -> Self { Self{ index: -1, _phantom: Default::default() } } + pub(crate) fn new(index: i32) -> Self { Self{ index, _phantom: Default::default() } } + pub(crate) fn is_invalid(&self) -> bool { self.index < 0 } } #[derive(Debug)] @@ -46,15 +48,19 @@ impl Arena { pub fn new() -> Self { Self { store: vec![] } } + pub fn alloc_with_id(&mut self, f: impl FnOnce(Id) -> T) -> Id { - use std::convert::TryFrom; - let id = Id::new(u32::try_from(self.store.len()).expect("Out of capacity!")); + // Lets keep this a runtime assert. 
+ assert!(self.store.len() < i32::max_value() as usize, "Arena out of capacity"); + let id = Id::new(self.store.len() as i32); self.store.push(f(id)); id } + pub fn iter(&self) -> impl Iterator { self.store.iter() } + pub fn len(&self) -> usize { self.store.len() } @@ -62,11 +68,13 @@ impl Arena { impl core::ops::Index> for Arena { type Output = T; fn index(&self, id: Id) -> &Self::Output { + debug_assert!(!id.is_invalid(), "attempted to index into Arena with an invalid id (index < 0)"); self.store.index(id.index as usize) } } impl core::ops::IndexMut> for Arena { fn index_mut(&mut self, id: Id) -> &mut Self::Output { + debug_assert!(!id.is_invalid(), "attempted to index_mut into Arena with an invalid id (index < 0)"); self.store.index_mut(id.index as usize) } } \ No newline at end of file diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 2d42e0c68ffae8f1eb38fba278842d7de83d7a54..37c977fdd1de92cc52c1398305ff9553be0bc4a3 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -15,7 +15,6 @@ use crate::protocol::input_source2::{InputPosition2, InputSpan}; const MAX_LEVEL: usize = 128; const MAX_NAMESPACES: usize = 64; - /// Helper macro that defines a type alias for a AST element ID. In this case /// only used to alias the `Id` types. macro_rules! define_aliased_ast_id { @@ -66,9 +65,9 @@ macro_rules! 
define_new_ast_id { pub struct $name (pub(crate) $parent); impl $name { - pub fn upcast(self) -> $parent { - self.0 - } + pub(crate) fn new_invalid() -> Self { Self($parent::new_invalid()) } + pub(crate) fn is_invalid(&self) -> bool { self.0.is_invalid() } + pub fn upcast(self) -> $parent { self.0 } } }; // Variant where we define the type, and the Index and IndexMut traits @@ -127,11 +126,11 @@ define_new_ast_id!(ParameterId, VariableId, index(Parameter, Variable::Parameter define_new_ast_id!(LocalId, VariableId, index(Local, Variable::Local, variables), alloc(alloc_local)); define_aliased_ast_id!(DefinitionId, Id, index(Definition, definitions)); -define_new_ast_id!(StructId, DefinitionId, index(StructDefinition, Definition::Struct, definitions), alloc(alloc_struct_definition)); -define_new_ast_id!(EnumId, DefinitionId, index(EnumDefinition, Definition::Enum, definitions), alloc(alloc_enum_definition)); -define_new_ast_id!(UnionId, DefinitionId, index(UnionDefinition, Definition::Union, definitions), alloc(alloc_union_definition)); -define_new_ast_id!(ComponentId, DefinitionId, index(Component, Definition::Component, definitions), alloc(alloc_component)); -define_new_ast_id!(FunctionId, DefinitionId, index(Function, Definition::Function, definitions), alloc(alloc_function)); +define_new_ast_id!(StructDefinitionId, DefinitionId, index(StructDefinition, Definition::Struct, definitions), alloc(alloc_struct_definition)); +define_new_ast_id!(EnumDefinitionId, DefinitionId, index(EnumDefinition, Definition::Enum, definitions), alloc(alloc_enum_definition)); +define_new_ast_id!(UnionDefinitionId, DefinitionId, index(UnionDefinition, Definition::Union, definitions), alloc(alloc_union_definition)); +define_new_ast_id!(ComponentDefinitionId, DefinitionId, index(ComponentDefinition, Definition::Component, definitions), alloc(alloc_component_definition)); +define_new_ast_id!(FunctionDefinitionId, DefinitionId, index(FunctionDefinition, Definition::Function, definitions), 
alloc(alloc_function_definition)); define_aliased_ast_id!(StatementId, Id, index(Statement, statements)); define_new_ast_id!(BlockStatementId, StatementId, index(BlockStatement, Statement::Block, statements), alloc(alloc_block_statement)); @@ -328,11 +327,10 @@ impl SyntaxElement for Import { pub struct ImportModule { pub this: ImportId, // Phase 1: parser - pub position: InputPosition, - pub module_name: Vec, + pub span: InputSpan, + pub module_name: Identifier, pub alias: Identifier, - // Phase 2: module resolving - pub module_id: Option, + pub module_id: RootId, } #[derive(Debug, Clone)] @@ -349,7 +347,7 @@ pub struct AliasedSymbol { pub struct ImportSymbols { pub this: ImportId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub module_name: Vec, // Phase 2: module resolving pub module_id: Option, @@ -363,7 +361,7 @@ pub struct ImportSymbols { #[derive(Debug, Clone)] pub struct Identifier { pub span: InputSpan, - pub value: StringRef, + pub value: StringRef<'static>, } impl PartialEq for Identifier { @@ -965,8 +963,8 @@ pub enum Definition { Struct(StructDefinition), Enum(EnumDefinition), Union(UnionDefinition), - Component(Component), - Function(Function), + Component(ComponentDefinition), + Function(FunctionDefinition), } impl Definition { @@ -1012,7 +1010,7 @@ impl Definition { _ => false, } } - pub fn as_component(&self) -> &Component { + pub fn as_component(&self) -> &ComponentDefinition { match self { Definition::Component(result) => result, _ => panic!("Unable to cast `Definition` to `Component`"), @@ -1024,7 +1022,7 @@ impl Definition { _ => false, } } - pub fn as_function(&self) -> &Function { + pub fn as_function(&self) -> &FunctionDefinition { match self { Definition::Function(result) => result, _ => panic!("Unable to cast `Definition` to `Function`"), @@ -1094,14 +1092,20 @@ pub struct StructFieldDefinition { #[derive(Debug, Clone)] pub struct StructDefinition { - pub this: StructId, + pub this: StructDefinitionId, // Phase 
1: parser - pub position: InputPosition, + pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, pub fields: Vec } +impl StructDefinition { + pub(crate) fn new_empty(this: StructDefinitionId, span: InputSpan, identifier: Identifier) -> Self { + Self{ this, span, identifier, poly_vars: Vec::new(), fields: Vec::new() } + } +} + #[derive(Debug, Clone)] pub enum EnumVariantValue { None, @@ -1117,14 +1121,20 @@ pub struct EnumVariantDefinition { #[derive(Debug, Clone)] pub struct EnumDefinition { - pub this: EnumId, + pub this: EnumDefinitionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, pub variants: Vec, } +impl EnumDefinition { + pub(crate) fn new_empty(this: EnumDefinitionId, span: InputSpan, identifier: Identifier) -> Self { + Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() } + } +} + #[derive(Debug, Clone)] pub enum UnionVariantValue { None, @@ -1140,14 +1150,20 @@ pub struct UnionVariantDefinition { #[derive(Debug, Clone)] pub struct UnionDefinition { - pub this: UnionId, + pub this: UnionDefinitionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, pub variants: Vec, } +impl UnionDefinition { + pub(crate) fn new_empty(this: UnionDefinitionId, span: InputSpan, identifier: Identifier) -> Self { + Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() } + } +} + #[derive(Debug, Clone, Copy)] pub enum ComponentVariant { Primitive, @@ -1155,10 +1171,10 @@ pub enum ComponentVariant { } #[derive(Debug, Clone)] -pub struct Component { - pub this: ComponentId, +pub struct ComponentDefinition { + pub this: ComponentDefinitionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub variant: ComponentVariant, pub identifier: Identifier, pub poly_vars: Vec, @@ -1166,17 +1182,28 @@ pub struct Component { pub body: StatementId, } -impl 
SyntaxElement for Component { +impl ComponentDefinition { + pub(crate) fn new_empty(this: ComponentDefinitionId, span: InputSpan, variant: ComponentVariant, identifier: Identifier) -> Self { + Self{ + this, span, variant, identifier, + poly_vars: Vec::new(), + parameters: Vec::new(), + body: StatementId::new_invalid() + } + } +} + +impl SyntaxElement for ComponentDefinition { fn position(&self) -> InputPosition { self.position } } #[derive(Debug, Clone)] -pub struct Function { - pub this: FunctionId, +pub struct FunctionDefinition { + pub this: FunctionDefinitionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub return_type: ParserTypeId, pub identifier: Identifier, pub poly_vars: Vec, @@ -1184,7 +1211,19 @@ pub struct Function { pub body: StatementId, } -impl SyntaxElement for Function { +impl FunctionDefinition { + pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier) -> Self { + Self { + this, span, identifier, + return_type: ParserTypeId::new_invalid(), + poly_vars: Vec::new(), + parameters: Vec::new(), + body: StatementId::new_invalid(), + } + } +} + +impl SyntaxElement for FunctionDefinition { fn position(&self) -> InputPosition { self.position } diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 29ed1c92942d206de98b701953917367d665bd00..9642d69a11378626d2dfacc5f3bb509327c77541 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2378,7 +2378,7 @@ impl Lexer<'_> { // Parse body let body = self.consume_block_statement(h)?; - Ok(h.alloc_component(|this| Component { + Ok(h.alloc_component(|this| ComponentDefinition { this, variant: ComponentVariant::Composite, position, @@ -2404,7 +2404,7 @@ impl Lexer<'_> { // Consume body let body = self.consume_block_statement(h)?; - Ok(h.alloc_component(|this| Component { + Ok(h.alloc_component(|this| ComponentDefinition { this, variant: ComponentVariant::Primitive, position, @@ -2430,7 +2430,7 @@ impl Lexer<'_> { // Consume body 
let body = self.consume_block_statement(h)?; - Ok(h.alloc_function(|this| Function { + Ok(h.alloc_function(|this| FunctionDefinition { this, position, return_type, diff --git a/src/protocol/lexer2.rs b/src/protocol/lexer2.rs index 1cbb06f354f4a7b846c6992ac58ba0cd033f80a6..52d6b62b3674fd8f2b1b682c25a686dd95e81b10 100644 --- a/src/protocol/lexer2.rs +++ b/src/protocol/lexer2.rs @@ -25,28 +25,13 @@ enum KeywordDefinition { Composite, } -impl KeywordDefinition { - fn as_symbol_class(&self) -> SymbolClass { - use KeywordDefinition as KD; - use SymbolClass as SC; - - match self { - KD::Struct => SC::Struct, - KD::Enum => SC::Enum, - KD::Union => SC::Union, - KD::Function => SC::Function, - KD::Primitive | KD::Composite => SC::Component, - } - } -} - struct Module { // Buffers source: InputSource, tokens: TokenBuffer, // Identifiers root_id: RootId, - name: Option<(PragmaId, StringRef)>, + name: Option<(PragmaId, StringRef<'static>)>, version: Option<(PragmaId, i64)>, phase: ModuleCompilationPhase, } @@ -61,19 +46,23 @@ struct Ctx<'a> { /// added to the symbol table such that during AST-construction we know which /// identifiers point to types. Will also parse all pragmas to determine module /// names. 
-pub(crate) struct ASTSymbolPrePass { +pub(crate) struct PassPreSymbol { symbols: Vec, pragmas: Vec, + imports: Vec, + definitions: Vec, buffer: String, has_pragma_version: bool, has_pragma_module: bool, } -impl ASTSymbolPrePass { +impl PassPreSymbol { pub(crate) fn new() -> Self { Self{ symbols: Vec::with_capacity(128), pragmas: Vec::with_capacity(8), + imports: Vec::with_capacity(32), + definitions: Vec::with_capacity(128), buffer: String::with_capacity(128), has_pragma_version: false, has_pragma_module: false, @@ -83,6 +72,8 @@ impl ASTSymbolPrePass { fn reset(&mut self) { self.symbols.clear(); self.pragmas.clear(); + self.imports.clear(); + self.definitions.clear(); self.has_pragma_version = false; self.has_pragma_module = false; } @@ -92,11 +83,10 @@ impl ASTSymbolPrePass { let module = &mut modules[module_idx]; let module_range = &module.tokens.ranges[0]; - let expected_parent_idx = 0; - let expected_subranges = module_range.subranges; + debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized); debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - debug_assert_eq!(module.root_id.index, 0); + debug_assert!(module.root_id.is_invalid()); // not set yet, // Preallocate root in the heap let root_id = ctx.heap.alloc_protocol_description(|this| { @@ -109,44 +99,45 @@ impl ASTSymbolPrePass { }); module.root_id = root_id; - // Visit token ranges to detect defintions - let mut visited_subranges = 0; - for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() { - // Skip any ranges that do not belong to the module - let cur_range = &module.tokens.ranges[range_idx]; - if cur_range.parent_idx != expected_parent_idx { - continue; - } + // Visit token ranges to detect definitions and pragmas + let mut range_idx = module_range.first_child_idx; + loop { + let range_idx_usize = range_idx as usize; + let cur_range = &module.tokens.ranges[range_idx_usize]; // Parse if it is a definition or a pragma if cur_range.range_kind == 
TokenRangeKind::Definition { - self.visit_definition_range(modules, module_idx, ctx, range_idx)?; + self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; } else if cur_range.range_kind == TokenRangeKind::Pragma { - self.visit_pragma_range(modules, module_idx, ctx, range_idx)?; + self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?; } - visited_subranges += 1; - if visited_subranges == expected_subranges { - break; + match cur_range.next_sibling_idx { + Some(idx) => { range_idx = idx; }, + None => { break; }, } } - // By now all symbols should have been found: add to symbol table and - // add the parsed pragmas to the preallocated root in the heap. - debug_assert_eq!(visited_subranges, expected_subranges); - ctx.symbols.insert_scoped_symbols(None, SymbolScope::Module(module.root_id), &self.symbols); + // Add the module's symbol scope and the symbols we just parsed + let module_scope = SymbolScope::Module(root_id); + ctx.symbols.insert_scope(None, module_scope); + for symbol in self.symbols.drain(..) 
{ + if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) { + return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)) + } + } + // Modify the preallocated root let root = &mut ctx.heap[root_id]; - debug_assert!(root.pragmas.is_empty()); - root.pragmas.extend(&self.pragmas); - + root.pragmas.extend(self.pragmas.drain(..)); + root.definitions.extend(self.definitions.drain(..)); module.phase = ModuleCompilationPhase::DefinitionsScanned; Ok(()) } - fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { - let module = &mut modules[module_idx]; + fn visit_pragma_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { + let module = &modules[module_idx]; let range = &module.tokens.ranges[range_idx]; let mut iter = module.tokens.iter_range(range); @@ -213,7 +204,7 @@ impl ASTSymbolPrePass { Ok(()) } - fn visit_definition_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { + fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { let module = &modules[module_idx]; let range = &module.tokens.ranges[range_idx]; let definition_span = InputSpan::from_positions( @@ -222,60 +213,107 @@ impl ASTSymbolPrePass { ); let mut iter = module.tokens.iter_range(range); - // Because we're visiting a definition, we expect an ident that resolves - // to a keyword indicating a definition. 
- let kw_text = consume_ident_text(&module.source, &mut iter).unwrap(); + // First ident must be type of symbol + let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); let kw = parse_definition_keyword(kw_text).unwrap(); - // Retrieve identifier and put in temp symbol table - let definition_ident = consume_ident_text(&module.source, &mut iter)?; - let definition_ident = ctx.pool.intern(definition_ident); - let symbol_class = kw.as_symbol_class(); - - // Get the token indicating the end of the definition to get the full - // span of the definition - let last_token = &module.tokens.tokens[range.end - 1]; - debug_assert_eq!(last_token.kind, TokenKind::CloseCurly); + // Retrieve identifier of definition + let (identifier_text, identifier_span) = consume_ident(&module.source, &mut iter)?; + let ident_text = ctx.pool.intern(identifier_text); + let identifier = Identifier{ span: identifier_span, value: ident_text }; + + // Reserve space in AST for definition and add it to the symbol table + let symbol_definition; + let ast_definition_id; + match kw { + KeywordDefinition::Struct => { + let struct_def_id = ctx.heap.alloc_struct_definition(|this| { + StructDefinition::new_empty(this, definition_span, identifier) + }); + symbol_definition = SymbolDefinition::Struct(struct_def_id); + ast_definition_id = struct_def_id.upcast(); + }, + KeywordDefinition::Enum => { + let enum_def_id = ctx.heap.alloc_enum_definition(|this| { + EnumDefinition::new_empty(this, definition_span, identifier) + }); + symbol_definition = SymbolDefinition::Enum(enum_def_id); + ast_definition_id = enum_def_id.upcast(); + }, + KeywordDefinition::Union => { + let union_def_id = ctx.heap.alloc_union_definition(|this| { + UnionDefinition::new_empty(this, definition_span, identifier) + }); + symbol_definition = SymbolDefinition::Union(union_def_id); + ast_definition_id = union_def_id.upcast() + }, + KeywordDefinition::Function => { + let func_def_id = 
ctx.heap.alloc_function_definition(|this| { + FunctionDefinition::new_empty(this, definition_span, identifier) + }); + symbol_definition = SymbolDefinition::Function(func_def_id); + ast_definition_id = func_def_id.upcast(); + }, + KeywordDefinition::Primitive | KeywordDefinition::Composite => { + let component_variant = if kw == KeywordDefinition::Primitive { + ComponentVariant::Primitive + } else { + ComponentVariant::Composite + }; + let comp_def_id = ctx.heap.alloc_component_definition(|this| { + ComponentDefinition::new_empty(this, definition_span, component_variant, identifier) + }); + symbol_definition = SymbolDefinition::Component(comp_def_id); + ast_definition_id = comp_def_id.upcast(); + } + } - self.symbols.push(Symbol::new( - module.root_id, - SymbolScope::Module(module.root_id), + let symbol = Symbol{ + defined_in_module: module.root_id, + defined_in_scope: SymbolScope::Module(module.root_id), definition_span, - symbol_class, - definition_ident - )); + identifier_span, + introduced_at: None, + name: ctx.pool.intern(identifier_text), + definition: symbol_definition + }; + self.symbols.push(symbol); + self.definitions.push(ast_definition_id); Ok(()) } } -pub(crate) struct ASTImportPrePass { +/// Parses all the imports in the module tokens. Is applied after the +/// definitions and name of modules are resolved. Hence we should be able to +/// resolve all symbols to their appropriate module/definition.
+pub(crate) struct PassImport { + imports: Vec, } -impl ASTImportPrePass { +impl PassImport { + pub(crate) fn new() -> Self { + Self{ imports: Vec::with_capacity(32) } + } pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> { let module = &modules[module_idx]; let module_range = &module.tokens.ranges[0]; + debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::DefinitionsScanned)); debug_assert_eq!(module.phase, ModuleCompilationPhase::DefinitionsScanned); debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - let expected_parent_idx = 0; - let expected_subranges = module_range.subranges; - let mut visited_subranges = 0; + let mut range_idx = module_range.first_child_idx; + loop { + let range_idx_usize = range_idx as usize; + let cur_range = &module.tokens.ranges[range_idx_usize]; - for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() { - let cur_range = &module.tokens.ranges[range_idx]; - if cur_range.parent_idx != expected_parent_idx { - continue; - } - - visited_subranges += 1; if cur_range.range_kind == TokenRangeKind::Import { - self.visit_import_range(modules, module_idx, ctx, range_idx)?; + self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?; } - if visited_subranges == expected_subranges { - break; + match cur_range.next_sibling_idx { + Some(idx) => { range_idx = idx; }, + None => { break; } } } @@ -292,26 +330,81 @@ impl ASTImportPrePass { let mut iter = module.tokens.iter_range(import_range); // Consume "import" - let _import_ident = consume_ident_text(&module.source, &mut iter)?; + let (_import_ident, import_span) = + consume_ident(&module.source, &mut iter)?; debug_assert_eq!(_import_ident, KW_IMPORT); // Consume module name - let (module_name, _) = consume_domain_ident(&module.source, &mut iter)?; + let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?; + let target_root_id = 
ctx.symbols.get_module_by_name(module_name); + if target_root_id.is_none() { + return Err(ParseError::new_error_at_span( + &module.source, module_name_span, + format!("could not resolve module '{}'", String::from_utf8_lossy(module_name)) + )); + } + let module_name = ctx.pool.intern(module_name); + let target_root_id = target_root_id.unwrap(); + + // Check for subsequent characters + let next = iter.next(); + if has_ident(&module.source, &mut iter, b"as") { + iter.consume(); + let (alias_text, alias_span) = consume_ident(source, &mut iter)?; + let alias = ctx.pool.intern(alias_text); + let import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{ + this, + span: import_span, + module_name: Identifier{ span: module_name_span, value: module_name }, + alias: Identifier{ span: alias_span, value: alias }, + module_id: target_root_id + })); + ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{ + defined_in_module: target_root_id, + defined_in_scope: SymbolScope::Module(target_root_id), + definition_span + }) + } else if Some(TokenKind::ColonColon) == next { + iter.consume(); + } else { + // Assume implicit alias, then check if we get the semicolon next + let module_name_str = module_name.as_str(); + let last_ident_start = module_name_str.rfind('.').map_or(0, |v| v + 1); + let alias_text = &module_name_str.as_bytes()[last_ident_start..]; + let alias = ctx.pool.intern(alias_text); + let alias_span = InputSpan::from_positions( + module_name_span.begin.with_offset(last_ident_start as u32), + module_name_span.end + ); + } Ok(()) } } fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { - let (_, name_start, mut name_end) = consume_ident(source, iter)?; + let (_, mut span) = consume_ident(source, iter)?; while let Some(TokenKind::Dot) = iter.next() { consume_dot(source, iter)?; - let (_, _, new_end) = consume_ident(source, iter)?; - name_end = new_end; + let (_, new_span) = 
consume_ident(source, iter)?; + span.end = new_span.end; } - Ok((source.section(name_start, name_end), InputSpan::from_positions(name_start, name_end))) + // Not strictly necessary, but probably a reasonable restriction: this + // simplifies parsing of module naming and imports. + if span.begin.line != span.end.line { + return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines")); + } + + // If module name consists of a single identifier, then it may not match any + // of the reserved keywords + let section = source.section(span.begin, span.end); + if is_reserved_keyword(section) { + return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword")); + } + + Ok((source.section(span.begin, span.end), span)) } fn consume_dot<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(), ParseError> { @@ -387,22 +480,126 @@ fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<( Ok((source.section(pragma_start, pragma_end), pragma_start, pragma_end)) } -fn consume_ident_text<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<&'a [u8], ParseError> { - if Some(TokenKind::Ident) != iter.next() { - return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected an identifier")); +fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool { + if Some(TokenKind::Ident) == iter.next() { + let (start, end) = iter.next_range(); + return source.section(start, end) == expected; } - let (ident_start, ident_end) = iter.next_range(); - iter.consume(); - Ok(source.section(ident_start, ident_end)) + + false +} + +fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> { + if Some(TokenKind::Ident) == iter.next() { + let (start, end) = iter.next_range(); + return Some(source.section(start, end)) + } + + None } -fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, 
InputPosition), ParseError> { +/// Consumes any identifier and returns it together with its span. Does not +/// check if the identifier is a reserved keyword. +fn consume_any_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { if Some(TokenKind::Ident) != iter.next() { return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected an identifier")); } let (ident_start, ident_end) = iter.next_range(); iter.consume(); - Ok((source.section(ident_start, ident_end), ident_start, ident_end)) + Ok((source.section(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end))) +} + +/// Consumes an identifier that is not a reserved keyword and returns it +/// together with its span. +fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { + let (ident, span) = consume_any_ident(source, iter)?; + if is_reserved_keyword(ident) { + return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword")); + } + + Ok((ident, span)) +} + +fn is_reserved_definition_keyword(text: &[u8]) -> bool { + return ([ + b"struct", b"enum", b"union", b"function", b"primitive" + ] as &[[u8]]).contains(text) +} + +fn is_reserved_statement_keyword(text: &[u8]) -> bool { + return ([ + b"channel", b"import", b"as", + b"if", b"while", b"break", b"continue", b"goto", b"return", + b"synchronous", b"assert", b"new", + ] as &[[u8]]).contains(text) +} + +fn is_reserved_expression_keyword(text: &[u8]) -> bool { + return ([ + b"let", b"true", b"false", b"null", // literals + b"get", b"put", b"fires", b"create", b"length", // functions + ] as &[[u8]]).contains(text) +} + +fn is_reserved_type_keyword(text: &[u8]) -> bool { + return ([ + b"in", b"out", b"msg", + b"bool", + b"u8", b"u16", b"u32", b"u64", + b"s8", b"s16", b"s32", b"s64", + b"auto" + ] as &[[u8]]).contains(text) +} + +fn is_reserved_keyword(text: &[u8]) -> bool { + return
is_reserved_definition_keyword(text) || + is_reserved_statement_keyword(text) || + is_reserved_type_keyword(text); +} + +/// Constructs a human-readable message indicating why there is a conflict of +/// symbols. +// Note: passing the `module_idx` is not strictly necessary, but will prevent +// programmer mistakes during development: we get a conflict because we're +// currently parsing a particular module. +fn construct_symbol_conflict_error(modules: &[Module], module_idx: usize, ctx: &Ctx, new_symbol: &Symbol, old_symbol: &Symbol) -> ParseError { + let module = &modules[module_idx]; + let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) { + match symbol.introduced_at { + Some(import_id) => { + // Symbol is being imported + let import = &ctx.heap[import_id]; + match import { + Import::Module(import) => ( + format!("the module aliased as '{}' imported here", symbol.name.as_str()), + import.span + ), + Import::Symbols(symbols) => ( + format!("the type '{}' imported here", symbol.name.as_str()), + symbols.span + ), + } + }, + None => { + // Symbol is being defined + debug_assert_eq!(symbol.defined_in_module, module.root_id); + debug_assert_ne!(symbol.definition.symbol_class(), SymbolClass::Module); + ( + format!("the type '{}' defined here", symbol.name.as_str()), + symbol.identifier_span + ) + } + } + }; + + let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol); + let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol); + return ParseError::new_error_at_span( + &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg) + ).with_info_at_span( + &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg) + ) } fn parse_definition_keyword(keyword: &[u8]) -> Option { diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs index fecb9d079df26a592365855742a6eecb355f8620..f2f6839a73bd6eedb2c0390b57140d2006a52598 100644 --- 
a/src/protocol/parser/symbol_table2.rs +++ b/src/protocol/parser/symbol_table2.rs @@ -1,3 +1,12 @@ +/// symbol_table.rs +/// +/// The datastructure used to lookup symbols within particular scopes. Scopes +/// may be module-level or definition level, although imports and definitions +/// within definitions are currently not allowed. +/// +/// TODO: Once the compiler has matured, find out ways to optimize to prevent +/// the repeated HashMap lookup. + use std::collections::HashMap; use std::collections::hash_map::Entry; @@ -5,7 +14,9 @@ use crate::protocol::input_source2::*; use crate::protocol::ast::*; use crate::collections::*; -#[derive(Clone, Copy, PartialEq, Eq)] +const RESERVED_SYMBOLS: usize = 32; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SymbolScope { Module(RootId), Definition(DefinitionId), @@ -25,46 +36,61 @@ struct ScopedSymbols { scope: SymbolScope, parent_scope: Option, child_scopes: Vec, - start: usize, - end: usize, + symbols: Vec, +} + +pub enum SymbolDefinition { + Module(RootId), + Struct(StructDefinitionId), + Enum(EnumDefinitionId), + Union(UnionDefinitionId), + Function(FunctionDefinitionId), + Component(ComponentDefinitionId), +} + +impl SymbolDefinition { + pub fn symbol_class(&self) -> SymbolClass { + use SymbolDefinition as SD; + use SymbolClass as SC; + + match self { + SD::Module(_) => SC::Module, + SD::Struct(_) => SC::Struct, + SD::Enum(_) => SC::Enum, + SD::Union(_) => SC::Union, + SD::Function(_) => SC::Function, + SD::Component(_) => SC::Component, + } + } +} + +pub enum SymbolData { + } pub struct Symbol { - // Definition location + // Definition location (may be different from the scope/module in which it + // is used if the symbol is imported) pub defined_in_module: RootId, pub defined_in_scope: SymbolScope, - pub definition_span: InputSpan, // full span of definition + pub definition_span: InputSpan, // full span of definition, not just the name + pub identifier_span: InputSpan, // span of just the identifier // 
Introduction location (if imported instead of defined) - + pub introduced_at: Option, // Symbol properties - pub class: SymbolClass, - pub name: StringRef, - pub definition: Option, -} - -impl Symbol { - pub(crate) fn new(root_id: RootId, scope: SymbolScope, span: InputSpan, class: SymbolClass, name: StringRef) -> Self { - Self{ - defined_in_module: root_id, - defined_in_scope: scope, - definition_span: span, - class, - name, - definition: None, - } - } + pub name: StringRef<'static>, + pub definition: SymbolDefinition, } pub struct SymbolTable { - module_lookup: HashMap, + module_lookup: HashMap, RootId>, scope_lookup: HashMap, - symbols: Vec, } impl SymbolTable { /// Inserts a new module by its name. Upon module naming conflict the /// previously associated `RootId` will be returned. - pub(crate) fn insert_module(&mut self, module_name: StringRef, root_id: RootId) -> Result<(), RootId> { + pub(crate) fn insert_module(&mut self, module_name: StringRef<'static>, root_id: RootId) -> Result<(), RootId> { match self.module_lookup.entry(module_name) { Entry::Occupied(v) => { Err(*v.get()) @@ -76,32 +102,67 @@ impl SymbolTable { } } - /// Inserts a new scope with defined symbols. The `parent_scope` must - /// already be added to the symbol table. The symbols are expected to come - /// from a temporary buffer and are copied inside the symbol table. Will - /// return an error if there is a naming conflict. - pub(crate) fn insert_scoped_symbols( - &mut self, parent_scope: Option, within_scope: SymbolScope, symbols: &[Symbol] - ) -> Result<(), ParseError> { - // Add scoped symbols - let old_num_symbols = self.symbols.len(); - - let new_scope = ScopedSymbols { - scope: within_scope, + /// Retrieves module `RootId` by name + pub(crate) fn get_module_by_name(&mut self, name: &[u8]) -> Option { + let string_ref = StringRef::new(name); + self.module_lookup.get(&string_ref).map(|v| *v) + } + + /// Inserts a new symbol scope. 
The parent must have been added to the + /// symbol table before. + pub(crate) fn insert_scope(&mut self, parent_scope: Option, new_scope: SymbolScope) { + debug_assert!( + parent_scope.is_none() || self.scope_lookup.contains_key(parent_scope.as_ref().unwrap()), + "inserting scope {:?} but parent {:?} does not exist", new_scope, parent_scope + ); + debug_assert!(!self.scope_lookup.contains_key(&new_scope), "inserting scope {:?}, but it already exists", new_scope); + + if let Some(parent_scope) = parent_scope { + let parent = self.scope_lookup.get_mut(&parent_scope).unwrap(); + parent.child_scopes.push(new_scope); + } + + let scope = ScopedSymbols { + scope: new_scope, parent_scope, - child_scopes: Vec::new(), - start: old_num_symbols, - end: old_num_symbols + symbols.len(), + child_scopes: Vec::with_capacity(RESERVED_SYMBOLS), + symbols: Vec::with_capacity(RESERVED_SYMBOLS) }; + self.scope_lookup.insert(new_scope, scope); + } - self.symbols.extend(symbols); - self.scope_lookup.insert(within_scope, new_scope); + /// Inserts a symbol into a particular scope. The symbol's name may not + /// exist in the scope or any of its parents. If it does collide then the + /// symbol will be returned, together with the symbol that has the same + /// name. 
+ pub(crate) fn insert_symbol(&mut self, in_scope: SymbolScope, symbol: Symbol) -> Result<(), (Symbol, &Symbol)> { + debug_assert!(self.scope_lookup.contains_key(&in_scope), "inserting symbol {}, but scope {:?} does not exist", symbol.name.as_str(), in_scope); + let mut seek_scope = in_scope; + loop { + let scoped_symbols = self.scope_lookup.get(&seek_scope).unwrap(); + for existing_symbol in scoped_symbols.symbols.iter() { + if symbol.name == existing_symbol.name { + return Err((symbol, existing_symbol)) + } + } - if let Some(parent_scope) = parent_scope.as_ref() { - let parent = self.scope_lookup.get_mut(parent_scope).unwrap(); - parent.child_scopes.push(within_scope); + match scoped_symbols.parent_scope { + Some(parent_scope) => { seek_scope = parent_scope; }, + None => { break; } + } } + // If here, then there is no collision + let scoped_symbols = self.scope_lookup.get_mut(&in_scope).unwrap(); + scoped_symbols.symbols.push(symbol); Ok(()) } + + /// Retrieves a particular scope. As this will be called by the compiler to + /// retrieve scopes that MUST exist, this function will panic if the + /// indicated scope does not exist. 
+ pub(crate) fn get_scope_by_id(&mut self, scope: &SymbolScope) -> &mut ScopedSymbols { + debug_assert!(self.scope_lookup.contains_key(scope), "retrieving scope {:?}, but it doesn't exist", scope); + self.scope_lookup.get_mut(scope).unwrap() + } } \ No newline at end of file diff --git a/src/protocol/tests/utils.rs b/src/protocol/tests/utils.rs index 248844a38e89aa14307369f43b870804a1610c50..ece23a2f6126c52d67029237c90612ac3d21ddfb 100644 --- a/src/protocol/tests/utils.rs +++ b/src/protocol/tests/utils.rs @@ -450,11 +450,11 @@ impl<'a> UnionTester<'a> { pub(crate) struct FunctionTester<'a> { ctx: TestCtx<'a>, - def: &'a Function, + def: &'a FunctionDefinition, } impl<'a> FunctionTester<'a> { - fn new(ctx: TestCtx<'a>, def: &'a Function) -> Self { + fn new(ctx: TestCtx<'a>, def: &'a FunctionDefinition) -> Self { Self{ ctx, def } } diff --git a/src/protocol/tokenizer/mod.rs b/src/protocol/tokenizer/mod.rs index ebd4ee5555bd52674797aebbdc998b35c0d3e5d9..096cd3e853ea3e20b6c6116c2e1d23e6bfb6201e 100644 --- a/src/protocol/tokenizer/mod.rs +++ b/src/protocol/tokenizer/mod.rs @@ -8,7 +8,7 @@ use crate::protocol::input_source2::{ pub(crate) const KW_STRUCT: &'static [u8] = b"struct"; pub(crate) const KW_ENUM: &'static [u8] = b"enum"; pub(crate) const KW_UNION: &'static [u8] = b"union"; -pub(crate) const KW_FUNCTION: &'static [u8] = b"func"; +pub(crate) const KW_FUNCTION: &'static [u8] = b"function"; pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive"; pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite"; pub(crate) const KW_IMPORT: &'static [u8] = b"import"; @@ -127,9 +127,13 @@ pub(crate) struct TokenRange { pub range_kind: TokenRangeKind, pub curly_depth: u32, // InputPosition offset is limited to u32, so token ranges can be as well. 
- pub start: u32, - pub end: u32, - pub subranges: u32, + pub start: u32, // first token (inclusive index) + pub end: u32, // last token (exclusive index) + // Subranges and where they can be found + pub subranges: u32, // Number of subranges + pub first_child_idx: u32, // First subrange (or points to self if no subranges) + pub last_child_idx: u32, // Last subrange (or points to self if no subranges) + pub next_sibling_idx: Option, } pub(crate) struct TokenBuffer { @@ -280,6 +284,9 @@ impl Tokenizer { start: 0, end: 0, subranges: 0, + first_child_idx: 0, + last_child_idx: 0, + next_sibling_idx: None, }); // Main tokenization loop @@ -367,6 +374,35 @@ impl Tokenizer { )); } + // TODO: @remove once I'm sure the algorithm works. For now it is better + // if the debugging is a little more expedient + if cfg!(debug_assertions) { + // For each range make sure its children make sense + for parent_idx in 0..target.ranges.len() { + let cur_range = &target.ranges[parent_idx]; + if cur_range.subranges == 0 { + assert_eq!(cur_range.first_child_idx, parent_idx as u32); + assert_eq!(cur_range.last_child_idx, parent_idx as u32); + } else { + assert_ne!(cur_range.first_child_idx, parent_idx as u32); + assert_ne!(cur_range.last_child_idx, parent_idx as u32); + + let mut child_counter = 0u32; + let mut last_child_idx = cur_range.first_child_idx; + let mut child_idx = Some(cur_range.first_child_idx); + while let Some(cur_child_idx) = child_idx { + let child_range = &target.ranges[cur_child_idx as usize]; + assert_eq!(child_range.parent_idx, parent_idx); + child_idx = child_range.next_sibling_idx; + child_counter += 1; + } + + assert_eq!(cur_range.last_child_idx, last_child_idx); + assert_eq!(cur_range.subranges, child_counter); + } + } + } + Ok(()) } @@ -820,8 +856,16 @@ impl Tokenizer { // intermediate "code" range. 
if cur_range.end != first_token { let code_start = cur_range.end; + let code_range_idx = target.ranges.len() as u32; + + if cur_range.first_child_idx == self.stack_idx as u32 { + // The parent of the new "code" range we're going to push does + // not have any registered children yet. + cur_range.first_child_idx = code_range_idx; + } + cur_range.last_child_idx = code_range_idx + 1; + cur_range.end = first_token; - debug_assert_ne!(code_start, first_token); cur_range.subranges += 1; target.ranges.push(TokenRange{ parent_idx: self.stack_idx, @@ -830,12 +874,25 @@ impl Tokenizer { start: code_start, end: first_token, subranges: 0, + first_child_idx: code_range_idx, + last_child_idx: code_range_idx, + next_sibling_idx: Some(code_range_idx + 1), // we're going to push this thing next }); + } else { + // We're going to push the range in the code below, but while we + // have the `cur_range` borrowed mutably, we fix up its children + // indices. + let new_range_idx = target.ranges.len() as u32; + if cur_range.first_child_idx == self.stack_idx as u32 { + cur_range.first_child_idx = new_range_idx; + } + cur_range.last_child_idx = new_range_idx; } // Insert a new range let parent_idx = self.stack_idx; self.stack_idx = target.ranges.len(); + let new_range_idx = self.stack_idx as u32; target.ranges.push(TokenRange{ parent_idx, range_kind, @@ -843,6 +900,9 @@ impl Tokenizer { start: first_token, end: first_token, subranges: 0, + first_child_idx: new_range_idx, + last_child_idx: new_range_idx, + next_sibling_idx: None, }); }