From ddddcd3cc9aab55e98e7b671c326625501aa07e9 2021-04-18 10:47:33 From: MH Date: 2021-04-18 10:47:33 Subject: [PATCH] WIP on compiler rearchitecting --- diff --git a/src/collections/string_pool.rs b/src/collections/string_pool.rs index 54505dba6aa01de3f3c4567a3e23d362c6226e5c..91de3c466cf5cfedb0efcb9c6194897c51bdad6a 100644 --- a/src/collections/string_pool.rs +++ b/src/collections/string_pool.rs @@ -17,7 +17,7 @@ impl<'a> StringRef<'a> { /// `StringPool`, hence cannot have a `'static` lifetime. pub(crate) fn new(data: &'a [u8]) -> StringRef<'a> { // This is an internal (compiler) function: so debug_assert that the - // string is valid UTF8. Most commonly the input will come from the + // string is valid ASCII. Most commonly the input will come from the // code's source file, which is checked for ASCII-ness anyway. debug_assert!(data.is_ascii()); let length = data.len(); @@ -31,6 +31,12 @@ impl<'a> StringRef<'a> { std::str::from_utf8_unchecked(slice) } } + + pub fn as_bytes(&self) -> &'a [u8] { + unsafe { + std::slice::from_raw_parts::<'a, u8>(self.data, self.length) + } + } } impl PartialEq for StringRef { @@ -79,7 +85,10 @@ impl StringPool { } } - // Interns a string to the StringPool, returning a reference to it + /// Interns a string to the `StringPool`, returning a reference to it. The + /// pointer owned by `StringRef` is `'static` as the `StringPool` doesn't + /// reallocate/deallocate until dropped (which only happens at the end of + /// the program.) pub(crate) fn intern(&mut self, data: &[u8]) -> StringRef<'static> { // TODO: Large string allocations, if ever needed. let data_len = data.len(); diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 3a07106e8d89de2e295b473c3ebbc6f723135a9c..05d78f666bb6b336eee0c943a2bca7e9467d546e 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -176,7 +176,6 @@ pub struct Heap { // Contents of a file, these are the elements the `Root` elements refer to pragmas: Arena<Pragma>, pub(crate) imports: Arena<Import>, - identifiers: Arena<Identifier>, pub(crate) parser_types: Arena<ParserType>, pub(crate) variables: Arena<Variable>, pub(crate) definitions: Arena<Definition>, @@ -191,7 +190,6 @@ impl Heap { protocol_descriptions: Arena::new(), pragmas: Arena::new(), imports: Arena::new(), - identifiers: Arena::new(), parser_types: Arena::new(), variables: Arena::new(), definitions: Arena::new(), @@ -303,13 +301,19 @@ impl Import { pub(crate) fn as_module(&self) -> &ImportModule { match self { Import::Module(m) => m, - _ => panic!("Unable to cast 'Import' to 'ImportModule'") + _ => unreachable!("Unable to cast 'Import' to 'ImportModule'") } } pub(crate) fn as_symbols(&self) -> &ImportSymbols { match self { Import::Symbols(m) => m, - _ => panic!("Unable to cast 'Import' to 'ImportSymbols'") + _ => unreachable!("Unable to cast 'Import' to 'ImportSymbols'") + } + } + pub(crate) fn as_symbols_mut(&mut self) -> &mut ImportSymbols { + match self { + Import::Symbols(m) => m, + _ => unreachable!("Unable to cast 'Import' to 'ImportSymbols'") } } } @@ -328,17 +332,15 @@ pub struct ImportModule { pub this: ImportId, // Phase 1: parser pub span: InputSpan, - pub module_name: Identifier, + pub module: Identifier, pub alias: Identifier, pub module_id: RootId, } #[derive(Debug, Clone)] pub struct AliasedSymbol { - // Phase 1: parser - pub position: InputPosition, pub name: Identifier, - pub alias: Identifier, + pub alias: Option<Identifier>, pub definition_id: DefinitionId, } @@ -347,8 +349,8 @@ pub struct ImportSymbols { pub this: ImportId, // Phase 1: parser pub span: InputSpan, - pub module_name: Vec<u8>, - pub module_id: Option<RootId>,
+ pub module: Identifier, + pub module_id: RootId, pub symbols: Vec<AliasedSymbol>, } @@ -604,11 +606,9 @@ pub enum ParserTypeVariant { // Basic builtin Message, Bool, - Byte, - Short, - Int, - Long, - String, + UInt8, UInt16, UInt32, UInt64, + SInt8, SInt16, SInt32, SInt64, + Character, String, // Literals (need to get concrete builtin type during typechecking) IntegerLiteral, Inferred, @@ -1079,7 +1079,6 @@ impl VariableScope for Definition { #[derive(Debug, Clone)] pub struct StructFieldDefinition { - pub position: InputPosition, pub field: Identifier, pub parser_type: ParserTypeId, } diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index dd338abccd1d9a1dcaddf15254327f9f8320b7b9..ddb1fa2942d742b89be6198f1962e30cee85809d 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -238,7 +238,7 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_IMPORT_ID, import.this.index) .with_s_key("ImportModule"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module_name); + self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module); self.kv(indent2).with_s_key("Alias").with_ascii_val(&import.alias.value); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(import.module_id.as_ref().map(|v| &v.index)); @@ -247,7 +247,7 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_IMPORT_ID, import.this.index) .with_s_key("ImportSymbol"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module_name); + self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(import.module_id.as_ref().map(|v| &v.index)); diff --git a/src/protocol/input_source2.rs b/src/protocol/input_source2.rs index f2be5f9812a61dc4a6485c25c288b354cdcd47ca..ca14cb91c4ca6c00e130029c16f926d451862731 100644 --- a/src/protocol/input_source2.rs +++ b/src/protocol/input_source2.rs @@ -83,10 +83,16 @@ impl InputSource2 { } } - pub fn section(&self, start: InputPosition2, end: InputPosition2) -> &[u8] { + #[inline] + pub fn section_at_pos(&self, start: InputPosition2, end: InputPosition2) -> &[u8] { &self.input[start.offset as usize..end.offset as usize] } + #[inline] + pub fn section_at_span(&self, span: InputSpan) -> &[u8] { + &self.input[span.begin.offset as usize..span.end.offset as usize] + } + // Consumes the next character. Will check well-formedness of newlines: \r // must be followed by a \n, because this is used for error reporting. Will // not check for ascii-ness of the file, better left to a tokenizer.
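A note on the two `section` helpers added above: they are how later passes recover source text for a token range, and `section_at_span(span)` is simply shorthand for `section_at_pos(span.begin, span.end)`. A minimal sketch of that relationship, not part of the patch, using simplified stand-ins for `InputSource2`, `InputPosition2` and `InputSpan` (the real types also carry line information):

// Simplified stand-ins for the real position/span types.
#[derive(Clone, Copy)]
struct Pos { offset: u32 }
#[derive(Clone, Copy)]
struct Span { begin: Pos, end: Pos }

struct Source { input: Vec<u8> }

impl Source {
    // Slice of the raw input between two positions.
    fn section_at_pos(&self, start: Pos, end: Pos) -> &[u8] {
        &self.input[start.offset as usize..end.offset as usize]
    }
    // Same lookup, driven by a span instead of two positions.
    fn section_at_span(&self, span: Span) -> &[u8] {
        self.section_at_pos(span.begin, span.end)
    }
}

fn main() {
    let source = Source{ input: b"import std.io;".to_vec() };
    let span = Span{ begin: Pos{ offset: 7 }, end: Pos{ offset: 13 } };
    assert_eq!(source.section_at_span(span), b"std.io");
}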
diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 9642d69a11378626d2dfacc5f3bb509327c77541..4b8af9794cce90972f97907da61a9c9749bb52ca 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2526,7 +2526,7 @@ impl Lexer<'_> { h.alloc_import(|this| Import::Module(ImportModule{ this, position, - module_name: value, + module: value, alias, module_id: None, })) @@ -2575,7 +2575,7 @@ impl Lexer<'_> { h.alloc_import(|this| Import::Symbols(ImportSymbols{ this, position, - module_name: value, + module: value, module_id: None, symbols, })) @@ -2584,7 +2584,7 @@ impl Lexer<'_> { h.alloc_import(|this| Import::Symbols(ImportSymbols{ this, position, - module_name: value, + module: value, module_id: None, symbols: Vec::new() })) @@ -2603,7 +2603,7 @@ impl Lexer<'_> { h.alloc_import(|this| Import::Symbols(ImportSymbols{ this, position, - module_name: value, + module: value, module_id: None, symbols: vec![AliasedSymbol{ position, @@ -2621,7 +2621,7 @@ impl Lexer<'_> { h.alloc_import(|this| Import::Module(ImportModule{ this, position, - module_name: value, + module: value, alias: Identifier{ position: last_ident_pos, value: Vec::from(alias_value), diff --git a/src/protocol/lexer2.rs b/src/protocol/lexer2.rs deleted file mode 100644 index 4ccf5b8e8601ef99d96b2b8acb77d5ed5304ef08..0000000000000000000000000000000000000000 --- a/src/protocol/lexer2.rs +++ /dev/null @@ -1,664 +0,0 @@ -use crate::protocol::ast::*; -use crate::protocol::Heap; -use crate::collections::{StringPool, StringRef}; -use crate::protocol::tokenizer::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as InputPosition, InputSpan, ParseError}; -use crate::protocol::symbol_table2::*; - -#[derive(PartialEq, Eq)] -enum ModuleCompilationPhase { - Source, // only source is set - Tokenized, // source is tokenized - DefinitionsScanned, // all definitions are linked to their type class - ImportsResolved, // all imports are added to the symbol table - Parsed, // produced the AST for the module - ValidatedAndLinked, // AST is traversed and has linked the required AST nodes - Typed, // Type inference and checking has been performed -} - -enum KeywordDefinition { - Struct, - Enum, - Union, - Function, - Primitive, - Composite, -} - -struct Module { - // Buffers - source: InputSource, - tokens: TokenBuffer, - // Identifiers - root_id: RootId, - name: Option<(PragmaId, StringRef<'static>)>, - version: Option<(PragmaId, i64)>, - phase: ModuleCompilationPhase, -} - -struct Ctx<'a> { - heap: &'a mut Heap, - symbols: &'a mut SymbolTable, - pool: &'a mut StringPool, -} - -/// Scans the module and finds all module-level type definitions. These will be -/// added to the symbol table such that during AST-construction we know which -/// identifiers point to types. Will also parse all pragmas to determine module -/// names. 
-pub(crate) struct PassPreSymbol { - symbols: Vec<Symbol>, - pragmas: Vec<PragmaId>, - imports: Vec<ImportId>, - definitions: Vec<DefinitionId>, - buffer: String, - has_pragma_version: bool, - has_pragma_module: bool, -} - -impl PassPreSymbol { - pub(crate) fn new() -> Self { - Self{ - symbols: Vec::with_capacity(128), - pragmas: Vec::with_capacity(8), - imports: Vec::with_capacity(32), - definitions: Vec::with_capacity(128), - buffer: String::with_capacity(128), - has_pragma_version: false, - has_pragma_module: false, - } - } - - fn reset(&mut self) { - self.symbols.clear(); - self.pragmas.clear(); - self.imports.clear(); - self.definitions.clear(); - self.has_pragma_version = false; - self.has_pragma_module = false; - } - - pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> { - self.reset(); - - let module = &mut modules[module_idx]; - let module_range = &module.tokens.ranges[0]; - - debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized); - debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - debug_assert!(module.root_id.is_invalid()); // not set yet, - - // Preallocate root in the heap - let root_id = ctx.heap.alloc_protocol_description(|this| { - Root{ - this, - pragmas: Vec::new(), - imports: Vec::new(), - definitions: Vec::new(), - } - }); - module.root_id = root_id; - - // Visit token ranges to detect definitions and pragmas - let mut range_idx = module_range.first_child_idx; - loop { - let range_idx_usize = range_idx as usize; - let cur_range = &module.tokens.ranges[range_idx_usize]; - - // Parse if it is a definition or a pragma - if cur_range.range_kind == TokenRangeKind::Definition { - self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; - } else if cur_range.range_kind == TokenRangeKind::Pragma { - self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?; - } - - match cur_range.next_sibling_idx { - Some(idx) => { range_idx = idx; }, - None => { break; }, - } - } - - // Add the module's symbol scope and the symbols we just parsed - let module_scope = SymbolScope::Module(root_id); - ctx.symbols.insert_scope(None, module_scope); - for symbol in self.symbols.drain(..)
{ - if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) { - return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)) - } - } - - // Modify the preallocated root - let root = &mut ctx.heap[root_id]; - root.pragmas.extend(self.pragmas.drain(..)); - root.definitions.extend(self.definitions.drain(..)); - module.phase = ModuleCompilationPhase::DefinitionsScanned; - - Ok(()) - } - - fn visit_pragma_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { - let module = &modules[module_idx]; - let range = &module.tokens.ranges[range_idx]; - let mut iter = module.tokens.iter_range(range); - - // Consume pragma name - let (pragma_section, pragma_start, _) = consume_pragma(&self.source, &mut iter)?; - - // Consume pragma values - if pragma_section == "#module" { - // Check if name is defined twice within the same file - if self.has_pragma_module { - return Err(ParseError::new_error(&module.source, pragma_start, "module name is defined twice")); - } - - // Consume the domain-name - let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; - if iter.next().is_some() { - return Err(ParseError::new_error(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); - } - - // Add to heap and symbol table - let pragma_span = InputSpan::from_positions(pragma_start, module_span.end); - let module_name = ctx.pool.intern(module_name); - let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{ - this, - span: pragma_span, - value: Identifier{ span: module_span, value: module_name.clone() }, - })); - self.pragmas.push(pragma_id); - - if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name, module.root_id) { - // Naming conflict - let this_module = &modules[module_idx]; - let other_module = seek_module(modules, other_module_root_id).unwrap(); - let (other_module_pragma_id, _) = other_module.name.unwrap(); - let other_pragma = ctx.heap[other_module_pragma_id].as_module(); - return Err(ParseError::new_error_str_at_span( - &this_module.source, pragma_span, "conflict in module name" - ).with_info_str_at_span( - &other_module.source, other_pragma.span, "other module is defined here" - )); - } - self.has_pragma_module = true; - } else if pragma_section == "#version" { - // Check if version is defined twice within the same file - if self.has_pragma_version { - return Err(ParseError::new_error(&module.source, pragma_start, "module version is defined twice")); - } - - // Consume the version pragma - let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?; - let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{ - this, - span: InputSpan::from_positions(pragma_start, version_span.end), - version, - })); - self.pragmas.push(pragma_id); - self.has_pragma_version = true; - } else { - // Custom pragma, maybe we support this in the future, but for now - // we don't. 
- return Err(ParseError::new_error(&module.source, pragma_start, "illegal pragma name")); - } - - Ok(()) - } - - fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> { - let module = &modules[module_idx]; - let range = &module.tokens.ranges[range_idx]; - let definition_span = InputSpan::from_positions( - module.tokens.start_pos(range), - module.tokens.end_pos(range) - ); - let mut iter = module.tokens.iter_range(range); - - // First ident must be type of symbol - let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); - let kw = parse_definition_keyword(kw_text).unwrap(); - - // Retrieve identifier of definition - let (identifier_text, identifier_span) = consume_ident(&module.source, &mut iter)?; - let ident_text = ctx.pool.intern(identifier_text); - let identifier = Identifier{ span: identifier_span, value: ident_text.clone() }; - - // Reserve space in AST for definition and add it to the symbol table - let definition_class; - let ast_definition_id; - match kw { - KeywordDefinition::Struct => { - let struct_def_id = ctx.heap.alloc_struct_definition(|this| { - StructDefinition::new_empty(this, definition_span, identifier) - }); - definition_class = DefinitionClass::Struct; - ast_definition_id = struct_def_id.upcast(); - }, - KeywordDefinition::Enum => { - let enum_def_id = ctx.heap.alloc_enum_definition(|this| { - EnumDefinition::new_empty(this, definition_span, identifier) - }); - definition_class = DefinitionClass::Enum; - ast_definition_id = enum_def_id.upcast(); - }, - KeywordDefinition::Union => { - let union_def_id = ctx.heap.alloc_union_definition(|this| { - UnionDefinition::new_empty(this, definition_span, identifier) - }); - definition_class = DefinitionClass::Union; - ast_definition_id = union_def_id.upcast() - }, - KeywordDefinition::Function => { - let func_def_id = ctx.heap.alloc_function_definition(|this| { - FunctionDefinition::new_empty(this, definition_span, identifier) - }); - definition_class = DefinitionClass::Function; - ast_definition_id = func_def_id.upcast(); - }, - KeywordDefinition::Primitive | KeywordDefinition::Composite => { - let component_variant = if kw == KeywordDefinition::Primitive { - ComponentVariant::Primitive - } else { - ComponentVariant::Composite - }; - let comp_def_id = ctx.heap.alloc_component_definition(|this| { - ComponentDefinition::new_empty(this, definition_span, component_variant, identifier) - }); - definition_class = DefinitionClass::Component; - ast_definition_id = comp_def_id.upcast(); - } - } - - let symbol = Symbol{ - name: ident_text, - data: SymbolVariant::Definition(SymbolDefinition{ - defined_in_module: module.root_id, - defined_in_scope: SymbolScope::Module(module.root_id), - definition_span, - identifier_span, - introduced_at: None, - class: definition_class, - definition_id: ast_definition_id, - }), - }; - self.symbols.push(symbol); - self.definitions.push(ast_definition_id); - - Ok(()) - } -} - -/// Parses all the imports in the module tokens. Is applied after the -/// definitions and name of modules are resolved. Hence we should be able to -/// resolve all symbols to their appropriate module/definition. 
-pub(crate) struct PassImport { - imports: Vec<ImportId>, -} - -impl PassImport { - pub(crate) fn new() -> Self { - Self{ imports: Vec::with_capacity(32) } - } - pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> { - let module = &modules[module_idx]; - let module_range = &module.tokens.ranges[0]; - debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::DefinitionsScanned)); - debug_assert_eq!(module.phase, ModuleCompilationPhase::DefinitionsScanned); - debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - - let mut range_idx = module_range.first_child_idx; - loop { - let range_idx_usize = range_idx as usize; - let cur_range = &module.tokens.ranges[range_idx_usize]; - - if cur_range.range_kind == TokenRangeKind::Import { - self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?; - } - - match cur_range.next_sibling_idx { - Some(idx) => { range_idx = idx; }, - None => { break; } - } - } - - Ok(()) - } - - pub(crate) fn visit_import_range( - &mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize - ) -> Result<(), ParseError> { - let module = &modules[module_idx]; - let import_range = &module.tokens.ranges[range_idx]; - debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import); - - let mut iter = module.tokens.iter_range(import_range); - - // Consume "import" - let (_import_ident, import_span) = - consume_ident(&module.source, &mut iter)?; - debug_assert_eq!(_import_ident, KW_IMPORT); - - // Consume module name - let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?; - let target_root_id = ctx.symbols.get_module_by_name(module_name); - if target_root_id.is_none() { - return Err(ParseError::new_error_at_span( - &module.source, module_name_span, - format!("could not resolve module '{}'", String::from_utf8_lossy(module_name)) - )); - } - let module_name = ctx.pool.intern(module_name); - let target_root_id = target_root_id.unwrap(); - - // Check for subsequent characters - let next = iter.next(); - if has_ident(&module.source, &mut iter, b"as") { - iter.consume(); - let (alias_text, alias_span) = consume_ident(source, &mut iter)?; - let alias = ctx.pool.intern(alias_text); - - let import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{ - this, - span: import_span, - module_name: Identifier{ span: module_name_span, value: module_name }, - alias: Identifier{ span: alias_span, value: alias.clone() }, - module_id: target_root_id - })); - ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{ - name: alias, - data: SymbolVariant::Module(SymbolModule{ - root_id: target_root_id, - introduced_at: import_id, - }), - }); - } else if Some(TokenKind::ColonColon) == next { - fn consume_symbol_and_maybe_alias<'a>( - source: &'a InputSource, iter: &mut TokenIter, in_scope: SymbolScope, ctx: &Ctx - ) -> Result<(&'a [u8], InputSpan, Option<(&'a [u8], InputSpan)>), ParseError> { - // Consume symbol and make sure it points to something valid - let (symbol, symbol_span) = consume_ident(source, iter)?; - let target = ctx.symbols.get_symbol_by_name_defined_in_scope(in_scope, symbol); - if target.is_none() { - - } - - if peek_ident(source, iter) == b"as" { - // Consume alias - iter.consume(); - let (alias, alias_span) = consume_ident(source, iter)?; - Ok((symbol, symbol_span, Some((alias, alias_span)))) - } else { - Ok((symbol, symbol_span, None)) - } - } - - iter.consume(); - - let next = iter.next(); - if
Some(TokenKind::Ident) = next { - // Importing a single symbol - iter.consume(); - let (symbol_text, symbol_span, maybe_alias) = consume_symbol_and_maybe_alias(&module.source, &mut iter)?; - let target_symbol = ctx.symbols.get_symbol_by_name_defined_in_scope( - SymbolScope::Module(target_root_id)) - } else if Some(TokenKind::OpenCurly) = next { - // Importing multiple symbols - iter.consume(); - } else if Some(TokenKind::Star) = next { - // Import all symbols from the module - iter.consume(); - } else { - return Err(ParseError::new_error_str_at_pos( - &module.source, iter.last_valid_pos(), "expected symbol name, '{' or '*'" - )); - } - } else { - // Assume implicit alias, then check if we get the semicolon next - let module_name_str = module_name.as_str(); - let last_ident_start = module_name_str.rfind('.').map_or(0, |v| v + 1); - let alias_text = &module_name_str.as_bytes()[last_ident_start..]; - let alias = ctx.pool.intern(alias_text); - let alias_span = InputSpan::from_positions( - module_name_span.begin.with_offset(last_ident_start as u32), - module_name_span.end - ); - } - - Ok(()) - } -} - -fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { - let (_, mut span) = consume_ident(source, iter)?; - while let Some(TokenKind::Dot) = iter.next() { - iter.consume(); - let (_, new_span) = consume_ident(source, iter)?; - span.end = new_span.end; - } - - // Not strictly necessary, but probably a reasonable restriction: this - // simplifies parsing of module naming and imports. - if span.begin.line != span.end.line { - return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines")); - } - - // If module name consists of a single identifier, then it may not match any - // of the reserved keywords - let section = source.section(span.begin, span.end); - if is_reserved_keyword(section) { - return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword")); - } - - Ok((source.section(span.begin, span.end), span)) -} - -/// Consumes a specific expected token. Be careful to only call this with tokens that do not have a -/// variable length. 
-fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<(), ParseError> { - if Some(expected) != iter.next() { - return Err(ParseError::new_error_at_pos( - source, iter.last_valid_pos(), - format!("expected '{}'", expected.token_chars()) - )); - } - iter.consume(); - Ok(()) -} - -fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> { - if Some(TokenKind::Integer) != iter.next() { - return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal")); - } - let (start_pos, end_pos) = iter.next_range(); - iter.consume(); - - let integer_text = source.section(start_pos, end_pos); - - // Determine radix and offset from prefix - let (radix, input_offset, radix_name) = - if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") { - // Binary number - (2, 2, "binary") - } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") { - // Octal number - (8, 2, "octal") - } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") { - // Hexadecimal number - (16, 2, "hexadecimal") - } else { - (10, 0, "decimal") - }; - - // Take out any of the separating '_' characters - buffer.clear(); - for char_idx in input_offset..integer_text.len() { - let char = integer_text[char_idx]; - if char == b'_' { - continue; - } - if !char.is_ascii_digit() { - return Err(ParseError::new_error(source, start_pos, "incorrectly formatted integer")); - } - buffer.push(char::from(char)); - } - - // Use the cleaned up string to convert to integer - match u64::from_str_radix(&buffer, radix) { - Ok(number) => Ok((number, InputSpan::from_positions(start_pos, end_pos))), - Err(_) => Err( - ParseError::new_error(source, start_pos, "incorrectly formatted integer") - ), - } -} - -fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> { - for module in modules { - if module.root_id == root_id { - return Some(module) - } - } - - return None -} - -fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> { - if Some(TokenKind::Pragma) != iter.next() { - return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected a pragma")); - } - let (pragma_start, pragma_end) = iter.next_range(); - iter.consume(); - Ok((source.section(pragma_start, pragma_end), pragma_start, pragma_end)) -} - -fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool { - if Some(TokenKind::Ident) == iter.next() { - let (start, end) = iter.next_range(); - return source.section(start, end) == expected; - } - - false -} - -fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> { - if Some(TokenKind::Ident) == iter.next() { - let (start, end) = iter.next_range(); - return Some(source.section(start, end)) - } - - None -} - -/// Consumes any identifier and returns it together with its span. Does not -/// check if the identifier is a reserved keyword. 
-fn consume_any_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { - if Some(TokenKind::Ident) != iter.next() { - return Err(ParseError::new_error(sourcee, iter.last_valid_pos(), "expected an identifier")); - } - let (ident_start, ident_end) = iter.next_range(); - iter.consume(); - Ok((source.section(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end))) -} - -/// Consumes an identifier that is not a reserved keyword and returns it -/// together with its span. -fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { - let (ident, span) = consume_any_ident(source, iter)?; - if is_reserved_keyword(ident) { - return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword")); - } - - Ok((ident, span)) -} - -fn is_reserved_definition_keyword(text: &[u8]) -> bool { - return ([ - b"struct", b"enum", b"union", b"function", b"primitive" - ] as &[[u8]]).contains(text) -} - -fn is_reserved_statement_keyword(text: &[u8]) -> bool { - return ([ - b"channel", b"import", b"as", - b"if", b"while", b"break", b"continue", b"goto", b"return", - b"synchronous", b"assert", b"new", - ] as &[[u8]]).contains(text) -} - -fn is_reserved_expression_keyword(text: &[u8]) -> bool { - return ([ - b"let", b"true", b"false", b"null", // literals - b"get", b"put", b"fires", b"create", b"length", // functions - ] as &[[u8]]).contains(text) -} - -fn is_reserved_type_keyword(text: &[u8]) -> bool { - return ([ - b"in", b"out", b"msg", - b"bool", - b"u8", b"u16", b"u32", b"u64", - b"s8", b"s16", b"s32", b"s64", - b"auto" - ] as &[[u8]]).contains(text) -} - -fn is_reserved_keyword(text: &[u8]) -> bool { - return - is_reserved_definition_keyword(text) || - is_reserved_statement_keyword(text) || - is_reserved_type_keyword(text); -} - -/// Constructs a human-readable message indicating why there is a conflict of -/// symbols. -// Note: passing the `module_idx` is not strictly necessary, but will prevent -// programmer mistakes during development: we get a conflict because we're -// currently parsing a particular module. 
-fn construct_symbol_conflict_error(modules: &[Module], module_idx: usize, ctx: &Ctx, new_symbol: &Symbol, old_symbol: &Symbol) -> ParseError { - let module = &modules[module_idx]; - let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) { - match symbol.introduced_at { - Some(import_id) => { - // Symbol is being imported - let import = &ctx.heap[import_id]; - match import { - Import::Module(import) => ( - format!("the module aliased as '{}' imported here", symbol.name.as_str()), - import.span - ), - Import::Symbols(symbols) => ( - format!("the type '{}' imported here", symbol.name.as_str()), - symbols.span - ), - } - }, - None => { - // Symbol is being defined - debug_assert_eq!(symbol.defined_in_module, module.root_id); - debug_assert_ne!(symbol.definition.symbol_class(), SymbolClass::Module); - ( - format!("the type '{}' defined here", symbol.name.as_str()), - symbol.identifier_span - ) - } - } - }; - - let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol); - let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol); - return ParseError::new_error_at_span( - &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg) - ).with_info_at_span( - &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg) - ) -} - -fn parse_definition_keyword(keyword: &[u8]) -> Option<KeywordDefinition> { - match keyword { - KW_STRUCT => Some(Keyword::Struct), - KW_ENUM => Some(Keyword::Enum), - KW_UNION => Some(Keyword::Union), - KW_FUNCTION => Some(Keyword::Function), - KW_PRIMITIVE => Some(Keyword::Primitive), - KW_COMPOSITE => Some(Keyword::Composite), - _ => None - } -} \ No newline at end of file diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index 62e11f9b99130635e5d871827f4e855f9d6f7a83..95f635752057d7e6aade690a6d91755d5d978c48 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -4,7 +4,6 @@ mod eval; pub(crate) mod inputsource; pub(crate) mod input_source2; // mod lexer; -mod tokenizer; mod parser; #[cfg(test)] mod tests; @@ -12,7 +11,6 @@ mod parser; pub(crate) mod ast; pub(crate) mod ast_printer; pub(crate) mod lexer; -pub(crate) mod lexer2; lazy_static::lazy_static! { /// Conveniently-provided protocol description initialized with a zero-length PDL string.
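The parser rework in the next diff replaces the deleted single-file lexer with per-module passes that each run over every module before the next pass starts. A sketch of how such a driver loop could look, not part of the patch, assuming the `parse(modules, module_idx, ctx)` signatures shown in this patch (the `run_passes` helper itself is hypothetical, and tokenization is assumed to have happened already):

fn run_passes(
    modules: &mut [Module], ctx: &mut PassCtx,
    symbols: &mut PassSymbols, imports: &mut PassImport, definitions: &mut PassDefinitions,
) -> Result<(), ParseError> {
    // Tokenized -> SymbolsScanned: preallocate each module's AST root and
    // register its module-level definitions in the symbol table.
    for idx in 0..modules.len() { symbols.parse(modules, idx, ctx)?; }
    // SymbolsScanned -> ImportsResolved: every module's symbols now exist,
    // so imports can be resolved across module boundaries.
    for idx in 0..modules.len() { imports.parse(modules, idx, ctx)?; }
    // ImportsResolved -> DefinitionsParsed: build the AST bodies.
    for idx in 0..modules.len() { definitions.parse(modules, idx, ctx)?; }
    Ok(())
}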
diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 0110d950b8d80c1db45d1a5286816baf75a23042..4809745fac292421922b8a6c613c774c14c72fe1 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -2,25 +2,61 @@ mod depth_visitor; pub(crate) mod symbol_table; pub(crate) mod symbol_table2; pub(crate) mod type_table; +pub(crate) mod tokens; +pub(crate) mod token_parsing; +pub(crate) mod pass_tokenizer; +pub(crate) mod pass_symbols; +pub(crate) mod pass_imports; +pub(crate) mod pass_definitions; mod type_resolver; mod visitor; mod visitor_linker; mod utils; use depth_visitor::*; -use symbol_table::SymbolTable; +use tokens::*; +use crate::collections::*; +use symbol_table2::SymbolTable; use visitor::Visitor2; use visitor_linker::ValidityAndLinkerVisitor; use type_resolver::{TypeResolvingVisitor, ResolveQueue}; use type_table::{TypeTable, TypeCtx}; use crate::protocol::ast::*; -use crate::protocol::inputsource::*; +use crate::protocol::input_source2::{InputSource2 as InputSource}; use crate::protocol::lexer::*; use std::collections::HashMap; use crate::protocol::ast_printer::ASTWriter; +// Debug for the debug_assert_eq! calls, Ord so passes can assert that modules +// have reached at least a given phase. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum ModuleCompilationPhase { + Source, // only source is set + Tokenized, // source is tokenized + SymbolsScanned, // all definitions are linked to their type class + ImportsResolved, // all imports are added to the symbol table + DefinitionsParsed, // produced the AST for the entire module + ValidatedAndLinked, // AST is traversed and has linked the required AST nodes + Typed, // Type inference and checking has been performed +} + +pub struct Module { + // Buffers + source: InputSource, + tokens: TokenBuffer, + // Identifiers + root_id: RootId, + name: Option<(PragmaId, StringRef<'static>)>, + version: Option<(PragmaId, i64)>, + phase: ModuleCompilationPhase, +} + +pub struct PassCtx<'a> { + heap: &'a mut Heap, + symbols: &'a mut SymbolTable, + pool: &'a mut StringPool, +} + // TODO: @fixme, pub qualifier pub(crate) struct LexedModule { pub(crate) source: InputSource, @@ -156,13 +192,13 @@ impl Parser { match import { Import::Module(import) => { debug_assert!(import.module_id.is_none(), "module import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module_name) + let target_module_id = self.symbol_table.resolve_module(&import.module) .expect("module import is resolved by symbol table"); import.module_id = Some(target_module_id) }, Import::Symbols(import) => { debug_assert!(import.module_id.is_none(), "module of symbol import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module_name) + let target_module_id = self.symbol_table.resolve_module(&import.module) .expect("symbol import's module is resolved by symbol table"); import.module_id = Some(target_module_id); diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs new file mode 100644 index 0000000000000000000000000000000000000000..3bbfe219acd57fc1d373aadbeca584564733403d --- /dev/null +++ b/src/protocol/parser/pass_definitions.rs @@ -0,0 +1,199 @@ +use crate::protocol::ast::*; +use super::symbol_table2::*; +use super::{Module, ModuleCompilationPhase, PassCtx}; +use super::tokens::*; +use super::token_parsing::*; +use crate::protocol::input_source2::{InputSource2 as InputSource, InputSpan, ParseError}; +use crate::collections::*; + +/// Parses all the tokenized definitions into actual AST nodes.
+pub(crate) struct PassDefinitions { + identifiers: Vec<Identifier>, + struct_fields: Vec<StructFieldDefinition>, +} + +impl PassDefinitions { + pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let module_range = &module.tokens.ranges[0]; + debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved); + debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); + + let mut range_idx = module_range.first_child_idx; + loop { + let range_idx_usize = range_idx as usize; + let cur_range = &module.tokens.ranges[range_idx_usize]; + + if cur_range.range_kind == TokenRangeKind::Definition { + self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; + } + + match cur_range.next_sibling_idx { + Some(idx) => { range_idx = idx; }, + None => { break; }, + } + } + + Ok(()) + } + + fn visit_definition_range( + &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize + ) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let cur_range = &module.tokens.ranges[range_idx]; + debug_assert_eq!(cur_range.range_kind, TokenRangeKind::Definition); + + // Detect which definition we're parsing + let mut iter = module.tokens.iter_range(cur_range); + let keyword = peek_ident(&module.source, &mut iter).unwrap(); + match keyword { + KW_STRUCT => { + + }, + KW_ENUM => { + + }, + KW_UNION => { + + }, + KW_FUNCTION => { + + }, + KW_PRIMITIVE => { + + }, + KW_COMPOSITE => { + + }, + _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(keyword)), + }; + + Ok(()) + } + + fn visit_struct_definition( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result<(), ParseError> { + // Consume struct and name of struct + let struct_span = consume_exact_ident(&module.source, iter, b"struct")?; + let (ident_text, _) = consume_ident(&module.source, iter)?; + + // We should have preallocated the definition in the heap, retrieve its identifier + let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(SymbolScope::Module(module.root_id), ident_text) + .unwrap().variant.as_definition().definition_id; + + consume_polymorphic_vars(&module.source, iter, ctx, &mut self.identifiers)?; + debug_assert!(self.struct_fields.is_empty()); + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, + |source, iter| { + let field = consume_ident_interned(source, iter, ctx)?; + + StructFieldDefinition{ field, parser_type } + }, + &mut self.struct_fields, "a struct field", "a list of struct fields" + ) + } +} + +enum TypeKind { + Message, + Bool, + UInt8, UInt16, UInt32, UInt64, + SInt8, SInt16, SInt32, SInt64, + Character, String, + Inferred, + Array, + Input, + Output, + SymbolicDefinition(DefinitionId), + SymbolicPolyArg(DefinitionId, usize), +}
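A note on why `consume_parser_type` below keeps an explicit stack: a type such as `Map<String,Array<SInt32>>` is a tree, and its trailing `>>` is tokenized as a single `ShiftRight` token, so any depth bookkeeping has to treat that token as closing two levels at once. A self-contained sketch of that counting, not part of the patch, over a simplified token enum (the real code works on `TokenKind` and a `StackEntry` per nesting level):

enum Tok { Ident, OpenAngle, CloseAngle, ShiftRight, Comma }

// Returns the angle-bracket nesting depth after a token stream, or an error
// if more angle brackets are closed than were opened.
fn final_angle_depth(tokens: &[Tok]) -> Result<i32, &'static str> {
    let mut depth = 0i32;
    for token in tokens {
        match token {
            Tok::OpenAngle => depth += 1,
            Tok::CloseAngle => depth -= 1,
            Tok::ShiftRight => depth -= 2, // `>>` closes two levels at once
            _ => {},
        }
        if depth < 0 { return Err("unmatched '>'"); }
    }
    Ok(depth)
}

fn main() {
    // Token stream of Map<String,Array<SInt32>>
    let tokens = [
        Tok::Ident, Tok::OpenAngle, Tok::Ident, Tok::Comma,
        Tok::Ident, Tok::OpenAngle, Tok::Ident, Tok::ShiftRight,
    ];
    assert_eq!(final_angle_depth(&tokens), Ok(0));
}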
+ +/// Consumes a type. A type always starts with an identifier which may indicate +/// a builtin type or a user-defined type. The fact that it may contain +/// polymorphic arguments makes it a tree-like structure. Because we cannot rely +/// on knowing the exact number of polymorphic arguments we do not check for +/// these. +fn consume_parser_type( + source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, poly_vars: &[Identifier] +) -> Result<(), ParseError> { + struct StackEntry { + angle_depth: i32, + } + let mut type_stack = Vec::new(); + + Ok(()) +} + +fn consume_parser_type_ident( + source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, + mut scope: SymbolScope, wrapping_definition: DefinitionId, poly_vars: &[Identifier] +) -> Result<(TypeKind, InputSpan), ParseError> { + let (type_text, type_span) = consume_any_ident(source, iter)?; + + let type_kind = match type_text { + KW_TYPE_MESSAGE => TypeKind::Message, + KW_TYPE_BOOL => TypeKind::Bool, + KW_TYPE_UINT8 => TypeKind::UInt8, + KW_TYPE_UINT16 => TypeKind::UInt16, + KW_TYPE_UINT32 => TypeKind::UInt32, + KW_TYPE_UINT64 => TypeKind::UInt64, + KW_TYPE_SINT8 => TypeKind::SInt8, + KW_TYPE_SINT16 => TypeKind::SInt16, + KW_TYPE_SINT32 => TypeKind::SInt32, + KW_TYPE_SINT64 => TypeKind::SInt64, + KW_TYPE_IN_PORT => TypeKind::Input, + KW_TYPE_OUT_PORT => TypeKind::Output, + _ => { + // Must be some kind of symbolic type + let mut type_kind = None; + for (poly_idx, poly_var) in poly_vars.iter().enumerate() { + if poly_var.value.as_bytes() == type_text { + type_kind = Some(TypeKind::SymbolicPolyArg(wrapping_definition, poly_idx)); + } + } + + if type_kind.is_none() { + // Check symbol table for definition + let last_symbol = ctx.symbols.get_symbol_by_name(scope, type_text); + if last_symbol.is_none() { + return Err(ParseError::new_error_str_at_span(source, type_span, "unknown type")); + } + let last_symbol = last_symbol.unwrap(); + match last_symbol.variant { + SymbolVariant::Module(symbol_module) => { + // Keep seeking + }, + SymbolVariant::Definition(symbol_definition) => { + + } + } + } + } + } + + + Ok(()) +} + +/// Consumes polymorphic variables (i.e. a list of identifiers). If the list is +/// absent then we simply return an empty array. +fn consume_polymorphic_vars( + source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut Vec<Identifier> +) -> Result<(), ParseError> { + // Note: because this is just a list of identifiers, we don't have to take + // two `TokenKind::CloseAngle` interpreted as `TokenKind::ShiftRight` into + // account. + debug_assert!(target.is_empty()); + maybe_consume_comma_separated( + TokenKind::OpenAngle, TokenKind::CloseAngle, source, iter, + |source, iter| consume_ident_interned(source, iter, ctx), + target, "a polymorphic variable" + )?; + + Ok(()) +} \ No newline at end of file diff --git a/src/protocol/parser/pass_imports.rs b/src/protocol/parser/pass_imports.rs new file mode 100644 index 0000000000000000000000000000000000000000..388d6a3f33119cf93551e89096e2eff8dd65a86c --- /dev/null +++ b/src/protocol/parser/pass_imports.rs @@ -0,0 +1,308 @@ +use crate::protocol::ast::*; +use super::symbol_table2::*; +use super::{Module, ModuleCompilationPhase, PassCtx}; +use super::tokens::*; +use super::token_parsing::*; +use crate::protocol::input_source2::{InputSource2 as InputSource, InputSpan, ParseError}; +use crate::collections::*; + +/// Parses all the imports in the module tokens. Is applied after the +/// definitions and name of modules are resolved. Hence we should be able to +/// resolve all symbols to their appropriate module/definition.
+pub(crate) struct PassImport { + imports: Vec<ImportId>, + found_symbols: Vec<(AliasedSymbol, SymbolDefinition)>, + scoped_symbols: Vec<Symbol>, +} + +impl PassImport { + pub(crate) fn new() -> Self { + Self{ + imports: Vec::with_capacity(32), + found_symbols: Vec::with_capacity(32), + scoped_symbols: Vec::with_capacity(32), + } + } + pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let module_range = &module.tokens.ranges[0]; + debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::SymbolsScanned)); + debug_assert_eq!(module.phase, ModuleCompilationPhase::SymbolsScanned); + debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); + + let mut range_idx = module_range.first_child_idx; + loop { + let range_idx_usize = range_idx as usize; + let cur_range = &module.tokens.ranges[range_idx_usize]; + + if cur_range.range_kind == TokenRangeKind::Import { + self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?; + } + + match cur_range.next_sibling_idx { + Some(idx) => { range_idx = idx; }, + None => { break; } + } + } + + let root = &mut ctx.heap[module.root_id]; + root.imports.extend(self.imports.drain(..)); + + let module = &mut modules[module_idx]; + module.phase = ModuleCompilationPhase::ImportsResolved; + + Ok(()) + } + + pub(crate) fn visit_import_range( + &mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize + ) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let import_range = &module.tokens.ranges[range_idx]; + debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import); + + let mut iter = module.tokens.iter_range(import_range); + + // Consume "import" + let (_import_ident, import_span) = + consume_any_ident(&module.source, &mut iter)?; + debug_assert_eq!(_import_ident, KW_IMPORT); + + // Consume module name + let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?; + let target_root_id = ctx.symbols.get_module_by_name(module_name); + if target_root_id.is_none() { + return Err(ParseError::new_error_at_span( + &module.source, module_name_span, + format!("could not resolve module '{}'", String::from_utf8_lossy(module_name)) + )); + } + let module_name = ctx.pool.intern(module_name); + let module_identifier = Identifier{ span: module_name_span, value: module_name }; + let target_root_id = target_root_id.unwrap(); + + // Check for subsequent characters (alias, multiple imported symbols) + let next = iter.next(); + let import_id; + + if has_ident(&module.source, &mut iter, b"as") { + // Alias for module + iter.consume(); + let alias_identifier = consume_ident_interned(&module.source, &mut iter, ctx)?; + let alias_name = alias_identifier.value.clone(); + + import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{ + this, + span: import_span, + module: module_identifier, + alias: alias_identifier, + module_id: target_root_id + })); + ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{ + name: alias_name, + variant: SymbolVariant::Module(SymbolModule{ + root_id: target_root_id, + introduced_at: import_id, + }), + }); + } else if Some(TokenKind::ColonColon) == next { + iter.consume(); + + // Helper function to consume symbols, their alias, and the + // definition the symbol is pointing to.
+ fn consume_symbol_and_maybe_alias<'a>( + source: &'a InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, + module_name: &StringRef<'static>, module_root_id: RootId, + ) -> Result<(AliasedSymbol, SymbolDefinition), ParseError> { + // Consume symbol name and make sure it points to an existing definition + let symbol_identifier = consume_ident_interned(source, iter, ctx)?; + let target = ctx.symbols.get_symbol_by_name_defined_in_scope( + SymbolScope::Module(module_root_id), symbol_identifier.value.as_bytes() + ); + + if target.is_none() { + return Err(ParseError::new_error_at_span( + source, symbol_identifier.span, + format!( + "could not find symbol '{}' within module '{}'", + symbol_identifier.value.as_str(), module_name.as_str() + ) + )); + } + let target = target.unwrap(); + debug_assert_ne!(target.class(), SymbolClass::Module); + // Clone the definition now: consuming the alias below needs `ctx` again + let target_definition = target.variant.as_definition().clone(); + + // Consume alias text if specified + let alias_identifier = if peek_ident(source, iter) == Some(&b"as"[..]) { + // Consume alias + iter.consume(); + Some(consume_ident_interned(source, iter, ctx)?) + } else { + None + }; + + Ok(( + AliasedSymbol{ + name: symbol_identifier, + alias: alias_identifier, + definition_id: target_definition.definition_id, + }, + target_definition + )) + } + + let next = iter.next(); + + if Some(TokenKind::Ident) == next { + // Importing a single symbol; the identifier itself is consumed by + // the helper above + let (imported_symbol, symbol_definition) = consume_symbol_and_maybe_alias( + &module.source, &mut iter, ctx, &module_identifier.value, target_root_id + )?; + let alias_identifier = imported_symbol.alias.clone().unwrap_or_else(|| imported_symbol.name.clone()); + import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{ + this, + span: InputSpan::from_positions(import_span.begin, alias_identifier.span.end), + module: module_identifier, + module_id: target_root_id, + symbols: vec![imported_symbol], + })); + if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol( + SymbolScope::Module(module.root_id), + Symbol{ + name: alias_identifier.value, + variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id)) + } + ) { + return Err(construct_symbol_conflict_error( + modules, module_idx, ctx, &new_symbol, old_symbol + )); + } + } else if Some(TokenKind::OpenCurly) == next { + // Importing multiple symbols + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, &mut iter, + |source, iter| consume_symbol_and_maybe_alias( + source, iter, ctx, &module_identifier.value, target_root_id + ), + &mut self.found_symbols, "a symbol", "a list of symbols to import" + )?; + let end_of_list = iter.last_valid_pos(); + + // Preallocate import + import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols { + this, + span: InputSpan::from_positions(import_span.begin, end_of_list), + module: module_identifier, + module_id: target_root_id, + symbols: Vec::with_capacity(self.found_symbols.len()), + })); + + // Fill import symbols while inserting symbols in the + // appropriate scope in the symbol table. + let import = ctx.heap[import_id].as_symbols_mut(); + + for (imported_symbol, symbol_definition) in self.found_symbols.drain(..)
{ + let import_name = imported_symbol.alias.as_ref().map_or_else( + || imported_symbol.name.value.clone(), + |v| v.value.clone() + ); + import.symbols.push(imported_symbol); + if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol( + SymbolScope::Module(module.root_id), Symbol{ + name: import_name, + variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id)) + } + ) { + return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)); + } + } + } else if Some(TokenKind::Star) == next { + // Import all symbols from the module + let star_span = iter.next_span(); + + iter.consume(); + self.scoped_symbols.clear(); + let _found = ctx.symbols.get_all_symbols_defined_in_scope( + SymbolScope::Module(target_root_id), + &mut self.scoped_symbols + ); + debug_assert!(_found); // even modules without symbols should have a scope + + // Preallocate import + import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{ + this, + span: InputSpan::from_positions(import_span.begin, star_span.end), + module: module_identifier, + module_id: target_root_id, + symbols: Vec::with_capacity(self.scoped_symbols.len()) + })); + + // Fill import AST node and symbol table + let import = ctx.heap[import_id].as_symbols_mut(); + + for symbol in self.scoped_symbols.drain(..) { + let symbol_name = symbol.name; + match symbol.variant { + SymbolVariant::Definition(symbol_definition) => { + import.symbols.push(AliasedSymbol{ + name: Identifier{ span: star_span, value: symbol_name.clone() }, + alias: None, + definition_id: symbol_definition.definition_id, + }); + + if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol( + SymbolScope::Module(module.root_id), + Symbol{ + name: symbol_name, + variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id)) + } + ) { + return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)); + } + }, + _ => unreachable!(), + } + } + } else { + return Err(ParseError::new_error_str_at_pos( + &module.source, iter.last_valid_pos(), "expected symbol name, '{' or '*'" + )); + } + } else { + // Assume implicit alias + let module_name_str = module_identifier.value.as_str(); + let last_ident_start = module_name_str.rfind('.').map_or(0, |v| v + 1); + let alias_text = &module_name_str.as_bytes()[last_ident_start..]; + let alias = ctx.pool.intern(alias_text); + let alias_span = InputSpan::from_positions( + module_name_span.begin.with_offset(last_ident_start as u32), + module_name_span.end + ); + let alias_identifier = Identifier{ span: alias_span, value: alias.clone() }; + + import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{ + this, + span: InputSpan::from_positions(import_span.begin, module_identifier.span.end), + module: module_identifier, + alias: alias_identifier, + module_id: target_root_id, + })); + ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{ + name: alias, + variant: SymbolVariant::Module(SymbolModule{ + root_id: target_root_id, + introduced_at: import_id + }) + }); + } + + // By now the `import_id` is set, just need to make sure that the import + // properly ends with a semicolon + consume_token(&module.source, &mut iter, TokenKind::SemiColon)?; + self.imports.push(import_id); + + Ok(()) + } +}
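For reference, the implicit-alias rule from the final `else` branch above, extracted into a self-contained sketch (not part of the patch): when an import carries no explicit alias and names no symbols, the module is exposed under its last dot-separated segment.

fn implicit_alias(module_name: &str) -> &str {
    // Alias starts right after the last '.', or at 0 if there is no dot.
    let last_ident_start = module_name.rfind('.').map_or(0, |v| v + 1);
    &module_name[last_ident_start..]
}

fn main() {
    assert_eq!(implicit_alias("std.io"), "io");   // `import std.io;` binds `io`
    assert_eq!(implicit_alias("runtime"), "runtime");
}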
diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs new file mode 100644 index 0000000000000000000000000000000000000000..e4acb219a132d1130240084feea0ddfc77ecee73 --- /dev/null +++ b/src/protocol/parser/pass_symbols.rs @@ -0,0 +1,250 @@ +use crate::protocol::ast::*; +use super::symbol_table2::*; +use crate::protocol::input_source2::{ParseError, InputSpan}; +use super::tokens::*; +use super::token_parsing::*; +use super::{Module, ModuleCompilationPhase, PassCtx}; + +/// Scans the module and finds all module-level type definitions. These will be +/// added to the symbol table such that during AST-construction we know which +/// identifiers point to types. Will also parse all pragmas to determine module +/// names. +pub(crate) struct PassSymbols { + symbols: Vec<Symbol>, + pragmas: Vec<PragmaId>, + imports: Vec<ImportId>, + definitions: Vec<DefinitionId>, + buffer: String, + has_pragma_version: bool, + has_pragma_module: bool, +} + +impl PassSymbols { + pub(crate) fn new() -> Self { + Self{ + symbols: Vec::with_capacity(128), + pragmas: Vec::with_capacity(8), + imports: Vec::with_capacity(32), + definitions: Vec::with_capacity(128), + buffer: String::with_capacity(128), + has_pragma_version: false, + has_pragma_module: false, + } + } + + fn reset(&mut self) { + self.symbols.clear(); + self.pragmas.clear(); + self.imports.clear(); + self.definitions.clear(); + self.has_pragma_version = false; + self.has_pragma_module = false; + } + + pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { + self.reset(); + + let module = &mut modules[module_idx]; + let module_range = &module.tokens.ranges[0]; + + debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized); + debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); + debug_assert!(module.root_id.is_invalid()); // not set yet, + + // Preallocate root in the heap + let root_id = ctx.heap.alloc_protocol_description(|this| { + Root{ + this, + pragmas: Vec::new(), + imports: Vec::new(), + definitions: Vec::new(), + } + }); + module.root_id = root_id; + + // Visit token ranges to detect definitions and pragmas + let mut range_idx = module_range.first_child_idx; + loop { + let range_idx_usize = range_idx as usize; + let cur_range = &module.tokens.ranges[range_idx_usize]; + + // Parse if it is a definition or a pragma + if cur_range.range_kind == TokenRangeKind::Definition { + self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; + } else if cur_range.range_kind == TokenRangeKind::Pragma { + self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?; + } + + match cur_range.next_sibling_idx { + Some(idx) => { range_idx = idx; }, + None => { break; }, + } + } + + // Add the module's symbol scope and the symbols we just parsed + let module_scope = SymbolScope::Module(root_id); + ctx.symbols.insert_scope(None, module_scope); + for symbol in self.symbols.drain(..)
{ + if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) { + return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)) + } + } + + // Modify the preallocated root + let root = &mut ctx.heap[root_id]; + root.pragmas.extend(self.pragmas.drain(..)); + root.definitions.extend(self.definitions.drain(..)); + module.phase = ModuleCompilationPhase::SymbolsScanned; + + Ok(()) + } + + fn visit_pragma_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let range = &module.tokens.ranges[range_idx]; + let mut iter = module.tokens.iter_range(range); + + // Consume pragma name + let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?; + + // Consume pragma values + if pragma_section == b"#module" { + // Check if name is defined twice within the same file + if self.has_pragma_module { + return Err(ParseError::new_error(&module.source, pragma_start, "module name is defined twice")); + } + + // Consume the domain-name + let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; + if iter.next().is_some() { + return Err(ParseError::new_error(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); + } + + // Add to heap and symbol table + let pragma_span = InputSpan::from_positions(pragma_start, module_span.end); + let module_name = ctx.pool.intern(module_name); + let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{ + this, + span: pragma_span, + value: Identifier{ span: module_span, value: module_name.clone() }, + })); + self.pragmas.push(pragma_id); + + if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name, module.root_id) { + // Naming conflict + let this_module = &modules[module_idx]; + let other_module = seek_module(modules, other_module_root_id).unwrap(); + let (other_module_pragma_id, _) = other_module.name.clone().unwrap(); + let other_pragma = ctx.heap[other_module_pragma_id].as_module(); + return Err(ParseError::new_error_str_at_span( + &this_module.source, pragma_span, "conflict in module name" + ).with_info_str_at_span( + &other_module.source, other_pragma.span, "other module is defined here" + )); + } + self.has_pragma_module = true; + } else if pragma_section == b"#version" { + // Check if version is defined twice within the same file + if self.has_pragma_version { + return Err(ParseError::new_error(&module.source, pragma_start, "module version is defined twice")); + } + + // Consume the version pragma + let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?; + let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{ + this, + span: InputSpan::from_positions(pragma_start, version_span.end), + version, + })); + self.pragmas.push(pragma_id); + self.has_pragma_version = true; + } else { + // Custom pragma, maybe we support this in the future, but for now + // we don't.
+ return Err(ParseError::new_error(&module.source, pragma_start, "illegal pragma name")); + } + + Ok(()) + } + + fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> { + let module = &modules[module_idx]; + let range = &module.tokens.ranges[range_idx]; + let definition_span = InputSpan::from_positions( + module.tokens.start_pos(range), + module.tokens.end_pos(range) + ); + let mut iter = module.tokens.iter_range(range); + + // First ident must be type of symbol + let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); + + // Retrieve identifier of definition + let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?; + let ident_text = identifier.value.clone(); // because we need it later + let identifier_span = identifier.span; // ditto: `identifier` is moved below + + // Reserve space in AST for definition and add it to the symbol table + let definition_class; + let ast_definition_id; + match kw_text { + KW_STRUCT => { + let struct_def_id = ctx.heap.alloc_struct_definition(|this| { + StructDefinition::new_empty(this, definition_span, identifier) + }); + definition_class = DefinitionClass::Struct; + ast_definition_id = struct_def_id.upcast(); + }, + KW_ENUM => { + let enum_def_id = ctx.heap.alloc_enum_definition(|this| { + EnumDefinition::new_empty(this, definition_span, identifier) + }); + definition_class = DefinitionClass::Enum; + ast_definition_id = enum_def_id.upcast(); + }, + KW_UNION => { + let union_def_id = ctx.heap.alloc_union_definition(|this| { + UnionDefinition::new_empty(this, definition_span, identifier) + }); + definition_class = DefinitionClass::Union; + ast_definition_id = union_def_id.upcast() + }, + KW_FUNCTION => { + let func_def_id = ctx.heap.alloc_function_definition(|this| { + FunctionDefinition::new_empty(this, definition_span, identifier) + }); + definition_class = DefinitionClass::Function; + ast_definition_id = func_def_id.upcast(); + }, + KW_PRIMITIVE | KW_COMPOSITE => { + let component_variant = if kw_text == KW_PRIMITIVE { + ComponentVariant::Primitive + } else { + ComponentVariant::Composite + }; + let comp_def_id = ctx.heap.alloc_component_definition(|this| { + ComponentDefinition::new_empty(this, definition_span, component_variant, identifier) + }); + definition_class = DefinitionClass::Component; + ast_definition_id = comp_def_id.upcast(); + }, + _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)), + } + + let symbol = Symbol{ + name: ident_text, + variant: SymbolVariant::Definition(SymbolDefinition{ + defined_in_module: module.root_id, + defined_in_scope: SymbolScope::Module(module.root_id), + definition_span, + identifier_span, + imported_at: None, + class: definition_class, + definition_id: ast_definition_id, + }), + }; + self.symbols.push(symbol); + self.definitions.push(ast_definition_id); + + Ok(()) + } +} \ No newline at end of file diff --git a/src/protocol/tokenizer/mod.rs b/src/protocol/parser/pass_tokenizer.rs similarity index 72% rename from src/protocol/tokenizer/mod.rs rename to src/protocol/parser/pass_tokenizer.rs index 371138f42ebda9441a93b236428802eba6b6938f..f0a8c44190f142a38b65583fd1836c70c933ae90 100644 --- a/src/protocol/tokenizer/mod.rs +++ b/src/protocol/parser/pass_tokenizer.rs @@ -5,309 +5,7 @@ use crate::protocol::input_source2::{ InputSpan }; -pub(crate) const KW_STRUCT: &'static [u8] = b"struct"; -pub(crate) const KW_ENUM: &'static [u8] = b"enum"; -pub(crate) const KW_UNION: &'static [u8] = b"union"; -pub(crate) const KW_FUNCTION:
&'static [u8] = b"function"; -pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive"; -pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite"; -pub(crate) const KW_IMPORT: &'static [u8] = b"import"; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) enum TokenKind { - // Variable-character tokens, followed by a SpanEnd token - Ident, // regular identifier - Pragma, // identifier with prefixed `#`, range includes `#` - Integer, // integer literal - String, // string literal, range includes `"` - Character, // character literal, range includes `'` - LineComment, // line comment, range includes leading `//`, but not newline - BlockComment, // block comment, range includes leading `/*` and trailing `*/` - // Punctuation (single character) - Exclamation, // ! - Question, // ? - Pound, // # - OpenAngle, // < - OpenCurly, // { - OpenParen, // ( - OpenSquare, // [ - CloseAngle, // > - CloseCurly, // } - CloseParen, // ) - CloseSquare, // ] - Colon, // : - Comma, // , - Dot, // . - SemiColon, // ; - Quote, // ' - DoubleQuote, // " - // Operator-like (single character) - At, // @ - Plus, // + - Minus, // - - Star, // * - Slash, // / - Percent, // % - Caret, // ^ - And, // & - Or, // | - Tilde, // ~ - Equal, // = - // Punctuation (two characters) - ColonColon, // :: - DotDot, // .. - ArrowRight, // -> - // Operator-like (two characters) - PlusPlus, // ++ - PlusEquals, // += - MinusMinus, // -- - MinusEquals, // -= - StarEquals, // *= - SlashEquals, // /= - PercentEquals, // %= - CaretEquals, // ^= - AndAnd, // && - AndEquals, // &= - OrOr, // || - OrEquals, // |= - EqualEqual, // == - NotEqual, // != - ShiftLeft, // << - ShiftRight, // >> - // Operator-like (three characters) - ShiftLeftEquals,// <<= - ShiftRightEquals, // >>= - // Special marker token to indicate end of variable-character tokens - SpanEnd, -} - -impl TokenKind { - /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is - /// the case for tokens of variable length (e.g. an identifier). - fn has_span_end(&self) -> bool { - return *self <= TokenKind::BlockComment - } - - /// Returns the number of characters associated with the token. May only be called on tokens - /// that do not have a variable length. - fn num_characters(&self) -> u32 { - debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); - if *self <= TokenKind::Equal { - 1 - } else if *self <= TokenKind::ShiftRight { - 2 - } else { - 3 - } - } - - /// Returns the characters that are represented by the token, may only be called on tokens that - /// do not have a variable length. 
- pub fn token_chars(&self) -> &'static str { - debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); - use TokenKind as TK; - match self { - TK::Exclamation => "!", - TK::Question => "?", - TK::Pound => "#", - TK::OpenAngle => "<", - TK::OpenCurly => "{", - TK::OpenParen => "(", - TK::OpenSquare => "[", - TK::CloseAngle => ">", - TK::CloseCurly => "}", - TK::CloseParen => ")", - TK::CloseSquare => "]", - TK::Colon => ":", - TK::Comma => ",", - TK::Dot => ".", - TK::SemiColon => ";", - TK::Quote => "'", - TK::DoubleQuote => "\"", - TK::At => "@", - TK::Plus => "+", - TK::Minus => "-", - TK::Star => "*", - TK::Slash => "/", - TK::Percent => "%", - TK::Caret => "^", - TK::And => "&", - TK::Or => "|", - TK::Tilde => "~", - TK::Equal => "=", - TK::ColonColon => "::", - TK::DotDot => "..", - TK::ArrowRight => "->", - TK::PlusPlus => "++", - TK::PlusEquals => "+=", - TK::MinusMinus => "--", - TK::MinusEquals => "-=", - TK::StarEquals => "*=", - TK::SlashEquals => "/=", - TK::PercentEquals => "%=", - TK::CaretEquals => "^=", - TK::AndAnd => "&&", - TK::AndEquals => "&=", - TK::OrOr => "||", - TK::OrEquals => "|=", - TK::EqualEqual => "==", - TK::NotEqual => "!=", - TK::ShiftLeft => "<<", - TK::ShiftRight => ">>", - TK::ShiftLeftEquals => "<<=", - TK::ShiftRightEquals => ">>=", - // Lets keep these in explicitly for now, in case we want to add more symbols - TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character | - TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(), - } - } -} - -pub(crate) struct Token { - pub kind: TokenKind, - pub pos: InputPosition, -} - -impl Token { - fn new(kind: TokenKind, pos: InputPosition) -> Self { - Self{ kind, pos } - } -} - -#[derive(Debug, PartialEq, Eq)] -pub(crate) enum TokenRangeKind { - Module, - Pragma, - Import, - Definition, - Code, -} - -/// TODO: Add first_child and next_sibling indices for slightly faster traversal -#[derive(Debug)] -pub(crate) struct TokenRange { - // Index of parent in `TokenBuffer.ranges`, does not have a parent if the - // range kind is Module, in that case the parent index points to itself. - pub parent_idx: usize, - pub range_kind: TokenRangeKind, - pub curly_depth: u32, - // InputPosition offset is limited to u32, so token ranges can be as well. 
-    pub start: u32, // first token (inclusive index)
-    pub end: u32, // last token (exclusive index)
-    // Subranges and where they can be found
-    pub subranges: u32, // Number of subranges
-    pub first_child_idx: u32, // First subrange (or points to self if no subranges)
-    pub last_child_idx: u32, // Last subrange (or points to self if no subranges)
-    pub next_sibling_idx: Option<u32>,
-}
-
-pub(crate) struct TokenBuffer {
-    pub tokens: Vec<Token>,
-    pub ranges: Vec<TokenRange>,
-}
-
-impl TokenBuffer {
-    pub(crate) fn new() -> Self {
-        Self{ tokens: Vec::new(), ranges: Vec::new() }
-    }
-
-    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
-        TokenIter::new(self, range.start as usize, range.end as usize)
-    }
-
-    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
-        self.tokens[range.start].pos
-    }
-
-    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
-        let last_token = &self.tokens[range.end - 1];
-        if last_token.kind == TokenKind::SpanEnd {
-            return last_token.pos
-        } else {
-            debug_assert!(!last_token.kind.has_span_end());
-            return last_token.pos.with_offset(last_token.kind.num_characters());
-        }
-    }
-}
-
-pub(crate) struct TokenIter<'a> {
-    tokens: &'a Vec<Token>,
-    cur: usize,
-    end: usize,
-}
-
-impl<'a> TokenIter<'a> {
-    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
-        Self{ tokens: &buffer.tokens, cur: start, end }
-    }
-
-    /// Returns the next token (may include comments), or `None` if at the end
-    /// of the range.
-    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
-        if self.cur >= self.end {
-            return None;
-        }
-
-        let token = &self.tokens[self.cur];
-        Some(token.kind)
-    }
-
-    /// Returns the next token (but skips over comments), or `None` if at the
-    /// end of the range
-    pub(crate) fn next(&mut self) -> Option<TokenKind> {
-        while let Some(token_kind) = self.next() {
-            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
-                return Some(token_kind);
-            }
-            self.consume();
-        }
-
-        return None
-    }
-
-    /// Returns the start position belonging to the token returned by `next`. If
-    /// there is not a next token, then we return the end position of the
-    /// previous token.
-    pub(crate) fn last_valid_pos(&self) -> InputPosition {
-        if self.cur < self.end {
-            // Return token position
-            return self.tokens[self.cur].pos
-        }
-
-        // Return previous token end
-        let token = &self.tokens[self.cur - 1];
-        return if token.kind == TokenKind::SpanEnd {
-            token.pos
-        } else {
-            token.pos.with_offset(token.kind.num_characters());
-        };
-    }
-
-    /// Returns the token range belonging to the token returned by `next`. This
-    /// assumes that we're not at the end of the range we're iterating over.
-    pub(crate) fn next_range(&self) -> (InputPosition, InputPosition) {
-        debug_assert!(self.cur < self.end);
-        let token = &self.tokens[self.cur];
-        if token.kind.has_span_end() {
-            let span_end = &self.tokens[self.cur + 1];
-            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
-            (token.pos, span_end.pos)
-        } else {
-            let offset = token.kind.num_characters();
-            (token.pos, token.pos.with_offset(offset))
-        }
-    }
-
-    pub(crate) fn consume(&mut self) {
-        if let Some(kind) = self.next() {
-            if kind.has_span_end() {
-                self.cur += 2;
-            } else {
-                self.cur += 1;
-            }
-        }
-    }
-}
+use crate::protocol::parser::tokens::*;
 
 /// Tokenizer is a reusable parser to tokenize multiple source files using the
 /// same allocated buffers.
In a well-formed program, we produce a consistent @@ -318,7 +16,7 @@ impl<'a> TokenIter<'a> { /// will detect this once we transform the tokens into the AST. To ensure a /// consistent AST-producing phase we will require the import to have balanced /// curly braces -pub(crate) struct Tokenizer { +pub(crate) struct PassTokenizer { // Stack of input positions of opening curly braces, used to detect // unmatched opening braces, unmatched closing braces are detected // immediately. @@ -327,7 +25,7 @@ pub(crate) struct Tokenizer { stack_idx: usize, } -impl Tokenizer { +impl PassTokenizer { pub(crate) fn new() -> Self { Self{ curly_stack: Vec::with_capacity(32), stack_idx: 0 } } @@ -348,7 +46,7 @@ impl Tokenizer { curly_depth: 0, start: 0, end: 0, - subranges: 0, + num_child_ranges: 0, first_child_idx: 0, last_child_idx: 0, next_sibling_idx: None, @@ -395,7 +93,7 @@ impl Tokenizer { if token == TokenKind::OpenCurly { self.curly_stack.push(token_pos); } else if token == TokenKind::CloseCurly { - // Check if this marks the end of a range we're + // Check if this marks the end of a range we're // currently processing if self.curly_stack.is_empty() { return Err(ParseError::new_error( @@ -445,7 +143,7 @@ impl Tokenizer { // For each range make sure its children make sense for parent_idx in 0..target.ranges.len() { let cur_range = &target.ranges[parent_idx]; - if cur_range.subranges == 0 { + if cur_range.num_child_ranges == 0 { assert_eq!(cur_range.first_child_idx, parent_idx as u32); assert_eq!(cur_range.last_child_idx, parent_idx as u32); } else { @@ -463,7 +161,7 @@ impl Tokenizer { } assert_eq!(cur_range.last_child_idx, last_child_idx); - assert_eq!(cur_range.subranges, child_counter); + assert_eq!(cur_range.num_child_ranges, child_counter); } } } @@ -637,7 +335,7 @@ impl Tokenizer { token_kind = TokenKind::CloseSquare; } else if first_char == b'^' { source.consume(); - if let Some(b'=') = source.next() { + if let Some(b'=') = source.next() { source.consume(); token_kind = TokenKind::CaretEquals; } else { @@ -683,7 +381,7 @@ impl Tokenizer { return Err(ParseError::new_error(source, source.pos(), "non-ASCII character in char literal")); } source.consume(); - + // Make sure ending quote was not escaped if c == b'\'' && prev_char != b'\\' { prev_char = c; @@ -865,7 +563,7 @@ impl Tokenizer { let end_pos = source.pos(); target.tokens.push(Token::new(TokenKind::Ident, begin_pos)); target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos)); - Ok(source.section(begin_pos, end_pos)) + Ok(source.section_at_pos(begin_pos, end_pos)) } fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> { @@ -915,9 +613,9 @@ impl Tokenizer { let cur_range = &mut target.ranges[self.stack_idx]; // If we have just popped a range and then push a new range, then the - // first token is equal to the last token registered on the current - // range. If not, then we had some intermediate tokens that did not - // belong to a particular kind of token range: hence we insert an + // first token is equal to the last token registered on the current + // range. If not, then we had some intermediate tokens that did not + // belong to a particular kind of token range: hence we insert an // intermediate "code" range. 
if cur_range.end != first_token { let code_start = cur_range.end; @@ -931,14 +629,14 @@ impl Tokenizer { cur_range.last_child_idx = code_range_idx + 1; cur_range.end = first_token; - cur_range.subranges += 1; + cur_range.num_child_ranges += 1; target.ranges.push(TokenRange{ parent_idx: self.stack_idx, range_kind: TokenRangeKind::Code, curly_depth: self.curly_depth, start: code_start, end: first_token, - subranges: 0, + num_child_ranges: 0, first_child_idx: code_range_idx, last_child_idx: code_range_idx, next_sibling_idx: Some(code_range_idx + 1), // we're going to push this thing next @@ -964,7 +662,7 @@ impl Tokenizer { curly_depth: self.curly_depth, start: first_token, end: first_token, - subranges: 0, + num_child_ranges: 0, first_child_idx: new_range_idx, last_child_idx: new_range_idx, next_sibling_idx: None, @@ -978,12 +676,12 @@ impl Tokenizer { // Fix up the current range before going back to parent last.end = end_index; debug_assert_ne!(last.start, end_index); - + // Go back to parent self.stack_idx = last.parent_idx; let parent = &mut target.ranges[self.stack_idx]; parent.end = end_index; - parent.subranges += 1; + parent.num_child_ranges += 1; } @@ -1003,11 +701,11 @@ impl Tokenizer { fn demarks_definition(ident: &[u8]) -> bool { return ident == KW_STRUCT || - ident == KW_ENUM || - ident == KW_UNION || - ident == KW_FUNCTION || - ident == KW_PRIMITIVE || - ident == KW_COMPOSITE + ident == KW_ENUM || + ident == KW_UNION || + ident == KW_FUNCTION || + ident == KW_PRIMITIVE || + ident == KW_COMPOSITE } fn demarks_import(ident: &[u8]) -> bool { @@ -1031,18 +729,18 @@ fn is_pragma_start_or_pound(c: u8) -> bool { } fn is_identifier_start(c: u8) -> bool { - return + return (c >= b'a' && c <= b'z') || - (c >= b'A' && c <= b'Z') || - c == b'_' + (c >= b'A' && c <= b'Z') || + c == b'_' } fn is_identifier_remaining(c: u8) -> bool { - return + return (c >= b'0' && c <= b'9') || - (c >= b'a' && c <= b'z') || - (c >= b'A' && c <= b'Z') || - c == b'_' + (c >= b'a' && c <= b'z') || + (c >= b'A' && c <= b'Z') || + c == b'_' } fn is_integer_literal_start(c: u8) -> bool { @@ -1050,10 +748,10 @@ fn is_integer_literal_start(c: u8) -> bool { } fn maybe_number_remaining(c: u8) -> bool { - return + return (c == b'b' || c == b'B' || c == b'o' || c == b'O' || c == b'x' || c == b'X') || - (c >= b'0' && c <= b'9') || - c == b'_'; + (c >= b'0' && c <= b'9') || + c == b'_'; } #[cfg(test)] @@ -1064,7 +762,7 @@ mod tests { #[test] fn test_tokenizer() { let mut source = InputSource::new_test(" - + #version 500 # hello 2 @@ -1100,7 +798,7 @@ mod tests { } } "); - let mut t = Tokenizer::new(); + let mut t = PassTokenizer::new(); let mut buffer = TokenBuffer::new(); t.tokenize(&mut source, &mut buffer).expect("tokenize"); @@ -1119,7 +817,7 @@ mod tests { let (_, end) = iter.next().unwrap(); println!("[{}] {:?} ......", idx, token.kind); assert_eq!(end.kind, TokenKind::SpanEnd); - let text = source.section(token.pos, end.pos); + let text = source.section_at_pos(token.pos, end.pos); println!("{}", String::from_utf8_lossy(text)); }, _ => { diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index b12a28b14736b03d1840194f7a2f9c4a21e79987..43d7c6d22d7cc859ed1d851d25fc00899012fdaa 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -123,7 +123,7 @@ impl SymbolTable { Import::Symbols(import) => { if import.symbols.is_empty() { // Add all symbols from the other module - match self.module_lookup.get(&import.module_name) { + match 
self.module_lookup.get(&import.module) {
                             Some(target_module_id) => {
                                 lookup_reserve_size += heap[*target_module_id].definitions.len()
                             },
@@ -174,7 +174,7 @@ impl SymbolTable {
             match import {
                 Import::Module(import) => {
                     // Find the module using its name
-                    let target_root_id = self.resolve_module(&import.module_name);
+                    let target_root_id = self.resolve_module(&import.module);
                     if target_root_id.is_none() {
                         return Err(ParseError::new_error(&module.source, import.position, "Could not resolve module"));
                     }
@@ -196,7 +196,7 @@ impl SymbolTable {
                 },
                 Import::Symbols(import) => {
                     // Find the target module using its name
-                    let target_root_id = self.resolve_module(&import.module_name);
+                    let target_root_id = self.resolve_module(&import.module);
                     if target_root_id.is_none() {
                         return Err(ParseError::new_error(&module.source, import.position, "Could not resolve module of symbol imports"));
                     }
diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs
index bc40c7b5b9f2666321f7903f2e98c7246d7bbd21..967c44cde8d40bb846caa0977afd513f46cfa239 100644
--- a/src/protocol/parser/symbol_table2.rs
+++ b/src/protocol/parser/symbol_table2.rs
@@ -88,11 +88,13 @@ impl SymbolDefinition {
     }
 }
 
+#[derive(Debug, Clone)]
 pub struct SymbolModule {
     pub root_id: RootId,
     pub introduced_at: ImportId,
 }
 
+#[derive(Debug, Clone)]
 pub struct SymbolDefinition {
     // Definition location (not necessarily the place where the symbol
     // is introduced, as it may be imported)
@@ -108,19 +110,53 @@ pub struct SymbolDefinition {
     pub definition_id: DefinitionId,
 }
 
+impl SymbolDefinition {
+    /// Consumes this definition and returns it again with the `imported_at`
+    /// field replaced by the supplied `ImportId`. Callers that need to keep
+    /// the original should clone it first.
+    pub(crate) fn into_imported(mut self, imported_at: ImportId) -> Self {
+        self.imported_at = Some(imported_at);
+        self
+    }
+}
+
+#[derive(Debug, Clone)]
 pub enum SymbolVariant {
     Module(SymbolModule),
     Definition(SymbolDefinition),
 }
 
+impl SymbolVariant {
+    pub(crate) fn as_module(&self) -> &SymbolModule {
+        match self {
+            SymbolVariant::Module(v) => v,
+            SymbolVariant::Definition(_) => unreachable!("called 'as_module' on {:?}", self),
+        }
+    }
+
+    pub(crate) fn as_definition(&self) -> &SymbolDefinition {
+        match self {
+            SymbolVariant::Module(_) => unreachable!("called 'as_definition' on {:?}", self),
+            SymbolVariant::Definition(v) => v,
+        }
+    }
+
+    pub(crate) fn as_definition_mut(&mut self) -> &mut SymbolDefinition {
+        match self {
+            SymbolVariant::Module(_) => unreachable!("called 'as_definition_mut' on {:?}", self),
+            SymbolVariant::Definition(v) => v,
+        }
+    }
+}
+
+#[derive(Clone)]
 pub struct Symbol {
     pub name: StringRef<'static>,
-    pub data: SymbolVariant,
+    pub variant: SymbolVariant,
 }
 
 impl Symbol {
-    fn class(&self) -> SymbolClass {
-        match &self.data {
+    pub(crate) fn class(&self) -> SymbolClass {
+        match &self.variant {
             SymbolVariant::Module(_) => SymbolClass::Module,
             SymbolVariant::Definition(data) => data.class.as_symbol_class(),
         }
@@ -236,7 +272,7 @@ impl SymbolTable {
     ) -> Option<&Symbol> {
         match self.get_symbol_by_name(in_scope, name) {
             Some(symbol) => {
                match &symbol.variant {
                     SymbolVariant::Module(_) => {
                         None // in-scope modules are always imported
                     },
@@ -252,4 +288,27 @@ impl SymbolTable {
             None => None,
         }
    }
+
+    /// Retrieves all symbols that are defined within a particular scope.
+    /// Imported symbols are ignored. Returns `true` if the scope was found
+    /// (it may still contain zero defined symbols) and `false` if the scope
+    /// was not found.
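+    ///
+    /// A minimal usage sketch (the `table`, `root_id`, and scratch `buffer`
+    /// names are hypothetical):
+    ///
+    /// ```ignore
+    /// let mut buffer = Vec::new();
+    /// if table.get_all_symbols_defined_in_scope(SymbolScope::Module(root_id), &mut buffer) {
+    ///     // `buffer` now holds clones of all non-imported symbols in the module
+    /// }
+    /// ```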
+    pub(crate) fn get_all_symbols_defined_in_scope(&self, in_scope: SymbolScope, target: &mut Vec<Symbol>) -> bool {
+        match self.scope_lookup.get(&in_scope) {
+            Some(scope) => {
+                for symbol in &scope.symbols {
+                    if let SymbolVariant::Definition(definition) = &symbol.variant {
+                        if definition.imported_at.is_some() {
+                            continue;
+                        }
+
+                        // Defined in scope, so push onto target
+                        target.push(symbol.clone());
+                    }
+                }
+
+                true
+            },
+            None => false,
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs
new file mode 100644
index 0000000000000000000000000000000000000000..bf7b4e734bde19af993f20eb553d449ac2b2584b
--- /dev/null
+++ b/src/protocol/parser/token_parsing.rs
@@ -0,0 +1,397 @@
+use crate::collections::StringRef;
+use crate::protocol::ast::*;
+use crate::protocol::input_source2::{
+    InputSource2 as InputSource,
+    InputPosition2 as InputPosition,
+    InputSpan,
+    ParseError,
+};
+use super::tokens::*;
+use super::symbol_table2::*;
+use super::{Module, ModuleCompilationPhase, PassCtx};
+
+// Keywords
+pub(crate) const KW_LET: &'static [u8] = b"let";
+pub(crate) const KW_AS: &'static [u8] = b"as";
+pub(crate) const KW_STRUCT: &'static [u8] = b"struct";
+pub(crate) const KW_ENUM: &'static [u8] = b"enum";
+pub(crate) const KW_UNION: &'static [u8] = b"union";
+pub(crate) const KW_FUNCTION: &'static [u8] = b"function";
+pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
+pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
+pub(crate) const KW_IMPORT: &'static [u8] = b"import";
+
+// Keywords - literals
+pub(crate) const KW_LIT_TRUE: &'static [u8] = b"true";
+pub(crate) const KW_LIT_FALSE: &'static [u8] = b"false";
+pub(crate) const KW_LIT_NULL: &'static [u8] = b"null";
+
+// Keywords - functions
+pub(crate) const KW_FUNC_GET: &'static [u8] = b"get";
+pub(crate) const KW_FUNC_PUT: &'static [u8] = b"put";
+pub(crate) const KW_FUNC_FIRES: &'static [u8] = b"fires";
+pub(crate) const KW_FUNC_CREATE: &'static [u8] = b"create";
+pub(crate) const KW_FUNC_LENGTH: &'static [u8] = b"length";
+
+// Keywords - statements
+pub(crate) const KW_STMT_CHANNEL: &'static [u8] = b"channel";
+pub(crate) const KW_STMT_IF: &'static [u8] = b"if";
+pub(crate) const KW_STMT_WHILE: &'static [u8] = b"while";
+pub(crate) const KW_STMT_BREAK: &'static [u8] = b"break";
+pub(crate) const KW_STMT_CONTINUE: &'static [u8] = b"continue";
+pub(crate) const KW_STMT_GOTO: &'static [u8] = b"goto";
+pub(crate) const KW_STMT_RETURN: &'static [u8] = b"return";
+pub(crate) const KW_STMT_SYNC: &'static [u8] = b"synchronous";
+pub(crate) const KW_STMT_ASSERT: &'static [u8] = b"assert";
+pub(crate) const KW_STMT_NEW: &'static [u8] = b"new";
+
+// Keywords - types
+pub(crate) const KW_TYPE_IN_PORT: &'static [u8] = b"in";
+pub(crate) const KW_TYPE_OUT_PORT: &'static [u8] = b"out";
+pub(crate) const KW_TYPE_MESSAGE: &'static [u8] = b"msg";
+pub(crate) const KW_TYPE_BOOL: &'static [u8] = b"bool";
+pub(crate) const KW_TYPE_UINT8: &'static [u8] = b"u8";
+pub(crate) const KW_TYPE_UINT16: &'static [u8] = b"u16";
+pub(crate) const KW_TYPE_UINT32: &'static [u8] = b"u32";
+pub(crate) const KW_TYPE_UINT64: &'static [u8] = b"u64";
+pub(crate) const KW_TYPE_SINT8: &'static [u8] = b"s8";
+pub(crate) const KW_TYPE_SINT16: &'static [u8] = b"s16";
+pub(crate) const KW_TYPE_SINT32: &'static [u8] = b"s32";
+pub(crate) const KW_TYPE_SINT64: &'static [u8] = b"s64";
+pub(crate) const KW_TYPE_CHAR: &'static [u8] = b"char";
+pub(crate) const KW_TYPE_STRING: &'static [u8] = b"string";
+pub(crate) const KW_TYPE_INFERRED: &'static [u8] = b"auto";
+
+/// Consumes a domain-name identifier: identifiers separated by dots. To
+/// simplify later parsing and span identification, the domain-name may
+/// contain whitespace, but must reside on a single line.
+pub(crate) fn consume_domain_ident<'a>(
+    source: &'a InputSource, iter: &mut TokenIter
+) -> Result<(&'a [u8], InputSpan), ParseError> {
+    let (_, mut span) = consume_ident(source, iter)?;
+    while let Some(TokenKind::Dot) = iter.next() {
+        iter.consume();
+        let (_, new_span) = consume_ident(source, iter)?;
+        span.end = new_span.end;
+    }
+
+    // Not strictly necessary, but probably a reasonable restriction: this
+    // simplifies parsing of module naming and imports.
+    if span.begin.line != span.end.line {
+        return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines"));
+    }
+
+    // If the module name consists of a single identifier, then it may not
+    // match any of the reserved keywords
+    let section = source.section_at_pos(span.begin, span.end);
+    if is_reserved_keyword(section) {
+        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
+    }
+
+    Ok((section, span))
+}
+
+/// Consumes a specific expected token. Be careful to only call this with
+/// tokens that do not have a variable length.
+pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<(), ParseError> {
+    if Some(expected) != iter.next() {
+        return Err(ParseError::new_error_at_pos(
+            source, iter.last_valid_pos(),
+            format!("expected '{}'", expected.token_chars())
+        ));
+    }
+    iter.consume();
+    Ok(())
+}
+
+/// Consumes a comma-separated list of items if the opening delimiting token is
+/// encountered. If not, then the iterator will remain at its current position.
+/// Note that the potential cases are:
+/// - No opening delimiter encountered: we return `false`.
+/// - Both opening and closing delimiter encountered, but no items.
+/// - Opening and closing delimiter encountered, and items were processed.
+/// - Found an opening delimiter, but processing an item failed.
+pub(crate) fn maybe_consume_comma_separated<T, F>(
+    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter,
+    consumer_fn: F, target: &mut Vec<T>, item_name_and_article: &'static str
+) -> Result<bool, ParseError>
+    where F: Fn(&InputSource, &mut TokenIter) -> Result<T, ParseError>
+{
+    let mut next = iter.next();
+    if Some(open_delim) != next {
+        return Ok(false);
+    }
+
+    // Opening delimiter encountered, so must parse the comma-separated list.
+    iter.consume();
+    target.clear();
+    let mut had_comma = true;
+    loop {
+        next = iter.next();
+        if Some(close_delim) == next {
+            iter.consume();
+            break;
+        } else if !had_comma {
+            return Err(ParseError::new_error_at_pos(
+                source, iter.last_valid_pos(),
+                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
+            ));
+        }
+
+        let new_item = consumer_fn(source, iter)?;
+        target.push(new_item);
+
+        next = iter.next();
+        had_comma = next == Some(TokenKind::Comma);
+        if had_comma {
+            iter.consume();
+        }
+    }
+
+    Ok(true)
+}
+
+/// Consumes a comma-separated list and expects the opening and closing
+/// delimiters to be present. The target vector may still end up empty.
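+///
+/// A usage sketch (the `consume_field` item parser and `fields` buffer are
+/// hypothetical), e.g. for a curly-brace delimited field list:
+///
+/// ```ignore
+/// consume_comma_separated(
+///     TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter,
+///     consume_field, &mut fields, "a field", "a field list"
+/// )?;
+/// ```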
+pub(crate) fn consume_comma_separated<T, F>(
+    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter,
+    consumer_fn: F, target: &mut Vec<T>, item_name_and_article: &'static str,
+    list_name_and_article: &'static str
+) -> Result<(), ParseError>
+    where F: Fn(&InputSource, &mut TokenIter) -> Result<T, ParseError>
+{
+    let first_pos = iter.last_valid_pos();
+    match maybe_consume_comma_separated(open_delim, close_delim, source, iter, consumer_fn, target, item_name_and_article) {
+        Ok(true) => Ok(()),
+        Ok(false) => Err(ParseError::new_error_at_pos(
+            source, first_pos,
+            format!("expected a {}", list_name_and_article)
+        )),
+        Err(err) => Err(err)
+    }
+}
+
+/// Consumes an integer literal. The literal may be binary, octal, hexadecimal
+/// or decimal, and may contain separating '_'-characters.
+pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
+    if Some(TokenKind::Integer) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
+    }
+    let integer_span = iter.next_span();
+    iter.consume();
+
+    let integer_text = source.section_at_span(integer_span);
+
+    // Determine radix and offset from prefix
+    let (radix, input_offset, radix_name) =
+        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
+            // Binary number
+            (2, 2, "binary")
+        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
+            // Octal number
+            (8, 2, "octal")
+        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
+            // Hexadecimal number
+            (16, 2, "hexadecimal")
+        } else {
+            (10, 0, "decimal")
+        };
+
+    // Take out any of the separating '_' characters
+    buffer.clear();
+    for char_idx in input_offset..integer_text.len() {
+        let char = integer_text[char_idx];
+        if char == b'_' {
+            continue;
+        }
+        // Accept any hexadecimal digit here (e.g. `a`-`f` for hexadecimal
+        // literals); `from_str_radix` below performs the per-radix validation.
+        if !char.is_ascii_hexdigit() {
+            return Err(ParseError::new_error_at_span(
+                source, integer_span,
+                format!("incorrectly formatted {} number", radix_name)
+            ));
+        }
+        buffer.push(char::from(char));
+    }
+
+    // Use the cleaned up string to convert to integer
+    match u64::from_str_radix(&buffer, radix) {
+        Ok(number) => Ok((number, integer_span)),
+        Err(_) => Err(ParseError::new_error_at_span(
+            source, integer_span,
+            format!("incorrectly formatted {} number", radix_name)
+        )),
+    }
+}
+
+pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
+    if Some(TokenKind::Pragma) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
+    }
+    let (pragma_start, pragma_end) = iter.next_positions();
+    iter.consume();
+    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
+}
+
+pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
+    peek_ident(source, iter).map_or(false, |section| section == expected)
+}
+
+pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
+    if Some(TokenKind::Ident) == iter.next() {
+        let (start, end) = iter.next_positions();
+        return Some(source.section_at_pos(start, end))
+    }
+
+    None
+}
+
+/// Consumes any identifier and returns it together with its span. Does not
+/// check whether the identifier is a reserved keyword.
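+///
+/// A small sketch of the returned pair's invariant (the returned slice is
+/// exactly the source section covered by the returned span):
+///
+/// ```ignore
+/// let (text, span) = consume_any_ident(source, iter)?;
+/// debug_assert_eq!(text, source.section_at_pos(span.begin, span.end));
+/// ```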
+pub(crate) fn consume_any_ident<'a>(
+    source: &'a InputSource, iter: &mut TokenIter
+) -> Result<(&'a [u8], InputSpan), ParseError> {
+    if Some(TokenKind::Ident) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
+    }
+    let (ident_start, ident_end) = iter.next_positions();
+    iter.consume();
+    Ok((source.section_at_pos(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
+}
+
+/// Consumes a specific identifier. May or may not be a reserved keyword.
+pub(crate) fn consume_exact_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> Result<InputSpan, ParseError> {
+    let (ident, span) = consume_any_ident(source, iter)?;
+    if ident != expected {
+        debug_assert!(expected.is_ascii());
+        return Err(ParseError::new_error_at_pos(
+            source, iter.last_valid_pos(),
+            format!("expected the text '{}'", &String::from_utf8_lossy(expected))
+        ));
+    }
+    Ok(span)
+}
+
+/// Consumes an identifier that is not a reserved keyword and returns it
+/// together with its span.
+pub(crate) fn consume_ident<'a>(
+    source: &'a InputSource, iter: &mut TokenIter
+) -> Result<(&'a [u8], InputSpan), ParseError> {
+    let (ident, span) = consume_any_ident(source, iter)?;
+    if is_reserved_keyword(ident) {
+        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
+    }
+
+    Ok((ident, span))
+}
+
+/// Consumes an identifier and immediately interns it into the `StringPool`.
+pub(crate) fn consume_ident_interned(
+    source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx
+) -> Result<Identifier, ParseError> {
+    let (value, span) = consume_ident(source, iter)?;
+    let value = ctx.pool.intern(value);
+    Ok(Identifier{ span, value })
+}
+
+fn is_reserved_definition_keyword(text: &[u8]) -> bool {
+    match text {
+        KW_STRUCT | KW_ENUM | KW_UNION | KW_FUNCTION | KW_PRIMITIVE | KW_COMPOSITE => true,
+        _ => false,
+    }
+}
+
+fn is_reserved_statement_keyword(text: &[u8]) -> bool {
+    match text {
+        KW_IMPORT | KW_AS |
+        KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
+        KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
+        KW_STMT_SYNC | KW_STMT_ASSERT | KW_STMT_NEW => true,
+        _ => false,
+    }
+}
+
+fn is_reserved_expression_keyword(text: &[u8]) -> bool {
+    match text {
+        KW_LET |
+        KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
+        KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_LENGTH => true,
+        _ => false,
+    }
+}
+
+fn is_reserved_type_keyword(text: &[u8]) -> bool {
+    match text {
+        KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
+        KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
+        KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
+        KW_TYPE_CHAR | KW_TYPE_STRING |
+        KW_TYPE_INFERRED => true,
+        _ => false,
+    }
+}
+
+fn is_reserved_keyword(text: &[u8]) -> bool {
+    return
+        is_reserved_definition_keyword(text) ||
+        is_reserved_statement_keyword(text) ||
+        is_reserved_expression_keyword(text) ||
+        is_reserved_type_keyword(text);
+}
+
+pub(crate) fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
+    for module in modules {
+        if module.root_id == root_id {
+            return Some(module)
+        }
+    }
+
+    return None
+}
+
+/// Constructs a human-readable message indicating why there is a conflict of
+/// symbols.
+// Note: passing the `module_idx` is not strictly necessary, but guards
+// against programmer mistakes during development: the conflict must arise in
+// the module we are currently parsing.
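+// As a sketch (the exact rendering depends on how `ParseError` is formatted),
+// a clash on a type `Foo` produces messages along the lines of:
+//   error: symbol is defined twice: the type 'Foo' imported here
+//   info:  it conflicts with the type 'Foo' defined here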
+pub(crate) fn construct_symbol_conflict_error(
+    modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol
+) -> ParseError {
+    let module = &modules[module_idx];
+    let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) {
+        match &symbol.variant {
+            SymbolVariant::Module(symbol_module) => {
+                // Module symbols are always introduced by an import
+                let import = ctx.heap[symbol_module.introduced_at].as_module();
+                (
+                    format!("the module aliased as '{}' imported here", symbol.name.as_str()),
+                    import.span
+                )
+            },
+            SymbolVariant::Definition(definition) => match definition.imported_at {
+                Some(import_id) => {
+                    // Symbol is introduced by a symbol import
+                    let import = ctx.heap[import_id].as_symbols();
+                    (
+                        format!("the type '{}' imported here", symbol.name.as_str()),
+                        import.span
+                    )
+                },
+                None => {
+                    // Symbol is defined in the module itself
+                    debug_assert_eq!(definition.defined_in_module, module.root_id);
+                    (
+                        format!("the type '{}' defined here", symbol.name.as_str()),
+                        definition.identifier_span
+                    )
+                },
+            },
+        }
+    };
+
+    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
+    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
+    return ParseError::new_error_at_span(
+        &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
+    ).with_info_at_span(
+        &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
+    )
+}
\ No newline at end of file
diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs
new file mode 100644
index 0000000000000000000000000000000000000000..710548b32f8365e009f48646c294251faadbb1f5
--- /dev/null
+++ b/src/protocol/parser/tokens.rs
@@ -0,0 +1,315 @@
+use crate::protocol::input_source2::{
+    InputPosition2 as InputPosition,
+    InputSpan
+};
+
+/// Represents a particular kind of token. Some tokens are variable-length;
+/// such a token is always followed by a `TokenKind::SpanEnd` token.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub(crate) enum TokenKind {
+    // Variable-character tokens, followed by a SpanEnd token
+    Ident,          // regular identifier
+    Pragma,         // identifier with prefixed `#`, range includes `#`
+    Integer,        // integer literal
+    String,         // string literal, range includes `"`
+    Character,      // character literal, range includes `'`
+    LineComment,    // line comment, range includes leading `//`, but not newline
+    BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
+    // Punctuation (single character)
+    Exclamation,    // !
+    Question,       // ?
+    Pound,          // #
+    OpenAngle,      // <
+    OpenCurly,      // {
+    OpenParen,      // (
+    OpenSquare,     // [
+    CloseAngle,     // >
+    CloseCurly,     // }
+    CloseParen,     // )
+    CloseSquare,    // ]
+    Colon,          // :
+    Comma,          // ,
+    Dot,            // .
+    SemiColon,      // ;
+    Quote,          // '
+    DoubleQuote,    // "
+    // Operator-like (single character)
+    At,             // @
+    Plus,           // +
+    Minus,          // -
+    Star,           // *
+    Slash,          // /
+    Percent,        // %
+    Caret,          // ^
+    And,            // &
+    Or,             // |
+    Tilde,          // ~
+    Equal,          // =
+    // Punctuation (two characters)
+    ColonColon,     // ::
+    DotDot,         // ..
+    ArrowRight,     // ->
+    // Operator-like (two characters)
+    PlusPlus,       // ++
+    PlusEquals,     // +=
+    MinusMinus,     // --
+    MinusEquals,    // -=
+    StarEquals,     // *=
+    SlashEquals,    // /=
+    PercentEquals,  // %=
+    CaretEquals,    // ^=
+    AndAnd,         // &&
+    AndEquals,      // &=
+    OrOr,           // ||
+    OrEquals,       // |=
+    EqualEqual,     // ==
+    NotEqual,       // !=
+    ShiftLeft,      // <<
+    ShiftRight,     // >>
+    // Operator-like (three characters)
+    ShiftLeftEquals,    // <<=
+    ShiftRightEquals,   // >>=
+    // Special marker token to indicate end of variable-character tokens
+    SpanEnd,
+}
+
+impl TokenKind {
+    /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
+    /// the case for tokens of variable length (e.g. an identifier).
+    fn has_span_end(&self) -> bool {
+        return *self <= TokenKind::BlockComment
+    }
+
+    /// Returns the number of characters associated with the token. May only be called on tokens
+    /// that do not have a variable length.
+    fn num_characters(&self) -> u32 {
+        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
+        if *self <= TokenKind::Equal {
+            1
+        } else if *self <= TokenKind::ShiftRight {
+            2
+        } else {
+            3
+        }
+    }
+
+    /// Returns the characters that are represented by the token; may only be called on tokens
+    /// that do not have a variable length.
+    pub fn token_chars(&self) -> &'static str {
+        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
+        use TokenKind as TK;
+        match self {
+            TK::Exclamation => "!",
+            TK::Question => "?",
+            TK::Pound => "#",
+            TK::OpenAngle => "<",
+            TK::OpenCurly => "{",
+            TK::OpenParen => "(",
+            TK::OpenSquare => "[",
+            TK::CloseAngle => ">",
+            TK::CloseCurly => "}",
+            TK::CloseParen => ")",
+            TK::CloseSquare => "]",
+            TK::Colon => ":",
+            TK::Comma => ",",
+            TK::Dot => ".",
+            TK::SemiColon => ";",
+            TK::Quote => "'",
+            TK::DoubleQuote => "\"",
+            TK::At => "@",
+            TK::Plus => "+",
+            TK::Minus => "-",
+            TK::Star => "*",
+            TK::Slash => "/",
+            TK::Percent => "%",
+            TK::Caret => "^",
+            TK::And => "&",
+            TK::Or => "|",
+            TK::Tilde => "~",
+            TK::Equal => "=",
+            TK::ColonColon => "::",
+            TK::DotDot => "..",
+            TK::ArrowRight => "->",
+            TK::PlusPlus => "++",
+            TK::PlusEquals => "+=",
+            TK::MinusMinus => "--",
+            TK::MinusEquals => "-=",
+            TK::StarEquals => "*=",
+            TK::SlashEquals => "/=",
+            TK::PercentEquals => "%=",
+            TK::CaretEquals => "^=",
+            TK::AndAnd => "&&",
+            TK::AndEquals => "&=",
+            TK::OrOr => "||",
+            TK::OrEquals => "|=",
+            TK::EqualEqual => "==",
+            TK::NotEqual => "!=",
+            TK::ShiftLeft => "<<",
+            TK::ShiftRight => ">>",
+            TK::ShiftLeftEquals => "<<=",
+            TK::ShiftRightEquals => ">>=",
+            // Let's keep these explicit for now, in case we want to add more symbols
+            TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
+            TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
+        }
+    }
+}
+
+/// Represents a single token at a particular position.
+pub(crate) struct Token {
+    pub kind: TokenKind,
+    pub pos: InputPosition,
+}
+
+impl Token {
+    pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self {
+        Self{ kind, pos }
+    }
+}
+
+/// The kinds of token ranges that are specially parsed by the tokenizer.
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) enum TokenRangeKind {
+    Module,
+    Pragma,
+    Import,
+    Definition,
+    Code,
+}
+
+/// A range of tokens with a specific meaning. Such a range is part of a tree
+/// where each parent range envelops all of its children.
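+///
+/// As a sketch (the token indices below are hypothetical), a file holding a
+/// `#version` pragma followed by a struct definition produces:
+///
+/// ```text
+/// Module     { start: 0, end: 14, num_child_ranges: 2 }
+///   Pragma     { start: 0, end: 3 }   // first child
+///   Definition { start: 3, end: 14 }  // last child
+/// ```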
+#[derive(Debug)]
+pub(crate) struct TokenRange {
+    // Index of the parent in `TokenBuffer.ranges`. A range only lacks a
+    // parent if its kind is Module; in that case the parent index points to
+    // itself.
+    pub parent_idx: usize,
+    pub range_kind: TokenRangeKind,
+    pub curly_depth: u32,
+    // Offsets into `TokenBuffer.tokens`: the tokens belonging to this range.
+    pub start: u32, // first token (inclusive index)
+    pub end: u32, // last token (exclusive index)
+    // Child ranges
+    pub num_child_ranges: u32, // Number of subranges
+    pub first_child_idx: u32, // First subrange (or points to self if no subranges)
+    pub last_child_idx: u32, // Last subrange (or points to self if no subranges)
+    pub next_sibling_idx: Option<u32>,
+}
+
+pub(crate) struct TokenBuffer {
+    pub tokens: Vec<Token>,
+    pub ranges: Vec<TokenRange>,
+}
+
+impl TokenBuffer {
+    pub(crate) fn new() -> Self {
+        Self{ tokens: Vec::new(), ranges: Vec::new() }
+    }
+
+    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
+        TokenIter::new(self, range.start as usize, range.end as usize)
+    }
+
+    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
+        self.tokens[range.start as usize].pos
+    }
+
+    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
+        let last_token = &self.tokens[range.end as usize - 1];
+        if last_token.kind == TokenKind::SpanEnd {
+            return last_token.pos
+        } else {
+            debug_assert!(!last_token.kind.has_span_end());
+            return last_token.pos.with_offset(last_token.kind.num_characters());
+        }
+    }
+}
+
+/// Iterator over tokens within a specific `TokenRange`.
+pub(crate) struct TokenIter<'a> {
+    tokens: &'a Vec<Token>,
+    cur: usize,
+    end: usize,
+}
+
+impl<'a> TokenIter<'a> {
+    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
+        Self{ tokens: &buffer.tokens, cur: start, end }
+    }
+
+    /// Returns the next token (which may be a comment), or `None` if at the
+    /// end of the range.
+    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
+        if self.cur >= self.end {
+            return None;
+        }
+
+        let token = &self.tokens[self.cur];
+        Some(token.kind)
+    }
+
+    /// Returns the next token (but skips over comments), or `None` if at the
+    /// end of the range.
+    pub(crate) fn next(&mut self) -> Option<TokenKind> {
+        while let Some(token_kind) = self.next_including_comments() {
+            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
+                return Some(token_kind);
+            }
+            self.consume();
+        }
+
+        return None
+    }
+
+    /// Returns the start position belonging to the token returned by `next`.
+    /// If there is no next token, then we return the end position of the
+    /// previous token.
+    pub(crate) fn last_valid_pos(&self) -> InputPosition {
+        if self.cur < self.end {
+            // Return token position
+            return self.tokens[self.cur].pos
+        }
+
+        // Return previous token end
+        let token = &self.tokens[self.cur - 1];
+        return if token.kind == TokenKind::SpanEnd {
+            token.pos
+        } else {
+            token.pos.with_offset(token.kind.num_characters())
+        };
+    }
+
+    /// Returns the positions belonging to the token returned by `next`. This
+    /// assumes that we're not at the end of the range we're iterating over.
+    /// TODO: @cleanup Phase out?
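+    ///
+    /// A usage sketch, pairing it with `InputSource::section_at_pos` the way
+    /// the tokenizer tests do:
+    ///
+    /// ```ignore
+    /// let (begin, end) = iter.next_positions();
+    /// let text = source.section_at_pos(begin, end);
+    /// ```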
+    pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
+        debug_assert!(self.cur < self.end);
+        let token = &self.tokens[self.cur];
+        if token.kind.has_span_end() {
+            let span_end = &self.tokens[self.cur + 1];
+            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
+            (token.pos, span_end.pos)
+        } else {
+            let offset = token.kind.num_characters();
+            (token.pos, token.pos.with_offset(offset))
+        }
+    }
+
+    /// See `next_positions`.
+    pub(crate) fn next_span(&self) -> InputSpan {
+        let (begin, end) = self.next_positions();
+        return InputSpan::from_positions(begin, end)
+    }
+
+    /// Advances the iterator past the current token (the one `next` would
+    /// return).
+    pub(crate) fn consume(&mut self) {
+        // Note: this must use `next_including_comments`. Calling `next` here
+        // would recurse endlessly, as `next` itself calls `consume` to skip
+        // over comment tokens.
+        if let Some(kind) = self.next_including_comments() {
+            if kind.has_span_end() {
+                self.cur += 2;
+            } else {
+                self.cur += 1;
+            }
+        }
+    }
+}
\ No newline at end of file