From c9800c8f19d701a8c008aa954a7f84141dbc3bda 2022-03-29 16:28:33
From: mh
Date: 2022-03-29 16:28:33
Subject: [PATCH] Rewrite tokenizer to emit markers instead of ranges

---
diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs
index 6ad5f1ca98cfa39f02cad26fa0314df2c6f14413..87e0135bc0e117077ba4a62a7682c557ca1ae55d 100644
--- a/src/protocol/ast.rs
+++ b/src/protocol/ast.rs
@@ -932,7 +932,6 @@ pub struct StructDefinition {
     pub this: StructDefinitionId,
     pub defined_in: RootId,
     // Symbol scanning
-    pub span: InputSpan,
     pub identifier: Identifier,
     pub poly_vars: Vec<Identifier>,
     // Parsing
@@ -941,10 +940,10 @@ pub struct StructDefinition {
 
 impl StructDefinition {
     pub(crate) fn new_empty(
-        this: StructDefinitionId, defined_in: RootId, span: InputSpan,
+        this: StructDefinitionId, defined_in: RootId,
         identifier: Identifier, poly_vars: Vec<Identifier>
     ) -> Self {
-        Self{ this, defined_in, span, identifier, poly_vars, fields: Vec::new() }
+        Self{ this, defined_in, identifier, poly_vars, fields: Vec::new() }
     }
 }
@@ -965,7 +964,6 @@ pub struct EnumDefinition {
     pub this: EnumDefinitionId,
     pub defined_in: RootId,
     // Symbol scanning
-    pub span: InputSpan,
     pub identifier: Identifier,
     pub poly_vars: Vec<Identifier>,
     // Parsing
@@ -974,10 +972,10 @@ pub struct EnumDefinition {
 
 impl EnumDefinition {
     pub(crate) fn new_empty(
-        this: EnumDefinitionId, defined_in: RootId, span: InputSpan,
+        this: EnumDefinitionId, defined_in: RootId,
         identifier: Identifier, poly_vars: Vec<Identifier>
     ) -> Self {
-        Self{ this, defined_in, span, identifier, poly_vars, variants: Vec::new() }
+        Self{ this, defined_in, identifier, poly_vars, variants: Vec::new() }
     }
 }
@@ -993,7 +991,6 @@ pub struct UnionDefinition {
     pub this: UnionDefinitionId,
     pub defined_in: RootId,
     // Phase 1: symbol scanning
-    pub span: InputSpan,
     pub identifier: Identifier,
     pub poly_vars: Vec<Identifier>,
     // Phase 2: parsing
@@ -1002,10 +999,10 @@ pub struct UnionDefinition {
 
 impl UnionDefinition {
     pub(crate) fn new_empty(
-        this: UnionDefinitionId, defined_in: RootId, span: InputSpan,
+        this: UnionDefinitionId, defined_in: RootId,
        identifier: Identifier, poly_vars: Vec<Identifier>
     ) -> Self {
-        Self{ this, defined_in, span, identifier, poly_vars, variants: Vec::new() }
+        Self{ this, defined_in, identifier, poly_vars, variants: Vec::new() }
     }
 }
@@ -1111,7 +1108,6 @@ pub struct ProcedureDefinition {
     pub defined_in: RootId,
     // Symbol scanning
     pub kind: ProcedureKind,
-    pub span: InputSpan,
     pub identifier: Identifier,
     pub poly_vars: Vec<Identifier>,
     // Parser
@@ -1126,12 +1122,11 @@ pub struct ProcedureDefinition {
 
 impl ProcedureDefinition {
     pub(crate) fn new_empty(
-        this: ProcedureDefinitionId, defined_in: RootId, span: InputSpan,
+        this: ProcedureDefinitionId, defined_in: RootId,
         kind: ProcedureKind, identifier: Identifier, poly_vars: Vec<Identifier>
     ) -> Self {
         Self {
             this, defined_in,
-            span,
             kind, identifier, poly_vars,
             source: ProcedureSource::FuncUserDefined,
             return_type: None,
diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs
index da5b60c9ceadb1d990a1c7bc6fa7a78e2b4ea2c9..2e46cc167c05753e9aee603c74506c57a3fd5c9b 100644
--- a/src/protocol/mod.rs
+++ b/src/protocol/mod.rs
@@ -60,7 +60,6 @@ impl ProtocolDescription {
             return Err(format!("{}", err))
         }
 
-        debug_assert_eq!(parser.modules.len(), 1, "only supporting one module here for now");
         let modules: Vec<Module> = parser.modules.into_iter()
             .map(|module| Module{
                 source: module.source,
diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs
index 59235a8cd2397b44eea99458f897fb910aeb2959..39de7073241552963fc106ab248880774a9e45fa 100644
--- a/src/protocol/parser/mod.rs
+++ b/src/protocol/parser/mod.rs
@@ -52,10 +52,9 @@ pub enum ModuleCompilationPhase {
 }
 
 pub struct Module {
-    // Buffers
     pub source: InputSource,
     pub tokens: TokenBuffer,
-    // Identifiers
+    pub is_compiler_file: bool, // TODO: @Hack
     pub root_id: RootId,
     pub name: Option<(PragmaId, StringRef<'static>)>,
     pub version: Option<(PragmaId, i64)>,
@@ -156,7 +155,7 @@ impl Parser {
             pass_typing: PassTyping::new(),
             pass_rewriting: PassRewriting::new(),
             pass_stack_size: PassStackSize::new(),
-            write_tokens_to: Some("tokens.txt".to_string()),
+            write_tokens_to: None,
             write_ast_to: None,
             arch: TargetArch::new(),
         };
@@ -194,21 +193,7 @@ impl Parser {
     /// it internally for later parsing (when all modules are present). Returns
     /// the index of the new module.
     pub fn feed(&mut self, mut source: InputSource) -> Result<usize, ParseError> {
-        let mut token_buffer = TokenBuffer::new();
-        self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?;
-
-        let module = Module{
-            source,
-            tokens: token_buffer,
-            root_id: RootId::new_invalid(),
-            name: None,
-            version: None,
-            phase: ModuleCompilationPhase::Tokenized,
-        };
-        let module_index = self.modules.len();
-        self.modules.push(module);
-
-        return Ok(module_index);
+        return self.feed_internal(source, false);
     }
 
     pub fn parse(&mut self) -> Result<(), ParseError> {
@@ -353,7 +338,7 @@ impl Parser {
             let source = source.unwrap();
             let input_source = InputSource::new(file.to_string(), source);
-            let module_index = self.feed(input_source);
+            let module_index = self.feed_internal(input_source, true);
             if let Err(err) = module_index {
                 // A bit of a hack, but shouldn't really happen anyway: the
                 // compiler should ship with a decent standard library (at some
@@ -370,6 +355,25 @@ impl Parser {
 
         return Ok(())
     }
+
+    fn feed_internal(&mut self, mut source: InputSource, is_compiler_file: bool) -> Result<usize, ParseError> {
+        let mut token_buffer = TokenBuffer::new();
+        self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?;
+
+        let module = Module{
+            source,
+            tokens: token_buffer,
+            is_compiler_file,
+            root_id: RootId::new_invalid(),
+            name: None,
+            version: None,
+            phase: ModuleCompilationPhase::Tokenized,
+        };
+        let module_index = self.modules.len();
+        self.modules.push(module);
+
+        return Ok(module_index);
+    }
 }
 
 fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec, has_poly_var: bool, size: usize, alignment: usize) -> TypeId {
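The `feed`/`feed_internal` split above is the usual public-wrapper pattern: the public entry
point pins `is_compiler_file` to `false`, while the standard-library loader calls the internal
path with `true` so later passes can special-case those modules. A minimal self-contained
sketch of the same shape (types reduced to the one relevant field; not the crate's real API):

```rust
struct Module {
    is_compiler_file: bool,
    // source, tokens, root_id, ... omitted
}

struct Parser {
    modules: Vec<Module>,
}

impl Parser {
    /// Public path: user-supplied modules are never compiler files.
    fn feed(&mut self) -> usize {
        self.feed_internal(false)
    }

    /// Internal path: the standard-library loader passes `true` so that
    /// later passes (symbol scanning, definition parsing) may allow the
    /// `#type_*` pragma types and global-scope symbol insertion.
    fn feed_internal(&mut self, is_compiler_file: bool) -> usize {
        self.modules.push(Module { is_compiler_file });
        self.modules.len() - 1
    }
}
```
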
diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs
index 607f8caacc21fb71b60c8bca23e60ec45a27fa5d..7f9817270909d65c1cfc3c9fa702aef204e1506e 100644
--- a/src/protocol/parser/pass_definitions.rs
+++ b/src/protocol/parser/pass_definitions.rs
@@ -43,35 +43,33 @@ impl PassDefinitions {
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
-        let module_range = &module.tokens.ranges[0];
         debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved);
-        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 
-        // Although we only need to parse the definitions, we want to go through
-        // code ranges as well such that we can throw errors if we get
-        // unexpected tokens at the module level of the source.
-        let mut range_idx = module_range.first_child_idx;
-        loop {
-            let range_idx_usize = range_idx as usize;
-            let cur_range = &module.tokens.ranges[range_idx_usize];
-
-            match cur_range.range_kind {
-                TokenRangeKind::Module => unreachable!(), // should not be reachable
-                TokenRangeKind::Pragma | TokenRangeKind::Import => {
-                    // Already fully parsed, fall through and go to next range
-                },
-                TokenRangeKind::Definition | TokenRangeKind::Code => {
-                    // Visit range even if it is a "code" range to provide
-                    // proper error messages.
-                    self.visit_range(modules, module_idx, ctx, range_idx_usize)?;
-                },
+        // We iterate through the entire document. If we find a marker that has
+        // been handled then we skip over it. It is important that we properly
+        // parse all other tokens in the document to ensure that we throw the
+        // correct kind of errors.
+        let num_tokens = module.tokens.tokens.len() as u32;
+        let num_markers = module.tokens.markers.len();
+
+        let mut marker_index = 0;
+        let mut first_token_index = 0;
+        while first_token_index < num_tokens {
+            // Seek ahead to the next marker that was already handled.
+            let mut last_token_index = num_tokens;
+            let mut new_first_token_index = num_tokens;
+            while marker_index < num_markers {
+                let marker = &module.tokens.markers[marker_index];
+                marker_index += 1;
+                if marker.handled {
+                    last_token_index = marker.first_token;
+                    new_first_token_index = marker.last_token;
+                    break;
+                }
             }
 
-            if cur_range.next_sibling_idx == NO_SIBLING {
-                break;
-            } else {
-                range_idx = cur_range.next_sibling_idx;
-            }
+            self.visit_token_range(modules, module_idx, ctx, first_token_index, last_token_index)?;
+            first_token_index = new_first_token_index;
         }
 
         modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed;
@@ -79,15 +77,14 @@ impl PassDefinitions {
         Ok(())
     }
 
-    fn visit_range(
-        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
+    fn visit_token_range(
+        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx,
+        token_range_begin: u32, token_range_end: u32,
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
-        let cur_range = &module.tokens.ranges[range_idx];
-        debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code);
 
         // Detect which definition we're parsing
-        let mut iter = module.tokens.iter_range(cur_range.start, cur_range.end);
+        let mut iter = module.tokens.iter_range(token_range_begin, Some(token_range_end));
         loop {
             let next = iter.next();
             if next.is_none() {
@@ -134,7 +131,7 @@ impl PassDefinitions {
             let start_pos = iter.last_valid_pos();
             let parser_type = self.type_parser.consume_parser_type(
                 iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
-                module_scope, false, None
+                module_scope, false, false, None
             )?;
             let field = consume_ident_interned(source, iter, ctx)?;
             Ok(StructFieldDefinition{
@@ -221,7 +218,7 @@ impl PassDefinitions {
                     let poly_vars = ctx.heap[definition_id].poly_vars();
                     self.type_parser.consume_parser_type(
                         iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
-                        module_scope, false, None
+                        module_scope, false, false, None
                     )
                 },
                 &mut types_section, "an embedded type", Some(&mut close_pos)
@@ -255,20 +252,21 @@ impl PassDefinitions {
         // Retrieve function name
         consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
         let (ident_text, _) = consume_ident(&module.source, iter)?;
-        let stringy = String::from_utf8_lossy(ident_text).to_string();
 
         // Retrieve preallocated DefinitionId
         let module_scope = SymbolScope::Module(module.root_id);
         let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
             .unwrap().variant.as_definition().definition_id;
         self.cur_definition = definition_id;
+        let allow_compiler_types = module.is_compiler_file;
 
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 
         // Parse function's argument list
         let mut parameter_section = self.variables.start_section();
         consume_parameter_list(
-            &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id
+            &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section,
+            module_scope, definition_id, allow_compiler_types
         )?;
         let parameters = parameter_section.into_vec();
@@ -277,7 +275,7 @@ impl PassDefinitions {
         let poly_vars = ctx.heap[definition_id].poly_vars();
         let parser_type = self.type_parser.consume_parser_type(
             iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id,
-            module_scope, false, None
+            module_scope, false, allow_compiler_types, None
         )?;
 
         // Consume body
@@ -308,13 +306,15 @@ impl PassDefinitions {
         let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
             .unwrap().variant.as_definition().definition_id;
         self.cur_definition = definition_id;
+        let allow_compiler_types = module.is_compiler_file;
 
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 
         // Parse component's argument list
         let mut parameter_section = self.variables.start_section();
         consume_parameter_list(
-            &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id
+            &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section,
+            module_scope, definition_id, allow_compiler_types
         )?;
         let parameters = parameter_section.into_vec();
@@ -346,10 +346,10 @@ impl PassDefinitions {
         if iter.next() == Some(TokenKind::OpenCurly) && iter.peek() == Some(TokenKind::Pragma) {
             // Consume the placeholder "{ #builtin }" tokens
             iter.consume(); // opening curly brace
-            let (pragma, pragma_start, pragma_end) = consume_pragma(&module.source, iter)?;
+            let (pragma, pragma_span) = consume_pragma(&module.source, iter)?;
             if pragma != b"#builtin" {
                 return Err(ParseError::new_error_str_at_span(
-                    &module.source, InputSpan::from_positions(pragma_start, pragma_end),
+                    &module.source, pragma_span,
                     "expected a '#builtin' pragma, or a function body"
                 ));
             }
@@ -863,7 +863,7 @@ impl PassDefinitions {
                 let parser_type = self.type_parser.consume_parser_type(
                     iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars,
                     definition_id, SymbolScope::Module(module.root_id),
-                    true, Some(angle_start_pos)
+                    true, false, Some(angle_start_pos)
                 )?;
 
                 (parser_type.elements, parser_type.full_span.end)
@@ -959,7 +959,8 @@ impl PassDefinitions {
             let parser_type = self.type_parser.consume_parser_type(
                 iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars,
-                definition_id, SymbolScope::Definition(definition_id), true, None
+                definition_id, SymbolScope::Definition(definition_id),
+                true, false, None
             );
 
             if let Ok(parser_type) = parser_type {
@@ -1566,7 +1567,7 @@ impl PassDefinitions {
         let poly_vars = ctx.heap[self.cur_definition].poly_vars();
         let parser_type = self.type_parser.consume_parser_type(
             iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, self.cur_definition,
-            symbol_scope, true, None
+            symbol_scope, true, false, None
         )?;
         debug_assert!(!parser_type.elements.is_empty());
         match parser_type.elements[0].variant {
@@ -1731,7 +1732,7 @@ impl PassDefinitions {
             self.type_parser.consume_parser_type(
                 iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id, SymbolScope::Module(module.root_id),
-                true, Some(angle_start_pos)
+                true, false, Some(angle_start_pos)
             )?
         } else {
             // Automatic casting with inferred target type
@@ -1867,7 +1868,7 @@ fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter,
 fn consume_parameter_list(
     parser: &mut ParserTypeParser, source: &InputSource, iter: &mut TokenIter,
     ctx: &mut PassCtx, target: &mut ScopedSection,
-    scope: SymbolScope, definition_id: DefinitionId
+    scope: SymbolScope, definition_id: DefinitionId, allow_compiler_types: bool
 ) -> Result<(), ParseError> {
     consume_comma_separated(
         TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx,
@@ -1875,7 +1876,7 @@ fn consume_parameter_list(
             let poly_vars = ctx.heap[definition_id].poly_vars(); // Rust being rust, multiple lookups
             let parser_type = parser.consume_parser_type(
                 iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
-                scope, false, None
+                scope, false, allow_compiler_types, None
             )?;
             let identifier = consume_ident_interned(source, iter, ctx)?;
             let parameter_id = ctx.heap.alloc_variable(|this| Variable{
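The parse loop above alternates between stretches of unhandled tokens (parsed here) and
marker spans that earlier passes already consumed (skipped). A compilable sketch of just
that skipping logic, with the marker reduced to the fields the loop touches and everything
else invented for illustration:

```rust
struct TokenMarker {
    first_token: u32, // inclusive start, set by the tokenizer
    last_token: u32,  // exclusive end, set by the pass that handled the marker
    handled: bool,
}

/// Returns the (begin, end) token stretches that still need parsing,
/// skipping every marker that an earlier pass already handled.
fn unhandled_ranges(num_tokens: u32, markers: &[TokenMarker]) -> Vec<(u32, u32)> {
    let mut ranges = Vec::new();
    let mut first = 0u32;
    for marker in markers.iter().filter(|m| m.handled) {
        ranges.push((first, marker.first_token)); // parse up to the marker
        first = marker.last_token;                // then resume after it
    }
    if first < num_tokens {
        ranges.push((first, num_tokens)); // trailing stretch, if any
    }
    ranges
}
```

Stretches may come out empty when two handled markers are adjacent; like the pass above,
a caller can visit them anyway, since an empty token range simply parses to nothing.
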
diff --git a/src/protocol/parser/pass_definitions_types.rs b/src/protocol/parser/pass_definitions_types.rs
index 11603e2ffe11da4a97c02a6c51ee3186070a0b61..0f574fc99222e1b195e5f437c2970d4d362ad125 100644
--- a/src/protocol/parser/pass_definitions_types.rs
+++ b/src/protocol/parser/pass_definitions_types.rs
@@ -59,7 +59,8 @@ impl ParserTypeParser {
         &mut self, iter: &mut TokenIter, heap: &Heap, source: &InputSource,
         symbols: &SymbolTable, poly_vars: &[Identifier],
         wrapping_definition: DefinitionId, cur_scope: SymbolScope,
-        allow_inference: bool, inside_angular_bracket: Option<InputPosition>,
+        allow_inference: bool, allow_compiler_types: bool,
+        inside_angular_bracket: Option<InputPosition>,
     ) -> Result<ParserType, ParseError> {
         // Prepare
         self.entries.clear();
@@ -71,9 +72,10 @@ impl ParserTypeParser {
         }
 
         let initial_state = match iter.next() {
-            Some(TokenKind::Ident) => {
+            Some(TokenKind::Ident) | Some(TokenKind::Pragma) => {
                 let element = Self::consume_parser_type_element(
-                    iter, source, heap, symbols, wrapping_definition, poly_vars, cur_scope, allow_inference
+                    iter, source, heap, symbols, wrapping_definition, poly_vars, cur_scope,
+                    allow_inference, allow_compiler_types
                 )?;
                 self.first_pos = element.element_span.begin;
                 self.last_pos = element.element_span.end;
@@ -154,7 +156,8 @@ impl ParserTypeParser {
                     // Allowed tokens: ident (
                     match next {
                         Some(TokenKind::Ident) => self.consume_type_idents(
-                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope, allow_inference, iter
+                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope,
+                            allow_inference, allow_compiler_types, iter
                         )?,
                         Some(TokenKind::OpenParen) => self.consume_open_paren(iter),
                         _ => return Err(ParseError::new_error_str_at_pos(
@@ -168,7 +171,8 @@ impl ParserTypeParser {
                     // We'll strip the nested tuple later in this function
                     match next {
                         Some(TokenKind::Ident) => self.consume_type_idents(
-                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope, allow_inference, iter
+                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope,
+                            allow_inference, allow_compiler_types, iter
                         )?,
                         Some(TokenKind::OpenParen) => self.consume_open_paren(iter),
                         Some(TokenKind::CloseParen) => self.consume_close_paren(source, iter)?,
@@ -182,7 +186,8 @@ impl ParserTypeParser {
                     // Allowed tokens: ident ( > >> )
                     match next {
                         Some(TokenKind::Ident) => self.consume_type_idents(
-                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope, allow_inference, iter
+                            source, heap, symbols, wrapping_definition, poly_vars, cur_scope,
+                            allow_inference, allow_compiler_types, iter
                         )?,
                         Some(TokenKind::OpenParen) => self.consume_open_paren(iter),
                         Some(TokenKind::CloseAngle) => self.consume_close_angle(source, iter)?,
@@ -288,10 +293,12 @@ impl ParserTypeParser {
     fn consume_type_idents(
         &mut self, source: &InputSource, heap: &Heap, symbols: &SymbolTable,
         wrapping_definition: DefinitionId, poly_vars: &[Identifier],
-        cur_scope: SymbolScope, allow_inference: bool, iter: &mut TokenIter
+        cur_scope: SymbolScope, allow_inference: bool, allow_compiler_types: bool,
+        iter: &mut TokenIter
     ) -> Result<(), ParseError> {
         let element = Self::consume_parser_type_element(
-            iter, source, heap, symbols, wrapping_definition, poly_vars, cur_scope, allow_inference
+            iter, source, heap, symbols, wrapping_definition, poly_vars, cur_scope,
+            allow_inference, allow_compiler_types
         )?;
         let depth = self.cur_depth();
         self.last_pos = element.element_span.end;
@@ -428,11 +435,35 @@ impl ParserTypeParser {
     fn consume_parser_type_element(
         iter: &mut TokenIter, source: &InputSource, heap: &Heap,
         symbols: &SymbolTable, wrapping_definition: DefinitionId, poly_vars: &[Identifier],
-        mut scope: SymbolScope, allow_inference: bool,
+        mut scope: SymbolScope, allow_inference: bool, allow_compiler_types: bool,
     ) -> Result<ParserTypeElement, ParseError> {
         use ParserTypeVariant as PTV;
 
-        let (mut type_text, mut type_span) = consume_any_ident(source, iter)?;
+        // Early check for special builtin types available to the compiler
+        if iter.next() == Some(TokenKind::Pragma) {
+            let (type_text, pragma_span) = consume_pragma(source, iter)?;
+            let variant = match type_text {
+                PRAGMA_TYPE_VOID => Some(PTV::Void),
+                PRAGMA_TYPE_PORTLIKE => Some(PTV::InputOrOutput),
+                PRAGMA_TYPE_INTEGERLIKE => Some(PTV::IntegerLike),
+                PRAGMA_TYPE_ARRAYLIKE => Some(PTV::ArrayLike),
+                _ => None,
+            };
+
+            if !allow_compiler_types || variant.is_none() {
+                return Err(ParseError::new_error_str_at_span(
+                    source, pragma_span, "unexpected pragma in type"
+                ));
+            }
+
+            return Ok(ParserTypeElement{
+                variant: variant.unwrap(),
+                element_span: pragma_span,
+            });
+        }
+
+        // No special type, parse as normal
+        let (mut type_text, mut type_span) = consume_any_ident(source, iter)?;
         let variant = match type_text {
             KW_TYPE_MESSAGE => PTV::Message,
             KW_TYPE_BOOL => PTV::Bool,
diff --git a/src/protocol/parser/pass_imports.rs b/src/protocol/parser/pass_imports.rs
index e64e07e87634183cc240d0cf0709336f8c0a7017..7fe1d8c77451ed17ffe7335cbc31969f15b27d68 100644
--- a/src/protocol/parser/pass_imports.rs
+++ b/src/protocol/parser/pass_imports.rs
@@ -25,28 +25,23 @@ impl PassImport {
     }
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
-        let module_range = &module.tokens.ranges[0];
         debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::SymbolsScanned));
         debug_assert_eq!(module.phase, ModuleCompilationPhase::SymbolsScanned);
-        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 
-        let mut range_idx = module_range.first_child_idx;
-        loop {
-            let range_idx_usize = range_idx as usize;
-            let cur_range = &module.tokens.ranges[range_idx_usize];
+        let module_root_id = module.root_id;
+        let num_markers = module.tokens.markers.len();
 
-            if cur_range.range_kind == TokenRangeKind::Import {
-                self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?;
-            }
-
-            if cur_range.next_sibling_idx == NO_SIBLING {
-                break;
-            } else {
-                range_idx = cur_range.next_sibling_idx;
+        for marker_index in 0..num_markers {
+            let marker = &modules[module_idx].tokens.markers[marker_index];
+            match marker.kind {
+                TokenMarkerKind::Import => {
+                    self.visit_import_marker(modules, module_idx, ctx, marker_index)?;
+                },
+                TokenMarkerKind::Definition | TokenMarkerKind::Pragma => {},
             }
         }
 
-        let root = &mut ctx.heap[module.root_id];
+        let root = &mut ctx.heap[module_root_id];
         root.imports.extend(self.imports.drain(..));
 
         let module = &mut modules[module_idx];
@@ -55,14 +50,13 @@ impl PassImport {
         Ok(())
     }
 
-    pub(crate) fn visit_import_range(
-        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
+    pub(crate) fn visit_import_marker(
+        &mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
-        let import_range = &module.tokens.ranges[range_idx];
-        debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
+        let marker = &module.tokens.markers[marker_index];
 
-        let mut iter = module.tokens.iter_range(import_range.start, import_range.end);
+        let mut iter = module.tokens.iter_range(marker.first_token, None);
 
         // Consume "import"
         let (_import_ident, import_span) =
@@ -315,6 +309,12 @@ impl PassImport {
         consume_token(&module.source, &mut iter, TokenKind::SemiColon)?;
         self.imports.push(import_id);
 
+        // Update the marker
+        let marker_last_token = iter.token_index();
+        let marker = &mut modules[module_idx].tokens.markers[marker_index];
+        marker.last_token = marker_last_token;
+        marker.handled = true;
+
         Ok(())
     }
 }
diff --git a/src/protocol/parser/pass_rewriting.rs b/src/protocol/parser/pass_rewriting.rs
index a9f869d38c905cdbbe880ecdbd4bc522a8bf5b82..82702bd1056e5706249f1636604aba452aeb820c 100644
--- a/src/protocol/parser/pass_rewriting.rs
+++ b/src/protocol/parser/pass_rewriting.rs
@@ -49,6 +49,10 @@ impl Visitor for PassRewriting {
     fn visit_procedure_definition(&mut self, ctx: &mut Ctx, id: ProcedureDefinitionId) -> VisitorResult {
         let definition = &ctx.heap[id];
+        if definition.source.is_builtin() {
+            return Ok(());
+        }
+
         let body_id = definition.body;
         self.current_scope = definition.scope;
         self.current_procedure_id = id;
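The import pass above and the symbol pass below finish a marker the same way: parse from
`marker.first_token`, write back where parsing stopped, and flag the marker as handled so
that `PassDefinitions` will skip it later. A reduced sketch of that handshake (the closure
stands in for the real token iterator; error type simplified):

```rust
struct TokenMarker {
    first_token: u32,
    last_token: u32,
    handled: bool,
}

/// `parse` receives the first token index and returns the index one past
/// the last token it consumed (the role played by `iter.token_index()` in
/// the hunks above).
fn finish_marker<E>(
    marker: &mut TokenMarker,
    parse: impl FnOnce(u32) -> Result<u32, E>,
) -> Result<(), E> {
    let end = parse(marker.first_token)?;
    marker.last_token = end;
    marker.handled = true;
    Ok(())
}
```
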
diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs
index 28d7ba5fce047bdc25d8133f5b7cb6beb42f66e8..3c3b628f105947d8748d9318ac4a237d1606e4fe 100644
--- a/src/protocol/parser/pass_symbols.rs
+++ b/src/protocol/parser/pass_symbols.rs
@@ -45,11 +45,10 @@ impl PassSymbols {
         self.reset();
 
         let module = &mut modules[module_idx];
-        let module_range = &module.tokens.ranges[0];
+        let module_is_compiler_file = module.is_compiler_file;
 
         debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
-        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
-        debug_assert!(module.root_id.is_invalid()); // not set yet,
+        debug_assert!(module.root_id.is_invalid()); // not set yet
 
         // Preallocate root in the heap
         let root_id = ctx.heap.alloc_protocol_description(|this| {
@@ -62,28 +61,21 @@ impl PassSymbols {
         });
         module.root_id = root_id;
 
-        // Retrieve first range index, then make immutable borrow
-        let mut range_idx = module_range.first_child_idx;
-
-        // Visit token ranges to detect definitions and pragmas
-        loop {
+        // Use the token markers to detect symbol definitions and pragmas
+        let num_markers = module.tokens.markers.len();
+        for marker_index in 0..num_markers {
             let module = &modules[module_idx];
-            let range_idx_usize = range_idx as usize;
-            let cur_range = &module.tokens.ranges[range_idx_usize];
-            let next_sibling_idx = cur_range.next_sibling_idx;
-            let range_kind = cur_range.range_kind;
+            let marker = &module.tokens.markers[marker_index];
 
             // Parse if it is a definition or a pragma
-            if range_kind == TokenRangeKind::Definition {
-                self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
-            } else if range_kind == TokenRangeKind::Pragma {
-                self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?;
-            }
-
-            if next_sibling_idx == NO_SIBLING {
-                break;
-            } else {
-                range_idx = next_sibling_idx;
+            match marker.kind {
+                TokenMarkerKind::Pragma => {
+                    self.visit_pragma_marker(modules, module_idx, ctx, marker_index)?;
+                },
+                TokenMarkerKind::Definition => {
+                    self.visit_definition_marker(modules, module_idx, ctx, marker_index)?;
+                }
+                TokenMarkerKind::Import => {}, // we don't care yet
             }
         }
@@ -97,6 +89,14 @@ impl PassSymbols {
             }
         }
 
+        if module_is_compiler_file {
+            debug_assert!(self.symbols.is_empty());
+            ctx.symbols.get_all_symbols_defined_in_scope(module_scope, &mut self.symbols);
+            for symbol in self.symbols.drain(..) {
+                ctx.symbols.insert_symbol_in_global_scope(symbol);
+            }
+        }
+
         // Modify the preallocated root
         let root = &mut ctx.heap[root_id];
         root.pragmas.extend(self.pragmas.drain(..));
@@ -109,32 +109,27 @@ impl PassSymbols {
         Ok(())
     }
 
-    fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
+    fn visit_pragma_marker(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
         let module = &mut modules[module_idx];
-        let range = &module.tokens.ranges[range_idx];
-        let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32);
+        let marker = &module.tokens.markers[marker_index];
+        let mut iter = module.tokens.iter_range(marker.first_token, None);
 
         // Consume pragma name
-        let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
+        let (pragma_section, mut pragma_span) = consume_pragma(&module.source, &mut iter)?;
 
         // Consume pragma values
         if pragma_section == b"#module" {
             // Check if name is defined twice within the same file
             if self.has_pragma_module {
-                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
+                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module name is defined twice"));
             }
 
-            // Consume the domain-name
+            // Consume the domain-name, then record end of pragma
             let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
-
-            // TODO: Fix with newer token range parsing
-            module.tokens.ranges[range_idx as usize].end = iter.token_index();
-            // if iter.next().is_some() {
-            //     return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
-            // }
+            let marker_last_token = iter.token_index();
 
             // Add to heap and symbol table
-            let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
+            pragma_span.end = module_span.end;
             let module_name = ctx.pool.intern(module_name);
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                 this,
@@ -156,49 +151,51 @@ impl PassSymbols {
                 ));
             }
 
+            let marker = &mut module.tokens.markers[marker_index];
+            marker.last_token = marker_last_token;
+            marker.handled = true;
+
             module.name = Some((pragma_id, module_name));
             self.has_pragma_module = true;
         } else if pragma_section == b"#version" {
             // Check if version is defined twice within the same file
             if self.has_pragma_version {
-                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice"));
+                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module version is defined twice"));
             }
 
             // Consume the version pragma
             let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
+            let marker_last_token = iter.token_index();
+
+            pragma_span.end = version_span.end;
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
                 this,
-                span: InputSpan::from_positions(pragma_start, version_span.end),
+                span: pragma_span,
                 version,
             }));
             self.pragmas.push(pragma_id);
 
+            let marker = &mut module.tokens.markers[marker_index];
+            marker.last_token = marker_last_token;
+            marker.handled = true;
+
             module.version = Some((pragma_id, version as i64));
             self.has_pragma_version = true;
-        } else {
-            // Custom pragma, maybe we support this in the future, but for now
-            // we don't.
-            return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name"));
-        }
+        } // else: custom pragma used for something else, will be handled later (or rejected with an error)
 
         Ok(())
     }
 
-    fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
+    fn visit_definition_marker(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
         let module = &modules[module_idx];
-        let range = &module.tokens.ranges[range_idx];
-        let definition_span = InputSpan::from_positions(
-            module.tokens.start_pos(range),
-            module.tokens.end_pos(range)
-        );
-        let mut iter = module.tokens.iter_range(range.start, range.end);
+        let marker = &module.tokens.markers[marker_index];
+        let mut iter = module.tokens.iter_range(marker.first_token, None);
 
         // First ident must be type of symbol
         let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
 
         // Retrieve identifier of definition
         let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
-        println!("DEBUG: Parsing {} --- {}", String::from_utf8_lossy(kw_text).to_string(), identifier.value.as_str());
 
         let mut poly_vars = Vec::new();
         maybe_consume_comma_separated(
             TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
@@ -214,28 +211,28 @@ impl PassSymbols {
         match kw_text {
             KW_STRUCT => {
                 let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
-                    StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
+                    StructDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Struct;
                 ast_definition_id = struct_def_id.upcast();
             },
             KW_ENUM => {
                 let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
-                    EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
+                    EnumDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Enum;
                 ast_definition_id = enum_def_id.upcast();
             },
             KW_UNION => {
                 let union_def_id = ctx.heap.alloc_union_definition(|this| {
-                    UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
+                    UnionDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Union;
                 ast_definition_id = union_def_id.upcast()
             },
             KW_FUNCTION => {
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
-                    ProcedureDefinition::new_empty(this, module.root_id, definition_span, ProcedureKind::Function, identifier, poly_vars)
+                    ProcedureDefinition::new_empty(this, module.root_id, ProcedureKind::Function, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Function;
                 ast_definition_id = proc_def_id.upcast();
@@ -247,7 +244,7 @@ impl PassSymbols {
                     ProcedureKind::Composite
                 };
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
-                    ProcedureDefinition::new_empty(this, module.root_id, definition_span, procedure_kind, identifier, poly_vars)
+                    ProcedureDefinition::new_empty(this, module.root_id, procedure_kind, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Component;
                 ast_definition_id = proc_def_id.upcast();
@@ -260,7 +257,6 @@ impl PassSymbols {
             variant: SymbolVariant::Definition(SymbolDefinition{
                 defined_in_module: module.root_id,
                 defined_in_scope: SymbolScope::Module(module.root_id),
-                definition_span,
                 identifier_span: ident_span,
                 imported_at: None,
                 class: definition_class,
diff --git a/src/protocol/parser/pass_tokenizer.rs b/src/protocol/parser/pass_tokenizer.rs
index 6cb83d224693328d20a184c3c0bb26eb8c9b2482..e00569be823498ec4cb3e1b2258be896d29a78c0 100644
--- a/src/protocol/parser/pass_tokenizer.rs
+++ b/src/protocol/parser/pass_tokenizer.rs
@@ -66,9 +66,11 @@ impl PassTokenizer {
             } else if is_identifier_start(c) {
                 let ident = self.consume_identifier(source, target)?;
 
-                if demarks_definition(ident) {
+                if demarks_symbol(ident) {
+                    self.emit_marker(target, TokenMarkerKind::Definition, token_index);
                     self.push_range(target, TokenRangeKind::Definition, token_index);
                 } else if demarks_import(ident) {
+                    self.emit_marker(target, TokenMarkerKind::Import, token_index);
                     self.push_range(target, TokenRangeKind::Import, token_index);
                 }
             } else if is_integer_literal_start(c) {
@@ -76,6 +78,7 @@ impl PassTokenizer {
             } else if is_pragma_start_or_pound(c) {
                 let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
                 if was_pragma {
+                    self.emit_marker(target, TokenMarkerKind::Pragma, token_index);
                     self.push_range(target, TokenRangeKind::Pragma, token_index);
                 }
             } else if self.is_line_comment_start(c, source) {
@@ -655,6 +658,22 @@ impl PassTokenizer {
         }
     }
 
+    fn emit_marker(&mut self, target: &mut TokenBuffer, kind: TokenMarkerKind, first_token: u32) {
+        debug_assert!(
+            target.markers
+                .last().map(|v| v.first_token < first_token)
+                .unwrap_or(true)
+        );
+
+        target.markers.push(TokenMarker{
+            kind,
+            curly_depth: self.curly_stack.len() as u32,
+            first_token,
+            last_token: u32::MAX,
+            handled: false,
+        });
+    }
+
     fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
         let new_range_idx = target.ranges.len() as i32;
         let parent_idx = self.stack_idx as i32;
@@ -727,7 +746,7 @@ impl PassTokenizer {
 }
 
 // Helpers for characters
-fn demarks_definition(ident: &[u8]) -> bool {
+fn demarks_symbol(ident: &[u8]) -> bool {
     return
         ident == KW_STRUCT ||
         ident == KW_ENUM ||
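`emit_marker` above encodes two details worth noting: markers must be pushed in strictly
increasing token order (the `debug_assert!`), and each marker records the curly-brace depth
at which its keyword appeared, presumably so later passes can distinguish top-level keywords
from nested ones. A condensed sketch of the ordering invariant (simplified types, hypothetical
free function):

```rust
#[derive(Debug)]
struct TokenMarker {
    curly_depth: u32,
    first_token: u32,
}

/// Push a marker, asserting the strictly increasing token order that the
/// tokenizer guarantees by walking the source front to back.
fn emit_marker(markers: &mut Vec<TokenMarker>, curly_depth: u32, first_token: u32) {
    debug_assert!(markers.last().map_or(true, |m| m.first_token < first_token));
    markers.push(TokenMarker { curly_depth, first_token });
}
```
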
diff --git a/src/protocol/parser/pass_typing.rs b/src/protocol/parser/pass_typing.rs
index d5cb3b9311832333ccdf6af675baabc98e6c5b32..99234f7eb2f1dec4b63694868262d7f8d24f1e9c 100644
--- a/src/protocol/parser/pass_typing.rs
+++ b/src/protocol/parser/pass_typing.rs
@@ -1223,6 +1223,7 @@ impl PassTyping {
         self.procedure_id = id;
         self.procedure_kind = procedure_def.kind;
         let body_id = procedure_def.body;
+        let procedure_is_builtin = procedure_def.source.is_builtin();
 
         debug_log!("{}", "-".repeat(50));
         debug_log!("Visiting procedure: '{}' (id: {}, kind: {:?})", procedure_def.identifier.value.as_str(), id.0.index, procedure_def.kind);
@@ -1245,7 +1246,11 @@ impl PassTyping {
 
         // Visit all of the expressions within the body
         self.parent_index = None;
-        return self.visit_block_stmt(ctx, body_id);
+        if !procedure_is_builtin {
+            return self.visit_block_stmt(ctx, body_id);
+        } else {
+            return Ok(());
+        }
     }
 
     // Statements
diff --git a/src/protocol/parser/pass_validation_linking.rs b/src/protocol/parser/pass_validation_linking.rs
index 5490af080f9b8eec88f98caa0ec7b0be6ad14862..19b463957c8388f64b0bd8eefdd1ea888cf44554 100644
--- a/src/protocol/parser/pass_validation_linking.rs
+++ b/src/protocol/parser/pass_validation_linking.rs
@@ -200,6 +200,7 @@ impl Visitor for PassValidationLinking {
         let definition = &ctx.heap[id];
 
         let body_id = definition.body;
+        let definition_is_builtin = definition.source.is_builtin();
         let section = self.variable_buffer.start_section_initialized(&definition.parameters);
         for variable_idx in 0..section.len() {
             let variable_id = section[variable_idx];
@@ -207,8 +208,11 @@ impl Visitor for PassValidationLinking {
         }
         section.forget();
 
-        // Visit statements in function body
-        self.visit_block_stmt(ctx, body_id)?;
+        // Visit statements in function body, if present at all
+        if !definition_is_builtin {
+            self.visit_block_stmt(ctx, body_id)?;
+        }
+
         self.pop_scope(old_scope);
 
         self.resolve_pending_control_flow_targets(ctx)?;
diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs
index 0d64b4d7d5515fa4c0c530f8dc43dff3d74f36b6..088dc52d102199b6cf2e4b2f981cb4aebc30af6f 100644
--- a/src/protocol/parser/symbol_table.rs
+++ b/src/protocol/parser/symbol_table.rs
@@ -85,7 +85,6 @@ pub struct SymbolDefinition {
     // spans and module IDs
     pub defined_in_module: RootId,
     pub defined_in_scope: SymbolScope,
-    pub definition_span: InputSpan, // full span of definition
     pub identifier_span: InputSpan, // span of just the identifier
     // Location where the symbol is introduced in its scope
     pub imported_at: Option<InputSpan>,
@@ -231,6 +230,14 @@ impl SymbolTable {
         Ok(())
     }
 
+    /// Inserts a symbol into the global scope. The symbol necessarily
+    /// collides with its original definition in a module scope, so we
+    /// deliberately do *not* check for collisions here.
+    pub(crate) fn insert_symbol_in_global_scope(&mut self, symbol: Symbol) {
+        let scoped_symbols = self.scope_lookup.get_mut(&SymbolScope::Global).unwrap();
+        scoped_symbols.symbols.push(symbol);
+    }
+
     /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The
     /// returned symbol may both be imported as defined within any of the searched scopes.
     pub(crate) fn get_symbol_by_name(
diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs
index 1a81840b3c5a4c112d01c7cd0e032cc66ba87ecd..4de8f5c7636b1fc002d1ebaf7473666a274234b6 100644
--- a/src/protocol/parser/token_parsing.rs
+++ b/src/protocol/parser/token_parsing.rs
@@ -86,6 +86,15 @@ pub(crate) const KW_TYPE_CHAR: &'static [u8] = KW_TYPE_CHAR_STR.as_bytes();
 pub(crate) const KW_TYPE_STRING: &'static [u8] = KW_TYPE_STRING_STR.as_bytes();
 pub(crate) const KW_TYPE_INFERRED: &'static [u8] = KW_TYPE_INFERRED_STR.as_bytes();
 
+// Builtin pragma types. These are not usable by the programmer, only by the
+// standard library. Their existence hints at the fact that we need a
+// different system (e.g. function overloading).
+pub(crate) const PRAGMA_TYPE_VOID: &'static [u8] = b"#type_void";
+pub(crate) const PRAGMA_TYPE_PORTLIKE: &'static [u8] = b"#type_portlike";
+pub(crate) const PRAGMA_TYPE_INTEGERLIKE: &'static [u8] = b"#type_integerlike";
+pub(crate) const PRAGMA_TYPE_ARRAYLIKE: &'static [u8] = b"#type_arraylike";
+
+
 /// A special trait for when consuming comma-separated things such that we can
 /// push them onto a `Vec` and onto a `ScopedSection`. As we monomorph for
 /// very specific comma-separated cases I don't expect polymorph bloat.
@@ -449,13 +458,13 @@ fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8)
     Ok(result)
 }
 
-pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
+pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
     if Some(TokenKind::Pragma) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
     }
-    let (pragma_start, pragma_end) = iter.next_positions();
+    let pragma_span = iter.next_span();
     iter.consume();
-    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
+    Ok((source.section_at_span(pragma_span), pragma_span))
 }
 
 pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs
index d8c6b7d25b01b89aea578664be187660c0a025bb..72c019ae259a40e7a4d0e92e0e6b4ea93e8183f8 100644
--- a/src/protocol/parser/tokens.rs
+++ b/src/protocol/parser/tokens.rs
@@ -170,6 +170,29 @@ impl Token {
     }
 }
 
+#[derive(Debug, Clone, Copy)]
+pub enum TokenMarkerKind {
+    Pragma,
+    Import,
+    Definition,
+}
+
+/// A marker for a specific token. Markers are stored separately from the
+/// array of tokens, and are used for the initial discovery of symbols, the
+/// module name, and imports.
+#[derive(Debug)]
+pub struct TokenMarker {
+    pub kind: TokenMarkerKind,
+    pub curly_depth: u32,
+    // Indices into the token buffer. The first token is inclusive and set upon
+    // tokenization, the last token is set at a later stage in parsing (e.g.
+    // at symbol discovery we may parse some of the `Pragma` tokens and set the
+    // last parsed token)
+    pub first_token: u32,
+    pub last_token: u32,
+    pub handled: bool,
+}
+
 /// The kind of token ranges that are specially parsed by the tokenizer.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TokenRangeKind {
@@ -204,31 +227,25 @@ pub struct TokenRange {
 
 pub struct TokenBuffer {
     pub tokens: Vec<Token>,
+    pub markers: Vec<TokenMarker>,
     pub ranges: Vec<TokenRange>,
 }
 
 impl TokenBuffer {
     pub(crate) fn new() -> Self {
-        Self{ tokens: Vec::new(), ranges: Vec::new() }
-    }
-
-    pub(crate) fn iter_range<'a>(&'a self, inclusive_start: u32, exclusive_end: u32) -> TokenIter<'a> {
-        debug_assert!(exclusive_end as usize <= self.tokens.len());
-        TokenIter::new(self, inclusive_start as usize, exclusive_end as usize)
-    }
-
-    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
-        self.tokens[range.start as usize].pos
+        return Self{
+            tokens: Vec::new(),
+            markers: Vec::new(),
+            ranges: Vec::new()
+        };
     }
 
-    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
-        let last_token = &self.tokens[range.end as usize - 1];
-        if last_token.kind == TokenKind::SpanEnd {
-            return last_token.pos
-        } else {
-            debug_assert!(!last_token.kind.has_span_end());
-            return last_token.pos.with_offset(last_token.kind.num_characters());
-        }
+    pub(crate) fn iter_range(
+        &self, inclusive_start: u32, exclusive_end: Option<u32>
+    ) -> TokenIter {
+        let exclusive_end = exclusive_end.unwrap_or(self.tokens.len() as u32) as usize;
+        debug_assert!(exclusive_end <= self.tokens.len());
+        TokenIter::new(self, inclusive_start as usize, exclusive_end)
     }
 }
diff --git a/src/protocol/tests/utils.rs b/src/protocol/tests/utils.rs
index 735cbdc11c4a0e1c5eeb1c13f21d34f281ea3d49..d0694aa883873531183aedd7c23ba71dee352d16 100644
--- a/src/protocol/tests/utils.rs
+++ b/src/protocol/tests/utils.rs
@@ -600,7 +600,8 @@ impl<'a> FunctionTester<'a> {
 
         // Find the first occurrence of the expression after the definition of
         // the function, we'll check that it is included in the body later.
-        let mut outer_match_idx = self.def.span.begin.offset as usize;
+        let body = &self.ctx.heap[self.def.body];
+        let mut outer_match_idx = body.span.begin.offset as usize;
         while outer_match_idx < module.source.input.len() {
             if module.source.input[outer_match_idx..].starts_with(outer_match.as_bytes()) {
                 break;
diff --git a/src/protocol/token_writer.rs b/src/protocol/token_writer.rs
index 37d52770c34a4efb22b06de483e15060d773c12b..ac28761f48e5a136126b1c0addd1b37782400b73 100644
--- a/src/protocol/token_writer.rs
+++ b/src/protocol/token_writer.rs
@@ -5,7 +5,7 @@ use std::io::Write as IOWrite;
 
 use crate::protocol::input_source::{InputSource, InputSpan};
 use crate::protocol::parser::Module;
-use crate::protocol::tokens::{Token, TokenBuffer, TokenKind, TokenRange, TokenIter, TokenRangeKind};
+use crate::protocol::tokens::{Token, TokenKind, TokenRange};
 
 pub(crate) struct TokenWriter {
     buffer: String,
diff --git a/std/std.global.pdl b/std/std.global.pdl
index 49e3269e94b9c2443b72716c0e2981f16edd1ee0..6ad5a3aeb2b8fb5597794a4d8ac15bd4462728b4 100644
--- a/std/std.global.pdl
+++ b/std/std.global.pdl
@@ -7,8 +7,8 @@
 func get(in input) -> T { #builtin }
 func put(out output, T value) -> #type_void { #builtin }
-func fires(#type_portlike ) -> bool { #builtin }
-func create(#type_integerlike length) -> T[] { #builtin }
-func length(#type_arraylike array) -> u32 { #builtin }
+func fires(#type_portlike port) -> bool { #builtin }
+func create(#type_integerlike len) -> T[] { #builtin }
+func length(#type_arraylike array) -> u32 { #builtin }
 func assert(bool condition) -> #type_void { #builtin }
 func print(string message) -> #type_void { #builtin }
\ No newline at end of file
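
The `#type_*` signatures in the standard library above only parse because
`consume_parser_type_element` (see the pass_definitions_types.rs hunks) now tries a pragma
token before falling back to ordinary identifiers. A condensed, self-contained sketch of
that dispatch; the constants mirror token_parsing.rs, while the error type is simplified:

```rust
const PRAGMA_TYPE_VOID: &[u8] = b"#type_void";
const PRAGMA_TYPE_PORTLIKE: &[u8] = b"#type_portlike";
const PRAGMA_TYPE_INTEGERLIKE: &[u8] = b"#type_integerlike";
const PRAGMA_TYPE_ARRAYLIKE: &[u8] = b"#type_arraylike";

#[derive(Debug, PartialEq)]
enum ParserTypeVariant { Void, InputOrOutput, IntegerLike, ArrayLike }

/// Maps a pragma's text to a builtin type variant. Both unknown pragmas and
/// known ones outside compiler files (`allow_compiler_types == false`) are
/// rejected, matching the patch's "unexpected pragma in type" error.
fn pragma_type(text: &[u8], allow_compiler_types: bool) -> Result<ParserTypeVariant, &'static str> {
    use ParserTypeVariant as PTV;
    let variant = match text {
        t if t == PRAGMA_TYPE_VOID => Some(PTV::Void),
        t if t == PRAGMA_TYPE_PORTLIKE => Some(PTV::InputOrOutput),
        t if t == PRAGMA_TYPE_INTEGERLIKE => Some(PTV::IntegerLike),
        t if t == PRAGMA_TYPE_ARRAYLIKE => Some(PTV::ArrayLike),
        _ => None,
    };
    match variant {
        Some(v) if allow_compiler_types => Ok(v),
        _ => Err("unexpected pragma in type"),
    }
}
```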